From c9f02630dffa142606de52f184034a2fbb433191 Mon Sep 17 00:00:00 2001 From: Matthew DeGuzman Date: Thu, 14 May 2026 15:00:36 -0400 Subject: [PATCH 1/5] Add experimental file access hooks --- README.md | 2 + SHELL_FEATURES.md | 1 + allowedpaths/sandbox.go | 16 ++ analysis/symbols_interp.go | 3 + interp/api.go | 44 +++- interp/file_access_hooks.go | 427 +++++++++++++++++++++++++++++++ interp/file_access_hooks_test.go | 186 ++++++++++++++ interp/runner.go | 18 +- interp/runner_exec.go | 89 +++++-- interp/runner_expand.go | 11 +- interp/runner_redir.go | 4 +- 11 files changed, 762 insertions(+), 39 deletions(-) create mode 100644 interp/file_access_hooks.go create mode 100644 interp/file_access_hooks_test.go diff --git a/README.md b/README.md index ce13792ce..edfda10dc 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,8 @@ Every access path is default-deny: **AllowedPaths** restricts all file operations to specified directories using Go's `os.Root` API (`openat` syscalls), making it immune to symlink traversal, TOCTOU races, and `..` escape attacks. Configured directories that cannot be opened (missing, not a directory, no permission) are skipped with a diagnostic message; by default these messages are flushed once to the runner's stderr at construction time. Callers that need to keep stderr clean of sandbox diagnostics can route them to a dedicated sink with `WarningsWriter(io.Writer)` or retrieve them programmatically via `Runner.Warnings()`. +Library callers can install passive file-access instrumentation with `WithFileAccessHooks`. When configured, rshell calls before/after hooks around sandboxed file opens, directory reads, stat/lstat/readlink/access checks, input redirections, command substitution file shortcuts, and glob expansion. The hooks receive command/source/operation/path context plus lightweight pre/post metadata, but cannot authorize, deny, rewrite, or otherwise alter file access. + > **Note:** The `ss`, `ip route`, and `df` builtins bypass `AllowedPaths` for their kernel-state reads. `ss` and `ip route` open `/proc/net/*` paths directly; `df` reads `/proc/self/mountinfo` (Linux) or calls `getfsstat(2)` (macOS), then issues `unix.Statfs(2)` against every kernel-reported mount point. These paths are hardcoded — never derived from user input — and `Statfs` returns metadata only (block / inode counts, filesystem type, block size). There is no sandbox-escape risk, but operators cannot use `AllowedPaths` to block `ss` from enumerating local sockets, `ip route` from reading the routing table, or `df` from reporting mount-table capacity — these reads succeed regardless of the configured path policy. **ProcPath** (Linux-only) overrides the proc filesystem root used by the `ps` builtin (default `/proc`). This is a privileged option set at runner construction time by trusted caller code — scripts cannot influence it. Access to the proc path is intentionally not subject to `AllowedPaths` restrictions, since proc is a read-only virtual filesystem that does not expose host data under the normal file hierarchy. diff --git a/SHELL_FEATURES.md b/SHELL_FEATURES.md index 4aa0f4db9..4d7dd3d43 100644 --- a/SHELL_FEATURES.md +++ b/SHELL_FEATURES.md @@ -108,6 +108,7 @@ The in-shell `help` command mirrors these feature categories: run `help` for a c - ✅ AllowedCommands — restricts which commands (builtins or external) may be executed; commands require the `rshell:` namespace prefix (e.g. `rshell:cat`); if not set, no commands are allowed - ✅ AllowedPaths filesystem sandboxing — restricts all file access to specified directories +- ✅ File-access hooks — library callers can opt in to passive before/after metadata events for sandboxed file opens, directory reads, stat/lstat/readlink/access checks, input redirects, command substitution file shortcuts, and glob expansion - ✅ Whole-run execution timeout — callers can bound a `Run()` call via `context.Context`, `interp.MaxExecutionTime`, or the CLI `--timeout` flag; the deadline applies to the entire script, not each individual command - ✅ ProcPath — overrides the proc filesystem path used by `ps` (default `/proc`; Linux-only; useful for testing/container environments) - ❌ External commands — blocked by default; requires an ExecHandler to be configured and the binary to be within AllowedPaths diff --git a/allowedpaths/sandbox.go b/allowedpaths/sandbox.go index 76263d410..85f98d0bd 100644 --- a/allowedpaths/sandbox.go +++ b/allowedpaths/sandbox.go @@ -631,6 +631,22 @@ func (s *Sandbox) Readlink(path string, cwd string) (string, error) { return target, nil } +// ResolvePath returns a best-effort absolute path after applying the same +// sandbox root and cross-root symlink resolution used by filesystem +// operations. When preserveLast is true, the final path component is not +// resolved if it is a symlink, matching lstat/readlink semantics. +func (s *Sandbox) ResolvePath(path string, cwd string, preserveLast bool) (string, error) { + absPath := filepath.Clean(toAbs(path, cwd)) + ar, relPath, ok := s.resolveRootFollowingSymlinks(absPath, preserveLast) + if !ok { + return "", &os.PathError{Op: "resolve", Path: path, Err: os.ErrPermission} + } + if relPath == "." { + return ar.absPath, nil + } + return filepath.Join(ar.absPath, relPath), nil +} + // SetHostPrefix overrides the mount prefix used to translate host-absolute // symlink targets inside containers. func (s *Sandbox) SetHostPrefix(prefix string) { diff --git a/analysis/symbols_interp.go b/analysis/symbols_interp.go index 3da6c1a28..c63df0234 100644 --- a/analysis/symbols_interp.go +++ b/analysis/symbols_interp.go @@ -41,6 +41,8 @@ var interpAllowedSymbols = []string{ "io.Writer", // 🟢 interface type for writing; no side effects. "io/fs.DirEntry", // 🟢 interface type for directory entries; no side effects. "io/fs.FileInfo", // 🟢 interface type for file metadata; no side effects. + "io/fs.FileMode", // 🟢 file mode bits type for metadata; pure type. + "io/fs.ModeSymlink", // 🟢 symlink mode bit constant for metadata classification; pure constant. "io/fs.ReadDirFile", // 🟢 read-only directory handle interface; no write capability. "maps.Insert", // 🟢 inserts all key-value pairs from one map into another; pure function. "os.DirEntry", // 🟢 type alias for fs.DirEntry; no side effects. @@ -50,6 +52,7 @@ var interpAllowedSymbols = []string{ "os.O_RDONLY", // 🟢 read-only file flag constant; pure constant. "os.PathError", // 🟢 error type wrapping path and operation; pure type. "os.Pipe", // 🟠 creates an OS pipe pair; needed for shell pipelines. + "path/filepath.Clean", // 🟢 normalizes path strings for reporting; pure function, no I/O. "path/filepath.IsAbs", // 🟢 checks if path is absolute; pure function, no I/O. "path/filepath.Join", // 🟢 joins path elements; pure function, no I/O. "path/filepath.ListSeparator", // 🟢 OS-specific path list separator; pure constant. diff --git a/interp/api.go b/interp/api.go index 3a45772fa..5a566906f 100644 --- a/interp/api.go +++ b/interp/api.go @@ -94,6 +94,11 @@ type runnerConfig struct { // New() and shared across subshells via runnerConfig value copy. proc *builtins.ProcProvider + // fileAccessHooks, when configured, receives passive before/after events + // for filesystem operations performed by rshell. Hooks are observability + // only: they cannot authorize, deny, or alter file access. + fileAccessHooks FileAccessHooks + // usedNew is set by New() and checked in Reset() to ensure a Runner // was properly constructed rather than zero-initialized. usedNew bool @@ -193,6 +198,16 @@ type runnerState struct { // (including concurrent pipe subshells) via pointer, and must be // accessed atomically. globReadDirCount *atomic.Int64 + + // fileAccessSeq assigns stable before/after event ids for the current + // Run(). It is shared with subshells so concurrent pipeline stages do not + // produce colliding ids. + fileAccessSeq *atomic.Int64 + + // fileAccessCommand carries a best-effort command name while expanding a + // simple command, so glob and command-substitution file accesses can be + // attributed before the final expanded argv is known. + fileAccessCommand string } // A Runner interprets shell programs. It can be reused, but it is not safe for @@ -586,6 +601,7 @@ func (r *Runner) Run(ctx context.Context, node syntax.Node) (retErr error) { r.runStdout = r.stdout r.startTime = time.Now() r.globReadDirCount = &atomic.Int64{} + r.fileAccessSeq = &atomic.Int64{} r.fillExpandConfig(ctx) if err := validateNode(node); err != nil { fmt.Fprintln(r.stderr, err) @@ -802,19 +818,21 @@ func (r *Runner) subshell(background bool) *Runner { r2 := &Runner{ runnerConfig: r.runnerConfig, runnerState: runnerState{ - Dir: r.Dir, - Params: r.Params, - stdin: r.stdin, - stdout: r.stdout, - stderr: r.stderr, - runStdin: r.runStdin, - runStdout: r.runStdout, - inPipeline: r.inPipeline, - filename: r.filename, - exit: r.exit, - lastExit: r.lastExit, - startTime: r.startTime, - globReadDirCount: r.globReadDirCount, + Dir: r.Dir, + Params: r.Params, + stdin: r.stdin, + stdout: r.stdout, + stderr: r.stderr, + runStdin: r.runStdin, + runStdout: r.runStdout, + inPipeline: r.inPipeline, + filename: r.filename, + exit: r.exit, + lastExit: r.lastExit, + startTime: r.startTime, + globReadDirCount: r.globReadDirCount, + fileAccessSeq: r.fileAccessSeq, + fileAccessCommand: r.fileAccessCommand, }, } r2.writeEnv = newOverlayEnviron(r.writeEnv, background) diff --git a/interp/file_access_hooks.go b/interp/file_access_hooks.go new file mode 100644 index 000000000..34b9b5792 --- /dev/null +++ b/interp/file_access_hooks.go @@ -0,0 +1,427 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package interp + +import ( + "context" + "io" + "io/fs" + "os" + "path/filepath" + "sync/atomic" + "time" + + "mvdan.cc/sh/v3/syntax" + + "github.com/DataDog/rshell/allowedpaths" +) + +// FileAccessOp identifies the kind of filesystem operation being traced. +type FileAccessOp string + +const ( + FileAccessOpOpen FileAccessOp = "open" + FileAccessOpReadDir FileAccessOp = "readdir" + FileAccessOpOpenDir FileAccessOp = "opendir" + FileAccessOpStat FileAccessOp = "stat" + FileAccessOpLstat FileAccessOp = "lstat" + FileAccessOpReadlink FileAccessOp = "readlink" + FileAccessOpAccess FileAccessOp = "access" + FileAccessOpIsDirEmpty FileAccessOp = "is_dir_empty" +) + +// FileAccessSource identifies which rshell surface initiated the access. +type FileAccessSource string + +const ( + FileAccessSourceBuiltin FileAccessSource = "builtin" + FileAccessSourceInputRedirect FileAccessSource = "input_redirect" + FileAccessSourceGlob FileAccessSource = "glob" + FileAccessSourceCommandSubstitute FileAccessSource = "command_substitution" +) + +// FileAccessResult is populated on after-events to report operation outcome. +type FileAccessResult string + +const ( + FileAccessResultPending FileAccessResult = "" + FileAccessResultSuccess FileAccessResult = "success" + FileAccessResultError FileAccessResult = "error" +) + +// FileAccessHooks are passive callbacks invoked around rshell filesystem +// operations when configured by [WithFileAccessHooks]. Hooks are for +// observability only: they cannot authorize, deny, rewrite, or otherwise +// alter file access. +type FileAccessHooks struct { + Before func(context.Context, FileAccessEvent) + After func(context.Context, FileAccessEvent) +} + +// FileAccessEvent describes one filesystem operation observed by rshell. +// Before and after callbacks for the same operation share the same ID. +type FileAccessEvent struct { + ID int64 + + Command string + Source FileAccessSource + Op FileAccessOp + + RequestedPath string + AbsPath string + ResolvedPath string + CWD string + + Flags int + Mode os.FileMode + // AccessMode is set for access checks. Bits follow the shell test + // convention: 0x04 read, 0x02 write, 0x01 execute. + AccessMode uint32 + + Result FileAccessResult + Err string + + PreMetadata *FileAccessMetadata + PreMetadataErr string + PostMetadata *FileAccessMetadata + PostMetadataErr string +} + +// FileAccessMetadata is lightweight file metadata captured for a traced access. +// It intentionally contains no file contents. +type FileAccessMetadata struct { + Mode fs.FileMode + Size int64 + ModTime time.Time + IsRegular bool + IsDir bool + IsSymlink bool + FileID *FileAccessFileID +} + +// FileAccessFileID identifies a file when the platform exposes stable identity. +// On Unix this is device+inode; on Windows it is volume serial+file index. +type FileAccessFileID struct { + Dev uint64 + Ino uint64 +} + +// WithFileAccessHooks installs passive before/after filesystem access hooks. +// Nil callbacks are ignored. When no callbacks are configured, rshell performs +// no extra metadata collection for this feature. +func WithFileAccessHooks(hooks FileAccessHooks) RunnerOption { + return func(r *Runner) error { + r.fileAccessHooks = hooks + return nil + } +} + +type fileAccessMetadataMode uint8 + +const ( + fileAccessMetadataNone fileAccessMetadataMode = iota + fileAccessMetadataStat + fileAccessMetadataLstat +) + +func (r *Runner) fileAccessHooksEnabled() bool { + return r.fileAccessHooks.Before != nil || r.fileAccessHooks.After != nil +} + +func (r *Runner) beginFileAccess( + ctx context.Context, + command string, + source FileAccessSource, + op FileAccessOp, + path string, + cwd string, + flags int, + mode os.FileMode, + accessMode uint32, + metadataMode fileAccessMetadataMode, +) FileAccessEvent { + if !r.fileAccessHooksEnabled() { + return FileAccessEvent{} + } + if r.fileAccessSeq == nil { + r.fileAccessSeq = &atomic.Int64{} + } + absPath := fileAccessAbsPath(path, cwd) + resolvedPath := absPath + if r.sandbox != nil { + preserveLast := metadataMode == fileAccessMetadataLstat + if resolved, err := r.sandbox.ResolvePath(path, cwd, preserveLast); err == nil { + resolvedPath = resolved + } else { + resolvedPath = r.sandbox.CanonicalizeRootPrefix(absPath) + } + } + + event := FileAccessEvent{ + ID: r.fileAccessSeq.Add(1), + Command: command, + Source: source, + Op: op, + RequestedPath: path, + AbsPath: absPath, + ResolvedPath: resolvedPath, + CWD: cwd, + Flags: flags, + Mode: mode, + AccessMode: accessMode, + } + event.PreMetadata, event.PreMetadataErr = r.collectFileAccessMetadata(path, cwd, metadataMode) + r.callFileAccessHook(ctx, r.fileAccessHooks.Before, event) + return event +} + +func (r *Runner) finishFileAccess( + ctx context.Context, + event FileAccessEvent, + info fs.FileInfo, + metadataErr error, + accessErr error, + postMetadataMode fileAccessMetadataMode, +) { + if !r.fileAccessHooksEnabled() { + return + } + if accessErr != nil { + event.Result = FileAccessResultError + event.Err = fileAccessErrString(accessErr) + } else { + event.Result = FileAccessResultSuccess + } + if metadataErr != nil { + event.PostMetadataErr = fileAccessErrString(metadataErr) + } else if info != nil { + event.PostMetadata = r.fileAccessMetadataFromInfo(event.RequestedPath, event.CWD, info) + } else if accessErr == nil { + event.PostMetadata, event.PostMetadataErr = r.collectFileAccessMetadata(event.RequestedPath, event.CWD, postMetadataMode) + } + r.callFileAccessHook(ctx, r.fileAccessHooks.After, event) +} + +func (r *Runner) observedSandboxOpen( + ctx context.Context, + command string, + source FileAccessSource, + path string, + cwd string, + flags int, + mode os.FileMode, + open func() (io.ReadWriteCloser, error), +) (io.ReadWriteCloser, error) { + event := r.beginFileAccess(ctx, command, source, FileAccessOpOpen, path, cwd, flags, mode, 0, fileAccessMetadataStat) + f, err := open() + var ( + info fs.FileInfo + infoErr error + ) + if err == nil { + if st, ok := f.(interface{ Stat() (fs.FileInfo, error) }); ok { + info, infoErr = st.Stat() + } + } + r.finishFileAccess(ctx, event, info, infoErr, err, fileAccessMetadataStat) + return f, err +} + +func (r *Runner) observedReadDir( + ctx context.Context, + command string, + source FileAccessSource, + path string, + cwd string, + readDir func() ([]fs.DirEntry, error), +) ([]fs.DirEntry, error) { + event := r.beginFileAccess(ctx, command, source, FileAccessOpReadDir, path, cwd, 0, 0, 0, fileAccessMetadataStat) + entries, err := readDir() + r.finishFileAccess(ctx, event, nil, nil, err, fileAccessMetadataStat) + return entries, err +} + +func (r *Runner) observedOpenDir( + ctx context.Context, + command string, + source FileAccessSource, + path string, + cwd string, + openDir func() (fs.ReadDirFile, error), +) (fs.ReadDirFile, error) { + event := r.beginFileAccess(ctx, command, source, FileAccessOpOpenDir, path, cwd, 0, 0, 0, fileAccessMetadataStat) + f, err := openDir() + var ( + info fs.FileInfo + infoErr error + ) + if err == nil { + if st, ok := f.(interface{ Stat() (fs.FileInfo, error) }); ok { + info, infoErr = st.Stat() + } + } + r.finishFileAccess(ctx, event, info, infoErr, err, fileAccessMetadataStat) + return f, err +} + +func (r *Runner) observedIsDirEmpty( + ctx context.Context, + command string, + source FileAccessSource, + path string, + cwd string, + isDirEmpty func() (bool, error), +) (bool, error) { + event := r.beginFileAccess(ctx, command, source, FileAccessOpIsDirEmpty, path, cwd, 0, 0, 0, fileAccessMetadataStat) + empty, err := isDirEmpty() + r.finishFileAccess(ctx, event, nil, nil, err, fileAccessMetadataStat) + return empty, err +} + +func (r *Runner) observedReadDirLimited( + ctx context.Context, + command string, + source FileAccessSource, + path string, + cwd string, + readDirLimited func() ([]fs.DirEntry, bool, error), +) ([]fs.DirEntry, bool, error) { + event := r.beginFileAccess(ctx, command, source, FileAccessOpReadDir, path, cwd, 0, 0, 0, fileAccessMetadataStat) + entries, truncated, err := readDirLimited() + r.finishFileAccess(ctx, event, nil, nil, err, fileAccessMetadataStat) + return entries, truncated, err +} + +func (r *Runner) observedStat( + ctx context.Context, + command string, + source FileAccessSource, + op FileAccessOp, + path string, + cwd string, + metadataMode fileAccessMetadataMode, + stat func() (fs.FileInfo, error), +) (fs.FileInfo, error) { + event := r.beginFileAccess(ctx, command, source, op, path, cwd, 0, 0, 0, metadataMode) + info, err := stat() + r.finishFileAccess(ctx, event, info, nil, err, fileAccessMetadataNone) + return info, err +} + +func (r *Runner) observedReadlink( + ctx context.Context, + command string, + source FileAccessSource, + path string, + cwd string, + readlink func() (string, error), +) (string, error) { + event := r.beginFileAccess(ctx, command, source, FileAccessOpReadlink, path, cwd, 0, 0, 0, fileAccessMetadataLstat) + target, err := readlink() + r.finishFileAccess(ctx, event, nil, nil, err, fileAccessMetadataLstat) + return target, err +} + +func (r *Runner) observedAccess( + ctx context.Context, + command string, + source FileAccessSource, + path string, + cwd string, + mode uint32, + access func() error, +) error { + event := r.beginFileAccess(ctx, command, source, FileAccessOpAccess, path, cwd, 0, 0, mode, fileAccessMetadataStat) + err := access() + r.finishFileAccess(ctx, event, nil, nil, err, fileAccessMetadataStat) + return err +} + +func (r *Runner) callFileAccessHook(ctx context.Context, hook func(context.Context, FileAccessEvent), event FileAccessEvent) { + if hook == nil { + return + } + defer func() { + _ = recover() + }() + hook(ctx, event) +} + +func (r *Runner) collectFileAccessMetadata(path string, cwd string, mode fileAccessMetadataMode) (*FileAccessMetadata, string) { + if mode == fileAccessMetadataNone { + return nil, "" + } + var ( + info fs.FileInfo + err error + ) + switch mode { + case fileAccessMetadataStat: + info, err = r.sandbox.Stat(path, cwd) + case fileAccessMetadataLstat: + info, err = r.sandbox.Lstat(path, cwd) + } + if err != nil { + return nil, fileAccessErrString(err) + } + return r.fileAccessMetadataFromInfo(path, cwd, info), "" +} + +func (r *Runner) fileAccessMetadataFromInfo(path string, cwd string, info fs.FileInfo) *FileAccessMetadata { + if info == nil { + return nil + } + md := &FileAccessMetadata{ + Mode: info.Mode(), + Size: info.Size(), + ModTime: info.ModTime(), + IsRegular: info.Mode().IsRegular(), + IsDir: info.IsDir(), + IsSymlink: info.Mode()&fs.ModeSymlink != 0, + } + absPath := fileAccessAbsPath(path, cwd) + if dev, ino, ok := allowedpaths.FileIdentity(absPath, info, r.sandbox); ok { + md.FileID = &FileAccessFileID{Dev: dev, Ino: ino} + } + return md +} + +func fileAccessErrString(err error) string { + if err == nil { + return "" + } + return allowedpaths.PortableErrMsg(err) +} + +func fileAccessAbsPath(path string, cwd string) string { + if filepath.IsAbs(path) { + return filepath.Clean(path) + } + return filepath.Clean(filepath.Join(cwd, path)) +} + +func simpleCommandName(cm syntax.Command) string { + call, ok := cm.(*syntax.CallExpr) + if !ok || len(call.Args) == 0 { + return "" + } + return simpleWordLiteral(call.Args[0]) +} + +func simpleWordLiteral(word *syntax.Word) string { + if word == nil { + return "" + } + var out string + for _, part := range word.Parts { + lit, ok := part.(*syntax.Lit) + if !ok { + return "" + } + out += lit.Value + } + return out +} diff --git a/interp/file_access_hooks_test.go b/interp/file_access_hooks_test.go new file mode 100644 index 000000000..f634a8cb7 --- /dev/null +++ b/interp/file_access_hooks_test.go @@ -0,0 +1,186 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package interp + +import ( + "context" + "io" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type capturedFileAccessEvent struct { + phase string + event FileAccessEvent +} + +func runWithFileAccessHooks(t *testing.T, dir string, script string, hooks FileAccessHooks) error { + t.Helper() + prog, err := ParseScript(script, "") + require.NoError(t, err) + r, err := New( + StdIO(nil, io.Discard, io.Discard), + AllowedPaths([]string{dir}), + allowAllCommandsOpt(), + WithFileAccessHooks(hooks), + ) + require.NoError(t, err) + t.Cleanup(func() { r.Close() }) + return r.Run(context.Background(), prog) +} + +func TestFileAccessHooksOpenFileBeforeAfter(t *testing.T) { + dir := t.TempDir() + input := filepath.Join(dir, "input.txt") + require.NoError(t, os.WriteFile(input, []byte("hello\n"), 0o644)) + + var events []capturedFileAccessEvent + hooks := FileAccessHooks{ + Before: func(_ context.Context, event FileAccessEvent) { + events = append(events, capturedFileAccessEvent{phase: "before", event: event}) + }, + After: func(_ context.Context, event FileAccessEvent) { + events = append(events, capturedFileAccessEvent{phase: "after", event: event}) + }, + } + + require.NoError(t, runWithFileAccessHooks(t, dir, "cat input.txt", hooks)) + require.Len(t, events, 2) + + before := events[0].event + after := events[1].event + assert.Equal(t, "before", events[0].phase) + assert.Equal(t, "after", events[1].phase) + assert.Equal(t, before.ID, after.ID) + assert.Equal(t, "cat", before.Command) + assert.Equal(t, FileAccessSourceBuiltin, before.Source) + assert.Equal(t, FileAccessOpOpen, before.Op) + assert.Equal(t, "input.txt", before.RequestedPath) + assert.Equal(t, input, before.AbsPath) + assert.Equal(t, input, before.ResolvedPath) + assert.Equal(t, dir, before.CWD) + require.NotNil(t, before.PreMetadata) + assert.True(t, before.PreMetadata.IsRegular) + assert.Equal(t, int64(len("hello\n")), before.PreMetadata.Size) + + assert.Equal(t, FileAccessResultSuccess, after.Result) + assert.Empty(t, after.Err) + require.NotNil(t, after.PostMetadata) + assert.True(t, after.PostMetadata.IsRegular) + assert.Equal(t, int64(len("hello\n")), after.PostMetadata.Size) +} + +func TestFileAccessHooksAttributeGlobToCommand(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(dir, "one.txt"), []byte("one\n"), 0o644)) + + var events []capturedFileAccessEvent + hooks := FileAccessHooks{ + Before: func(_ context.Context, event FileAccessEvent) { + events = append(events, capturedFileAccessEvent{phase: "before", event: event}) + }, + After: func(_ context.Context, event FileAccessEvent) { + events = append(events, capturedFileAccessEvent{phase: "after", event: event}) + }, + } + + require.NoError(t, runWithFileAccessHooks(t, dir, "echo *.txt", hooks)) + require.Len(t, events, 2) + + before := events[0].event + after := events[1].event + assert.Equal(t, FileAccessSourceGlob, before.Source) + assert.Equal(t, FileAccessOpReadDir, before.Op) + assert.Equal(t, "echo", before.Command) + assert.Equal(t, dir, before.RequestedPath) + assert.Equal(t, dir, before.AbsPath) + require.NotNil(t, before.PreMetadata) + assert.True(t, before.PreMetadata.IsDir) + assert.Equal(t, before.ID, after.ID) + assert.Equal(t, FileAccessResultSuccess, after.Result) + require.NotNil(t, after.PostMetadata) + assert.True(t, after.PostMetadata.IsDir) +} + +func TestFileAccessHooksAttributeCommandSubstitutionShortcut(t *testing.T) { + dir := t.TempDir() + input := filepath.Join(dir, "input.txt") + require.NoError(t, os.WriteFile(input, []byte("needle\n"), 0o644)) + + var events []capturedFileAccessEvent + hooks := FileAccessHooks{ + Before: func(_ context.Context, event FileAccessEvent) { + events = append(events, capturedFileAccessEvent{phase: "before", event: event}) + }, + After: func(_ context.Context, event FileAccessEvent) { + events = append(events, capturedFileAccessEvent{phase: "after", event: event}) + }, + } + + require.NoError(t, runWithFileAccessHooks(t, dir, "echo $( Date: Thu, 14 May 2026 15:17:45 -0400 Subject: [PATCH 2/5] Attribute file hooks for expanded command names --- interp/file_access_hooks.go | 79 +++++++++++++++++++++++++++++- interp/file_access_hooks_test.go | 84 ++++++++++++++++++++++++++++++++ interp/runner_exec.go | 4 +- 3 files changed, 163 insertions(+), 4 deletions(-) diff --git a/interp/file_access_hooks.go b/interp/file_access_hooks.go index 34b9b5792..328fce38b 100644 --- a/interp/file_access_hooks.go +++ b/interp/file_access_hooks.go @@ -403,12 +403,21 @@ func fileAccessAbsPath(path string, cwd string) string { return filepath.Clean(filepath.Join(cwd, path)) } -func simpleCommandName(cm syntax.Command) string { +// fileAccessCommandName returns a best-effort command name for file-access +// attribution without invoking shell expansion or its side effects. +func (r *Runner) fileAccessCommandName(cm syntax.Command) string { call, ok := cm.(*syntax.CallExpr) if !ok || len(call.Args) == 0 { return "" } - return simpleWordLiteral(call.Args[0]) + if name := simpleWordLiteral(call.Args[0]); name != "" { + return name + } + name, ok := r.staticCommandWordValue(call.Args[0], true) + if !ok { + return "" + } + return name } func simpleWordLiteral(word *syntax.Word) string { @@ -425,3 +434,69 @@ func simpleWordLiteral(word *syntax.Word) string { } return out } + +func (r *Runner) staticCommandWordValue(word *syntax.Word, splitAndGlob bool) (string, bool) { + if word == nil { + return "", false + } + var out string + for _, part := range word.Parts { + value, ok := r.staticCommandWordPartValue(part, splitAndGlob) + if !ok { + return "", false + } + out += value + } + if out == "" { + return "", false + } + return out, true +} + +func (r *Runner) staticCommandWordPartValue(part syntax.WordPart, splitAndGlob bool) (string, bool) { + switch part := part.(type) { + case *syntax.Lit: + return part.Value, true + case *syntax.SglQuoted: + return part.Value, true + case *syntax.DblQuoted: + var out string + for _, part := range part.Parts { + value, ok := r.staticCommandWordPartValue(part, false) + if !ok { + return "", false + } + out += value + } + return out, true + case *syntax.ParamExp: + if !simpleParamExp(part) || part.Param == nil { + return "", false + } + value := r.lookupVar(part.Param.Value).String() + if splitAndGlob && unsafeUnquotedCommandValue(value) { + return "", false + } + return value, true + default: + return "", false + } +} + +func simpleParamExp(param *syntax.ParamExp) bool { + return param.Flags == nil && + !param.Excl && !param.Length && !param.Width && !param.IsSet && + param.NestedParam == nil && param.Index == nil && + len(param.Modifiers) == 0 && param.Slice == nil && + param.Repl == nil && param.Names == 0 && param.Exp == nil +} + +func unsafeUnquotedCommandValue(value string) bool { + for _, r := range value { + switch r { + case ' ', '\t', '\n', '*', '?', '[': + return true + } + } + return false +} diff --git a/interp/file_access_hooks_test.go b/interp/file_access_hooks_test.go index f634a8cb7..c67c7c0d1 100644 --- a/interp/file_access_hooks_test.go +++ b/interp/file_access_hooks_test.go @@ -142,6 +142,90 @@ func TestFileAccessHooksAttributeCommandSubstitutionShortcut(t *testing.T) { assert.True(t, after.PostMetadata.IsRegular) } +func TestFileAccessHooksAttributeGlobToExpandedCommandName(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(dir, "one.txt"), []byte("one\n"), 0o644)) + + var events []capturedFileAccessEvent + hooks := FileAccessHooks{ + Before: func(_ context.Context, event FileAccessEvent) { + events = append(events, capturedFileAccessEvent{phase: "before", event: event}) + }, + After: func(_ context.Context, event FileAccessEvent) { + events = append(events, capturedFileAccessEvent{phase: "after", event: event}) + }, + } + + require.NoError(t, runWithFileAccessHooks(t, dir, "cmd=echo; $cmd *.txt", hooks)) + require.Len(t, events, 2) + + before := events[0].event + after := events[1].event + assert.Equal(t, FileAccessSourceGlob, before.Source) + assert.Equal(t, FileAccessOpReadDir, before.Op) + assert.Equal(t, "echo", before.Command) + assert.Equal(t, before.ID, after.ID) + assert.Equal(t, FileAccessResultSuccess, after.Result) +} + +func TestFileAccessHooksAttributeCommandSubstitutionShortcutToExpandedCommandName(t *testing.T) { + dir := t.TempDir() + input := filepath.Join(dir, "input.txt") + require.NoError(t, os.WriteFile(input, []byte("needle\n"), 0o644)) + + var events []capturedFileAccessEvent + hooks := FileAccessHooks{ + Before: func(_ context.Context, event FileAccessEvent) { + events = append(events, capturedFileAccessEvent{phase: "before", event: event}) + }, + After: func(_ context.Context, event FileAccessEvent) { + events = append(events, capturedFileAccessEvent{phase: "after", event: event}) + }, + } + + require.NoError(t, runWithFileAccessHooks(t, dir, "cmd=echo; $cmd $( Date: Thu, 14 May 2026 15:25:36 -0400 Subject: [PATCH 3/5] Tighten file hook command attribution --- interp/file_access_hooks.go | 25 ++++++++++-- interp/file_access_hooks_test.go | 69 ++++++++++++++++++++++++++++++++ interp/runner_exec.go | 7 +--- interp/runner_redir.go | 3 ++ 4 files changed, 96 insertions(+), 8 deletions(-) diff --git a/interp/file_access_hooks.go b/interp/file_access_hooks.go index 328fce38b..0161307cb 100644 --- a/interp/file_access_hooks.go +++ b/interp/file_access_hooks.go @@ -420,6 +420,14 @@ func (r *Runner) fileAccessCommandName(cm syntax.Command) string { return name } +func (r *Runner) pushFileAccessCommand(command string) func() { + prev := r.fileAccessCommand + r.fileAccessCommand = command + return func() { + r.fileAccessCommand = prev + } +} + func simpleWordLiteral(word *syntax.Word) string { if word == nil { return "" @@ -474,7 +482,7 @@ func (r *Runner) staticCommandWordPartValue(part syntax.WordPart, splitAndGlob b return "", false } value := r.lookupVar(part.Param.Value).String() - if splitAndGlob && unsafeUnquotedCommandValue(value) { + if splitAndGlob && r.unsafeUnquotedCommandValue(value) { return "", false } return value, true @@ -491,12 +499,23 @@ func simpleParamExp(param *syntax.ParamExp) bool { param.Repl == nil && param.Names == 0 && param.Exp == nil } -func unsafeUnquotedCommandValue(value string) bool { +func (r *Runner) unsafeUnquotedCommandValue(value string) bool { for _, r := range value { switch r { - case ' ', '\t', '\n', '*', '?', '[': + case '*', '?', '[': return true } } + ifs := " \t\n" + if vr := r.lookupVar("IFS"); vr.IsSet() { + ifs = vr.String() + } + for _, r := range value { + for _, sep := range ifs { + if r == sep { + return true + } + } + } return false } diff --git a/interp/file_access_hooks_test.go b/interp/file_access_hooks_test.go index c67c7c0d1..b23a56a0b 100644 --- a/interp/file_access_hooks_test.go +++ b/interp/file_access_hooks_test.go @@ -226,6 +226,75 @@ func TestFileAccessHooksAttributeInputRedirectToExpandedCommandName(t *testing.T assert.Equal(t, FileAccessResultSuccess, after.Result) } +func TestFileAccessHooksAttributeRedirectWordSubstitutionToExpandedCommandName(t *testing.T) { + dir := t.TempDir() + namefile := filepath.Join(dir, "namefile") + input := filepath.Join(dir, "input.txt") + require.NoError(t, os.WriteFile(namefile, []byte("input.txt"), 0o644)) + require.NoError(t, os.WriteFile(input, []byte("needle\n"), 0o644)) + + var events []capturedFileAccessEvent + hooks := FileAccessHooks{ + Before: func(_ context.Context, event FileAccessEvent) { + events = append(events, capturedFileAccessEvent{phase: "before", event: event}) + }, + After: func(_ context.Context, event FileAccessEvent) { + events = append(events, capturedFileAccessEvent{phase: "after", event: event}) + }, + } + + require.NoError(t, runWithFileAccessHooks(t, dir, "cmd=cat; $cmd < $( Date: Thu, 14 May 2026 15:31:51 -0400 Subject: [PATCH 4/5] Keep file hook command hints through assignments --- interp/file_access_hooks_test.go | 41 ++++++++++++++++++++++++++++++++ interp/runner_exec.go | 2 +- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/interp/file_access_hooks_test.go b/interp/file_access_hooks_test.go index b23a56a0b..9c90d68c5 100644 --- a/interp/file_access_hooks_test.go +++ b/interp/file_access_hooks_test.go @@ -295,6 +295,47 @@ func TestFileAccessHooksDoNotAttributeUnquotedCommandNameSplitByIFS(t *testing.T assert.Equal(t, FileAccessResultSuccess, after.Result) } +func TestFileAccessHooksAttributeInlineAssignmentSubstitutionToCommand(t *testing.T) { + dir := t.TempDir() + tokenfile := filepath.Join(dir, "tokenfile") + input := filepath.Join(dir, "input.txt") + require.NoError(t, os.WriteFile(tokenfile, []byte("token"), 0o644)) + require.NoError(t, os.WriteFile(input, []byte("needle\n"), 0o644)) + + var events []capturedFileAccessEvent + hooks := FileAccessHooks{ + Before: func(_ context.Context, event FileAccessEvent) { + events = append(events, capturedFileAccessEvent{phase: "before", event: event}) + }, + After: func(_ context.Context, event FileAccessEvent) { + events = append(events, capturedFileAccessEvent{phase: "after", event: event}) + }, + } + + require.NoError(t, runWithFileAccessHooks(t, dir, "TOKEN=$( Date: Thu, 14 May 2026 18:54:52 -0400 Subject: [PATCH 5/5] Add passive command dispatch hooks --- README.md | 2 + SHELL_FEATURES.md | 1 + builtins/builtins.go | 4 ++ builtins/find/eval.go | 11 ++-- builtins/find/find.go | 27 ++++---- builtins/xargs/xargs.go | 3 + interp/api.go | 5 ++ interp/command_hooks.go | 70 ++++++++++++++++++++ interp/command_hooks_test.go | 124 +++++++++++++++++++++++++++++++++++ interp/runner_exec.go | 42 ++++++++++-- 10 files changed, 267 insertions(+), 22 deletions(-) create mode 100644 interp/command_hooks.go create mode 100644 interp/command_hooks_test.go diff --git a/README.md b/README.md index edfda10dc..978222f0c 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,8 @@ Every access path is default-deny: Library callers can install passive file-access instrumentation with `WithFileAccessHooks`. When configured, rshell calls before/after hooks around sandboxed file opens, directory reads, stat/lstat/readlink/access checks, input redirections, command substitution file shortcuts, and glob expansion. The hooks receive command/source/operation/path context plus lightweight pre/post metadata, but cannot authorize, deny, rewrite, or otherwise alter file access. +Library callers can also install passive command-dispatch instrumentation with `WithCommandHooks`. When configured, rshell reports command name, arguments, allowed/known status, and exit code after each observed command dispatch. Command hooks are observability only and cannot authorize, deny, rewrite, or otherwise alter command execution. + > **Note:** The `ss`, `ip route`, and `df` builtins bypass `AllowedPaths` for their kernel-state reads. `ss` and `ip route` open `/proc/net/*` paths directly; `df` reads `/proc/self/mountinfo` (Linux) or calls `getfsstat(2)` (macOS), then issues `unix.Statfs(2)` against every kernel-reported mount point. These paths are hardcoded — never derived from user input — and `Statfs` returns metadata only (block / inode counts, filesystem type, block size). There is no sandbox-escape risk, but operators cannot use `AllowedPaths` to block `ss` from enumerating local sockets, `ip route` from reading the routing table, or `df` from reporting mount-table capacity — these reads succeed regardless of the configured path policy. **ProcPath** (Linux-only) overrides the proc filesystem root used by the `ps` builtin (default `/proc`). This is a privileged option set at runner construction time by trusted caller code — scripts cannot influence it. Access to the proc path is intentionally not subject to `AllowedPaths` restrictions, since proc is a read-only virtual filesystem that does not expose host data under the normal file hierarchy. diff --git a/SHELL_FEATURES.md b/SHELL_FEATURES.md index 4d7dd3d43..8b78eced2 100644 --- a/SHELL_FEATURES.md +++ b/SHELL_FEATURES.md @@ -109,6 +109,7 @@ The in-shell `help` command mirrors these feature categories: run `help` for a c - ✅ AllowedCommands — restricts which commands (builtins or external) may be executed; commands require the `rshell:` namespace prefix (e.g. `rshell:cat`); if not set, no commands are allowed - ✅ AllowedPaths filesystem sandboxing — restricts all file access to specified directories - ✅ File-access hooks — library callers can opt in to passive before/after metadata events for sandboxed file opens, directory reads, stat/lstat/readlink/access checks, input redirects, command substitution file shortcuts, and glob expansion +- ✅ Command hooks — library callers can opt in to passive command-dispatch events with command name, arguments, allowed/known status, and exit code - ✅ Whole-run execution timeout — callers can bound a `Run()` call via `context.Context`, `interp.MaxExecutionTime`, or the CLI `--timeout` flag; the deadline applies to the entire script, not each individual command - ✅ ProcPath — overrides the proc filesystem path used by `ps` (default `/proc`; Linux-only; useful for testing/container environments) - ❌ External commands — blocked by default; requires an ExecHandler to be configured and the binary to be within AllowedPaths diff --git a/builtins/builtins.go b/builtins/builtins.go index 8eaeefe76..dd0d30167 100644 --- a/builtins/builtins.go +++ b/builtins/builtins.go @@ -184,6 +184,10 @@ type CallContext struct { // commands. CommandAllowed func(name string) bool + // CommandDenied reports an attempted child-command dispatch that was + // rejected by CommandAllowed before RunCommand was invoked. + CommandDenied func(ctx context.Context, name string, args []string) + // WorkDir returns the shell's current working directory (absolute path). // Used by builtins that need to compute absolute paths for sub-operations. WorkDir func() string diff --git a/builtins/find/eval.go b/builtins/find/eval.go index e6bae733a..76985f409 100644 --- a/builtins/find/eval.go +++ b/builtins/find/eval.go @@ -304,15 +304,18 @@ func evalExecLike(ec *evalContext, e *expr, name, replacement, dir string) evalR return evalResult{} } cmd := strings.ReplaceAll(e.execCmd, "{}", replacement) + args := make([]string, len(e.execArgs)) + for i, a := range e.execArgs { + args[i] = strings.ReplaceAll(a, "{}", replacement) + } if ec.callCtx.CommandAllowed != nil && !ec.callCtx.CommandAllowed(cmd) { + if ec.callCtx.CommandDenied != nil { + ec.callCtx.CommandDenied(ec.ctx, cmd, args) + } ec.callCtx.Errf("find: %s: '%s': command not allowed\n", name, cmd) ec.failed = true return evalResult{} } - args := make([]string, len(e.execArgs)) - for i, a := range e.execArgs { - args[i] = strings.ReplaceAll(a, "{}", replacement) - } exitCode, err := ec.callCtx.RunCommand(ec.ctx, dir, cmd, args) if err != nil { ec.callCtx.Errf("find: '%s': %s\n", cmd, err) diff --git a/builtins/find/find.go b/builtins/find/find.go index c8676aeb9..2f167f7ae 100644 --- a/builtins/find/find.go +++ b/builtins/find/find.go @@ -184,11 +184,16 @@ optLoop: // Post-parse validation: check -exec/-execdir commands are allowed. // Commands containing {} are skipped here — the substituted name is // validated at eval-time when the replacement is known. - for _, cmd := range collectExecCmds(expression) { + for _, ex := range collectExecExprs(expression) { + cmd := ex.execCmd if strings.Contains(cmd, "{}") { continue } if callCtx.CommandAllowed != nil && !callCtx.CommandAllowed(cmd) { + args := append([]string(nil), ex.execArgs...) + if callCtx.CommandDenied != nil { + callCtx.CommandDenied(ctx, cmd, args) + } callCtx.Errf("find: '%s': command not allowed\n", cmd) return builtins.Result{Code: 1} } @@ -605,23 +610,23 @@ func walkPath( return walkResult{failed: failed, quit: quit} } -// collectExecCmds walks the expression tree and returns all -exec/-execdir command names. -func collectExecCmds(e *expr) []string { - var cmds []string - collectExecCmdsInto(e, &cmds) - return cmds +// collectExecExprs walks the expression tree and returns all -exec/-execdir expressions. +func collectExecExprs(e *expr) []*expr { + var execs []*expr + collectExecExprsInto(e, &execs) + return execs } -func collectExecCmdsInto(e *expr, cmds *[]string) { +func collectExecExprsInto(e *expr, execs *[]*expr) { if e == nil { return } if e.kind == exprExecDir || e.kind == exprExec { - *cmds = append(*cmds, e.execCmd) + *execs = append(*execs, e) } - collectExecCmdsInto(e.left, cmds) - collectExecCmdsInto(e.right, cmds) - collectExecCmdsInto(e.operand, cmds) + collectExecExprsInto(e.left, execs) + collectExecExprsInto(e.right, execs) + collectExecExprsInto(e.operand, execs) } // collectNewerRefs walks the expression tree and returns all -newer reference paths. diff --git a/builtins/xargs/xargs.go b/builtins/xargs/xargs.go index bacbfa1c4..df4cdb094 100644 --- a/builtins/xargs/xargs.go +++ b/builtins/xargs/xargs.go @@ -784,6 +784,9 @@ func invokeCommand(ctx context.Context, callCtx *builtins.CallContext, o options return exitSubCmdNotStart, true } if callCtx.CommandAllowed != nil && !callCtx.CommandAllowed(finalCmd) { + if callCtx.CommandDenied != nil { + callCtx.CommandDenied(ctx, finalCmd, finalArgs) + } callCtx.Errf("xargs: %s: command not allowed\n", finalCmd) return exitSubCmdNotStart, true } diff --git a/interp/api.go b/interp/api.go index 5a566906f..cf900ec8a 100644 --- a/interp/api.go +++ b/interp/api.go @@ -99,6 +99,11 @@ type runnerConfig struct { // only: they cannot authorize, deny, or alter file access. fileAccessHooks FileAccessHooks + // commandHooks, when configured, receives passive events for command + // dispatch decisions. Hooks are observability only: they cannot authorize, + // deny, or alter command execution. + commandHooks CommandHooks + // usedNew is set by New() and checked in Reset() to ensure a Runner // was properly constructed rather than zero-initialized. usedNew bool diff --git a/interp/command_hooks.go b/interp/command_hooks.go new file mode 100644 index 000000000..78ee9a9d7 --- /dev/null +++ b/interp/command_hooks.go @@ -0,0 +1,70 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package interp + +import ( + "context" + + "github.com/DataDog/rshell/builtins" +) + +// CommandHooks are passive callbacks invoked around rshell command dispatch +// when configured by [WithCommandHooks]. Hooks are observability only: they +// cannot authorize, deny, rewrite, or otherwise alter command execution. +type CommandHooks struct { + After func(context.Context, CommandEvent) +} + +// CommandEvent describes one command dispatch observed by rshell. +type CommandEvent struct { + Name string + Args []string + + IsAllowed bool + IsKnown bool + ExitCode uint8 +} + +// WithCommandHooks installs passive command-dispatch hooks. Nil callbacks are +// ignored. +func WithCommandHooks(hooks CommandHooks) RunnerOption { + return func(r *Runner) error { + r.commandHooks = hooks + return nil + } +} + +func (r *Runner) commandHooksEnabled() bool { + return r.commandHooks.After != nil +} + +func (r *Runner) callCommandHook(ctx context.Context, hook func(context.Context, CommandEvent), event CommandEvent) { + if hook == nil { + return + } + defer func() { + _ = recover() + }() + hook(ctx, event) +} + +func (r *Runner) notifyCommandDenied(ctx context.Context, name string, args []string) { + if !r.commandHooksEnabled() { + return + } + r.callCommandHook(ctx, r.commandHooks.After, CommandEvent{ + Name: name, + Args: append([]string(nil), args...), + IsAllowed: false, + IsKnown: commandIsKnown(name), + ExitCode: 127, + }) +} + +func commandIsKnown(name string) bool { + _, ok := builtins.Lookup(name) + return ok +} diff --git a/interp/command_hooks_test.go b/interp/command_hooks_test.go new file mode 100644 index 000000000..ad2f715e7 --- /dev/null +++ b/interp/command_hooks_test.go @@ -0,0 +1,124 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package interp + +import ( + "context" + "io" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func runWithCommandHooks(t *testing.T, script string, hooks CommandHooks, allowedCommands []string) error { + t.Helper() + prog, err := ParseScript(script, "") + require.NoError(t, err) + r, err := New( + StdIO(nil, io.Discard, io.Discard), + AllowedCommands(allowedCommands), + WithCommandHooks(hooks), + ) + require.NoError(t, err) + t.Cleanup(func() { r.Close() }) + return r.Run(context.Background(), prog) +} + +func TestCommandHooksReportAllowedAndDeniedCommands(t *testing.T) { + var events []CommandEvent + hooks := CommandHooks{ + After: func(_ context.Context, event CommandEvent) { + events = append(events, event) + }, + } + + err := runWithCommandHooks(t, `echo ok; awk '{print}' missing.txt`, hooks, []string{"rshell:echo"}) + var exit ExitStatus + require.ErrorAs(t, err, &exit) + assert.Equal(t, ExitStatus(127), exit) + require.Len(t, events, 2) + + assert.Equal(t, "echo", events[0].Name) + assert.Equal(t, []string{"ok"}, events[0].Args) + assert.True(t, events[0].IsAllowed) + assert.True(t, events[0].IsKnown) + assert.Equal(t, uint8(0), events[0].ExitCode) + + assert.Equal(t, "awk", events[1].Name) + assert.Equal(t, []string{"{print}", "missing.txt"}, events[1].Args) + assert.False(t, events[1].IsAllowed) + assert.False(t, events[1].IsKnown) + assert.Equal(t, uint8(127), events[1].ExitCode) +} + +func TestCommandHooksReportChildCommandDeniedByBuiltin(t *testing.T) { + var events []CommandEvent + hooks := CommandHooks{ + After: func(_ context.Context, event CommandEvent) { + events = append(events, event) + }, + } + + err := runWithCommandHooks(t, `printf 'a\n' | xargs awk`, hooks, []string{"rshell:printf", "rshell:xargs"}) + var exit ExitStatus + require.ErrorAs(t, err, &exit) + assert.Equal(t, ExitStatus(125), exit) + + var denied *CommandEvent + for i := range events { + if events[i].Name == "awk" && !events[i].IsAllowed { + denied = &events[i] + break + } + } + require.NotNil(t, denied, "expected denied child command event") + assert.False(t, denied.IsKnown) + assert.Equal(t, uint8(127), denied.ExitCode) +} + +func TestCommandHooksReportFindExecDeniedWithSubstitutedArgs(t *testing.T) { + dir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(dir, "input.txt"), []byte("hello\n"), 0o644)) + + var events []CommandEvent + hooks := CommandHooks{ + After: func(_ context.Context, event CommandEvent) { + events = append(events, event) + }, + } + prog, err := ParseScript(`find . -name input.txt -exec {} suffix-{} \;`, "") + require.NoError(t, err) + r, err := New( + StdIO(nil, io.Discard, io.Discard), + AllowedPaths([]string{dir}), + AllowedCommands([]string{"rshell:find"}), + WithCommandHooks(hooks), + ) + require.NoError(t, err) + t.Cleanup(func() { r.Close() }) + r.Dir = dir + + err = r.Run(context.Background(), prog) + var exit ExitStatus + require.ErrorAs(t, err, &exit) + assert.Equal(t, ExitStatus(1), exit) + + var denied *CommandEvent + for i := range events { + if !events[i].IsAllowed { + denied = &events[i] + break + } + } + require.NotNil(t, denied, "expected denied find -exec command event") + assert.Contains(t, denied.Name, "input.txt") + require.Len(t, denied.Args, 1) + assert.NotContains(t, denied.Args[0], "{}") + assert.Contains(t, denied.Args[0], "input.txt") +} diff --git a/interp/runner_exec.go b/interp/runner_exec.go index 6199de55b..8c42c6923 100644 --- a/interp/runner_exec.go +++ b/interp/runner_exec.go @@ -510,6 +510,17 @@ func (r *Runner) call(ctx context.Context, pos syntax.Pos, args []string) { // short-circuits dispatch. isAllowed := r.allowAllCommands || r.allowedCommands[name] fn, isKnown := builtins.Lookup(name) + if r.commandHooksEnabled() { + defer func() { + r.callCommandHook(ctx, r.commandHooks.After, CommandEvent{ + Name: name, + Args: append([]string(nil), args[1:]...), + IsAllowed: isAllowed, + IsKnown: isKnown, + ExitCode: r.exit.code, + }) + }() + } span, ctx := telemetry.StartSpanFromContext(ctx, "command") span.SetResourceName(name) @@ -554,13 +565,27 @@ func (r *Runner) call(ctx context.Context, pos syntax.Pos, args []string) { if isKnown { r.dispatchedCount++ var runCmdWithStdin func(context.Context, string, string, []string, io.Reader) (uint8, error) - runCmdWithStdin = func(ctx context.Context, dir string, cmdName string, cmdArgs []string, childStdin io.Reader) (uint8, error) { - if !r.allowAllCommands && !r.allowedCommands[cmdName] { - return 127, fmt.Errorf("rshell: %s: command not allowed", cmdName) + runCmdWithStdin = func(ctx context.Context, dir string, cmdName string, cmdArgs []string, childStdin io.Reader) (code uint8, err error) { + isAllowed := r.allowAllCommands || r.allowedCommands[cmdName] + cmdFn, isKnown := builtins.Lookup(cmdName) + if r.commandHooksEnabled() { + defer func() { + r.callCommandHook(ctx, r.commandHooks.After, CommandEvent{ + Name: cmdName, + Args: append([]string(nil), cmdArgs...), + IsAllowed: isAllowed, + IsKnown: isKnown, + ExitCode: code, + }) + }() + } + if !isAllowed { + code = 127 + return code, fmt.Errorf("rshell: %s: command not allowed", cmdName) } - cmdFn, ok := builtins.Lookup(cmdName) - if !ok { - return 127, fmt.Errorf("rshell: %s: unknown command", cmdName) + if !isKnown { + code = 127 + return code, fmt.Errorf("rshell: %s: unknown command", cmdName) } child := &builtins.CallContext{ Stdout: r.stdout, @@ -648,6 +673,7 @@ func (r *Runner) call(ctx context.Context, pos syntax.Pos, args []string) { CommandAllowed: func(n string) bool { return r.allowAllCommands || r.allowedCommands[n] }, + CommandDenied: r.notifyCommandDenied, RunCommand: func(ctx context.Context, dir string, name string, args []string) (uint8, error) { // Inherit the parent's overridden stdin so grandchildren // dispatched via RunCommand (the no-stdin variant) stay @@ -677,7 +703,8 @@ func (r *Runner) call(ctx context.Context, pos syntax.Pos, args []string) { child.Stdin = r.stdin } result := cmdFn(ctx, child, cmdArgs) - return result.Code, nil + code = result.Code + return code, nil } runCmd := func(ctx context.Context, dir string, cmdName string, cmdArgs []string) (uint8, error) { return runCmdWithStdin(ctx, dir, cmdName, cmdArgs, nil) @@ -778,6 +805,7 @@ func (r *Runner) call(ctx context.Context, pos syntax.Pos, args []string) { CommandAllowed: func(cmdName string) bool { return r.allowAllCommands || r.allowedCommands[cmdName] }, + CommandDenied: r.notifyCommandDenied, RunCommand: runCmd, RunCommandWithStdin: runCmdWithStdin, SetVar: func(name, value string) error {