Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .claude/skills/implement-awk/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ argument-hint: "[feature-or-failure-filter]"
Use this skill when implementing, extending, or fixing the rshell `awk`
builtin.

## Shared Implementation Plan

Before starting or resuming implementation work, read
`docs/AWK_IMPLEMENTATION_PLAN.md`. That document captures the agreed rshell awk
profile, the long-lived parser strategy, Phase 1 Practical awk scope, safety
policy, test plan, and later-phase roadmap.

## Compatibility Target

The implementation target is GNU awk (`gawk`), not POSIX awk alone, One True
Expand Down
1 change: 1 addition & 0 deletions SHELL_FEATURES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ The in-shell `help` command mirrors these feature categories: run `help` for a c

## Builtins

- ✅ `awk [-F SEP] [-v NAME=VALUE] ['PROGRAM'|-f PROGRAM-FILE] [FILE]...` — pattern scanning and text processing; Phase 1 supports BEGIN/main/END rules, read-only fields (`$0`, `$1`, `$NF`), `NF`/`NR`/`FNR`/`FILENAME`, `FS`/`OFS`/`ORS`, `print`, scalar assignment, arithmetic/comparison/boolean expressions, regex patterns and `~`/`!~`, and string concatenation; `system()`, command pipes, output redirection, `getline`, arrays, control flow, `printf`, regex `FS`, and field mutation are rejected or deferred
- ✅ `break` — exit the innermost `for` loop
- ✅ `cat [-AbeEnstTuv] [FILE]...` — concatenate files to stdout; supports line numbering, blank squeezing, and non-printing character display
- ✅ `continue` — skip to the next iteration of the innermost `for` loop
Expand Down
26 changes: 26 additions & 0 deletions analysis/symbols_builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,28 @@ package analysis
// Every symbol listed here must also appear in builtinAllowedSymbols
// (which acts as the global ceiling).
var builtinPerCommandSymbols = map[string][]string{
"awk": {
"bufio.NewScanner", // 🟢 line-by-line record reading; no write or exec capability.
"context.Context", // 🟢 deadline/cancellation plumbing; pure interface, no side effects.
"fmt.Errorf", // 🟢 error formatting; pure function, no I/O.
"io.EOF", // 🟢 sentinel error value; pure constant.
"io.NopCloser", // 🟢 wraps a Reader with a no-op Close; no side effects.
"io.ReadCloser", // 🟢 interface type; no side effects.
"math.Mod", // 🟢 pure arithmetic modulo for awk % operator; no side effects.
"os.O_RDONLY", // 🟢 read-only file flag constant; cannot open files by itself.
"regexp.Compile", // 🟢 compiles a regular expression; pure function, no I/O. Uses RE2 engine (linear-time, no backtracking).
"regexp.Regexp", // 🟢 compiled regular expression type; no I/O side effects. All matching methods are linear-time (RE2).
"strconv.FormatFloat", // 🟢 float-to-string conversion for awk numeric output; pure function.
"strconv.ParseFloat", // 🟢 string-to-float conversion; pure function, no I/O.
"strings.Builder", // 🟢 efficient string concatenation; pure in-memory buffer, no I/O.
"strings.Cut", // 🟢 splits a string around the first separator; pure function, no I/O.
"strings.Join", // 🟢 concatenates a slice of strings with a separator; pure function, no I/O.
"strings.NewReader", // 🟢 wraps a string as an io.Reader; pure in-memory, no I/O.
"strings.Split", // 🟢 splits a string by separator into a slice; pure function, no I/O.
"strings.TrimSpace", // 🟢 removes leading/trailing whitespace; pure function.
"unicode/utf8.DecodeRuneInString", // 🟢 decodes first UTF-8 rune from a string; pure function, no I/O.
"unicode/utf8.RuneError", // 🟢 replacement character returned for invalid UTF-8; constant, no I/O.
},
"break": {
"context.Context", // 🟢 deadline/cancellation plumbing; pure interface, no side effects.
},
Expand Down Expand Up @@ -561,6 +583,7 @@ var builtinAllowedSymbols = []string{
"math.MaxInt64", // 🟢 integer constant; no side effects.
"math.MaxUint64", // 🟢 integer constant; no side effects.
"math.MinInt64", // 🟢 integer constant; no side effects.
"math.Mod", // 🟢 pure arithmetic modulo; no side effects.
"math.NaN", // 🟢 returns IEEE 754 NaN value; pure function, no I/O.
"net.DefaultResolver", // 🔴 default system DNS resolver; used for context-aware address lookup; network I/O is the explicit purpose of the ping builtin.
"net.FlagBroadcast", // 🟢 interface flag constant: broadcast capability; pure constant, no network connections.
Expand Down Expand Up @@ -600,6 +623,7 @@ var builtinAllowedSymbols = []string{
"strconv.Atoi", // 🟢 string-to-int conversion; pure function, no I/O.
"strconv.ErrRange", // 🟢 sentinel error value for overflow; pure constant.
"strconv.FormatBool", // 🟢 bool-to-string conversion; pure function, no I/O.
"strconv.FormatFloat", // 🟢 float-to-string conversion; pure function, no I/O.
"strconv.FormatInt", // 🟢 int-to-string conversion; pure function, no I/O.
"strconv.FormatUint", // 🟢 uint-to-string conversion; pure function, no I/O.
"strconv.IntSize", // 🟢 platform int size constant (32 or 64); pure constant, no I/O.
Expand All @@ -612,10 +636,12 @@ var builtinAllowedSymbols = []string{
"strings.Builder", // 🟢 efficient string concatenation; pure in-memory buffer, no I/O.
"strings.Contains", // 🟢 substring search; pure function, no I/O.
"strings.ContainsRune", // 🟢 checks if a rune is in a string; pure function, no I/O.
"strings.Cut", // 🟢 splits a string around the first separator; pure function, no I/O.
"strings.Fields", // 🟢 splits a string on whitespace into a slice; pure function, no I/O.
"strings.HasPrefix", // 🟢 pure function for prefix matching; no I/O.
"strings.IndexByte", // 🟢 finds byte in string; pure function, no I/O.
"strings.Join", // 🟢 concatenates a slice of strings with a separator; pure function, no I/O.
"strings.NewReader", // 🟢 wraps a string as an io.Reader; pure in-memory, no I/O.
"strings.ReplaceAll", // 🟢 replaces all occurrences of a substring; pure function, no I/O.
"strings.Split", // 🟢 splits a string by separator into a slice; pure function, no I/O.
"strings.ToLower", // 🟢 converts string to lowercase; pure function, no I/O.
Expand Down
19 changes: 15 additions & 4 deletions analysis/symbols_verification_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,13 +147,11 @@ func writeGoFile(t *testing.T, path, pkg string, imports []string, body string)
func findFirstSubdirGoFile(t *testing.T, dir string) string {
t.Helper()
var found string
var fallback string
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if found != "" {
return filepath.SkipAll
}
if info.IsDir() {
return nil
}
Expand All @@ -162,13 +160,26 @@ func findFirstSubdirGoFile(t *testing.T, dir string) string {
return nil
}
if strings.Contains(rel, string(filepath.Separator)) {
found = path
data, err := os.ReadFile(path)
if err != nil {
return err
}
if strings.Contains(string(data), "import (") {
found = path
return filepath.SkipAll
}
if fallback == "" {
fallback = path
}
}
return nil
})
if err != nil {
t.Fatal(err)
}
if found == "" {
found = fallback
}
if found == "" {
t.Fatalf("no .go file found in subdirectories of %s", dir)
}
Expand Down
112 changes: 112 additions & 0 deletions builtins/awk/ast.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2026-present Datadog, Inc.

package awk

// program is the root node of the awk syntax tree: it holds nothing but the
// list of rules that make up the program.
type program struct {
// rules are the program's rules.
// NOTE(review): presumably kept in source order — confirm against the parser.
rules []rule
}

// ruleKind classifies a rule. It is an int enumeration whose values are the
// constants declared below.
type ruleKind int

const (
// ruleNormal is the zero value of ruleKind, so a rule whose kind is never
// set is a normal rule.
ruleNormal ruleKind = iota
// ruleBegin marks a rule of the BEGIN kind.
// NOTE(review): presumably run before any input is read — confirm in the interpreter.
ruleBegin
// ruleEnd marks a rule of the END kind.
// NOTE(review): presumably run after all input is consumed — confirm in the interpreter.
ruleEnd
)

// rule is a single entry of a program: a kind, an optional-looking pattern
// expression, and a list of statements forming its action.
type rule struct {
// kind says whether this is a normal, BEGIN, or END rule.
kind ruleKind
// pattern is the expression attached to the rule.
// NOTE(review): presumably nil when the rule has no pattern (and for
// BEGIN/END rules) — confirm against the parser.
pattern expr
// action is the rule's statement list.
// NOTE(review): how a missing action block is represented (nil vs. an
// implicit print) is not visible here — confirm against the parser.
action []stmt
}

// stmt is the interface implemented by every statement node in the syntax
// tree. It carries no behavior; it only tags a type as a statement.
type stmt interface {
// stmtNode is a marker method. Because it is unexported, only types
// declared in this package can satisfy stmt.
stmtNode()
}

// printStmt is the statement node for print, holding its argument
// expressions.
type printStmt struct {
// args are the expressions passed to print.
// NOTE(review): presumably empty for a bare `print` — confirm against the parser.
args []expr
}

// stmtNode marks *printStmt as a stmt; it intentionally does nothing.
func (*printStmt) stmtNode() {}

// exprStmt is a statement node that wraps a single expression, allowing an
// expression to appear where a statement is expected.
type exprStmt struct {
// x is the wrapped expression.
x expr
}

// stmtNode marks *exprStmt as a stmt; it intentionally does nothing.
func (*exprStmt) stmtNode() {}

// expr is the interface implemented by every expression node in the syntax
// tree. It carries no behavior; it only tags a type as an expression.
type expr interface {
// exprNode is a marker method. Because it is unexported, only types
// declared in this package can satisfy expr.
exprNode()
}

// numberExpr is the expression node for a numeric literal. It stores both a
// textual form and a float64 value.
type numberExpr struct {
// text is the string form of the number.
// NOTE(review): presumably the literal exactly as written in the program — confirm.
text string
// num is the numeric value of the literal.
// NOTE(review): presumably text parsed as a float64 — confirm against the parser.
num float64
}

// exprNode marks *numberExpr as an expr; it intentionally does nothing.
func (*numberExpr) exprNode() {}

// stringExpr is the expression node for a string literal.
type stringExpr struct {
// value is the literal's string content.
// NOTE(review): whether escape sequences are already decoded here is not
// visible in this file — confirm against the lexer/parser.
value string
}

// exprNode marks *stringExpr as an expr; it intentionally does nothing.
func (*stringExpr) exprNode() {}

// regexExpr is the expression node for a regular-expression literal. It holds
// the pattern as a plain string, not a compiled regexp.
type regexExpr struct {
// pattern is the regular-expression source text.
// NOTE(review): presumably without the surrounding `/` delimiters — confirm
// against the lexer/parser.
pattern string
}

// exprNode marks *regexExpr as an expr; it intentionally does nothing.
func (*regexExpr) exprNode() {}

// varExpr is the expression node for a reference to a variable by name.
type varExpr struct {
// name is the variable's identifier.
name string
}

// exprNode marks *varExpr as an expr; it intentionally does nothing.
func (*varExpr) exprNode() {}

// fieldExpr is the expression node for a field reference. The field index is
// itself an arbitrary expression rather than a fixed integer.
type fieldExpr struct {
// index is the expression that selects the field (for example the operand
// of `$` in `$1` or `$NF`).
index expr
}

// exprNode marks *fieldExpr as an expr; it intentionally does nothing.
func (*fieldExpr) exprNode() {}

// groupedExpr is an expression node that wraps exactly one inner expression.
// NOTE(review): presumably represents a parenthesized expression, kept as its
// own node so grouping survives in the tree — confirm against the parser.
type groupedExpr struct {
// x is the wrapped inner expression.
x expr
}

// exprNode marks *groupedExpr as an expr; it intentionally does nothing.
func (*groupedExpr) exprNode() {}

// unaryExpr is the expression node for an operator applied to a single
// operand.
type unaryExpr struct {
// op is the operator, stored as a string.
// NOTE(review): the set of accepted operators is defined by the parser and
// is not visible in this file.
op string
// x is the operand.
x expr
}

// exprNode marks *unaryExpr as an expr; it intentionally does nothing.
func (*unaryExpr) exprNode() {}

// binaryExpr is the expression node for an operator applied to a left and a
// right operand.
type binaryExpr struct {
// op is the operator, stored as a string.
// NOTE(review): the set of accepted operators (and how string
// concatenation is spelled) is defined by the parser, not visible here.
op string
// left is the left-hand operand.
left expr
// right is the right-hand operand.
right expr
}

// exprNode marks *binaryExpr as an expr; it intentionally does nothing.
func (*binaryExpr) exprNode() {}

// assignExpr is the expression node for an assignment. It has the same shape
// as binaryExpr but is a distinct type, so assignments can be told apart from
// ordinary binary operations by type.
type assignExpr struct {
// op is the assignment operator, stored as a string.
// NOTE(review): presumably "=" and any compound forms the parser accepts — confirm.
op string
// left is the assignment target.
// NOTE(review): typed as a general expr; any restriction to assignable
// targets must be enforced elsewhere — confirm in the parser/interpreter.
left expr
// right is the expression whose value is assigned.
right expr
}

// exprNode marks *assignExpr as an expr; it intentionally does nothing.
func (*assignExpr) exprNode() {}

// incDecExpr is the expression node for an increment or decrement applied to
// an operand, in either prefix or postfix position.
type incDecExpr struct {
// op is the operator, stored as a string.
// NOTE(review): presumably "++" or "--" — confirm against the parser.
op string
// x is the operand being incremented or decremented.
x expr
// prefix records the operator's position.
// NOTE(review): presumably true for the prefix form (`++x`) and false for
// the postfix form (`x++`) — confirm against the parser.
prefix bool
}

// exprNode marks *incDecExpr as an expr; it intentionally does nothing.
func (*incDecExpr) exprNode() {}
Loading