Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion SHELL_FEATURES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ The in-shell `help` command mirrors these feature categories: run `help` for a c

## Builtins

- ✅ `awk [-F SEP] [-v NAME=VALUE] ['PROGRAM'|-f PROGRAM-FILE] [FILE]...` — pattern scanning and text processing; Phase 1 supports BEGIN/main/END rules, read-only fields (`$0`, `$1`, `$NF`), `NF`/`NR`/`FNR`/`FILENAME`, `FS`/`OFS`/`ORS`, `print`, scalar assignment, arithmetic/comparison/boolean expressions, regex patterns and `~`/`!~`, and string concatenation; `system()`, command pipes, output redirection, `getline`, arrays, control flow, `printf`, regex `FS`, and field mutation are rejected or deferred
- ✅ `awk [-F SEP] [-v NAME=VALUE] ['PROGRAM'|-f PROGRAM-FILE] [FILE]...` — pattern scanning and text processing; supports BEGIN/main/END rules, read-only fields (`$0`, `$1`, `$NF`), `NF`/`NR`/`FNR`/`FILENAME`, `FS`/`OFS`/`ORS`, `print`, `printf`, scalar assignment, arithmetic/comparison/boolean expressions, regex patterns and `~`/`!~`, string concatenation, `if`/`else`, `next`, and scalar builtins (`length`, `substr`, `index`, `tolower`, `toupper`, `int`); `system()`, command pipes, output redirection, `getline`, arrays, loops, regex `FS`, and field mutation are rejected or deferred
- ✅ `break` — exit the innermost `for` loop
- ✅ `cat [-AbeEnstTuv] [FILE]...` — concatenate files to stdout; supports line numbering, blank squeezing, and non-printing character display
- ✅ `continue` — skip to the next iteration of the innermost `for` loop
Expand Down
19 changes: 19 additions & 0 deletions analysis/symbols_builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,34 @@ var builtinPerCommandSymbols = map[string][]string{
"awk": {
"bufio.NewScanner", // 🟢 line-by-line record reading; no write or exec capability.
"context.Context", // 🟢 deadline/cancellation plumbing; pure interface, no side effects.
"errors.Is", // 🟢 error comparison; pure function, no I/O.
"errors.New", // 🟢 creates a simple error value; pure function, no I/O.
"fmt.Errorf", // 🟢 error formatting; pure function, no I/O.
"fmt.Sprintf", // 🟢 string formatting for awk printf; pure function, no I/O.
"io.EOF", // 🟢 sentinel error value; pure constant.
"io.NopCloser", // 🟢 wraps a Reader with a no-op Close; no side effects.
"io.ReadCloser", // 🟢 interface type; no side effects.
"math/big.Float", // 🟢 arbitrary-precision float type used to convert large awk printf integers; pure in-memory arithmetic.
"math/big.Int", // 🟢 arbitrary-precision integer type used for large awk printf integers; pure in-memory arithmetic.
"math/big.NewInt", // 🟢 constructs an in-memory integer value; pure function, no I/O.
"math.IsInf", // 🟢 IEEE 754 infinity check; pure function, no I/O.
"math.IsNaN", // 🟢 IEEE 754 NaN check; pure function, no I/O.
"math.Mod", // 🟢 pure arithmetic modulo for awk % operator; no side effects.
"math.Trunc", // 🟢 pure arithmetic truncation for awk int(); no side effects.
"os.O_RDONLY", // 🟢 read-only file flag constant; cannot open files by itself.
"regexp.Compile", // 🟢 compiles a regular expression; pure function, no I/O. Uses RE2 engine (linear-time, no backtracking).
"regexp.Regexp", // 🟢 compiled regular expression type; no I/O side effects. All matching methods are linear-time (RE2).
"strconv.FormatFloat", // 🟢 float-to-string conversion for awk numeric output; pure function.
"strconv.ParseFloat", // 🟢 string-to-float conversion; pure function, no I/O.
"strings.Builder", // 🟢 efficient string concatenation; pure in-memory buffer, no I/O.
"strings.ContainsRune", // 🟢 checks if a rune is in a string; pure function, no I/O.
"strings.Cut", // 🟢 splits a string around the first separator; pure function, no I/O.
"strings.Index", // 🟢 substring search for awk index(); pure function, no I/O.
"strings.Join", // 🟢 concatenates a slice of strings with a separator; pure function, no I/O.
"strings.NewReader", // 🟢 wraps a string as an io.Reader; pure in-memory, no I/O.
"strings.Split", // 🟢 splits a string by separator into a slice; pure function, no I/O.
"strings.ToLower", // 🟢 converts string to lowercase for awk tolower(); pure function, no I/O.
"strings.ToUpper", // 🟢 converts string to uppercase for awk toupper(); pure function, no I/O.
"strings.TrimSpace", // 🟢 removes leading/trailing whitespace; pure function.
"unicode/utf8.DecodeRuneInString", // 🟢 decodes first UTF-8 rune from a string; pure function, no I/O.
"unicode/utf8.RuneError", // 🟢 replacement character returned for invalid UTF-8; constant, no I/O.
Expand Down Expand Up @@ -574,6 +587,9 @@ var builtinAllowedSymbols = []string{
"io/fs.ModeSticky", // 🟢 file mode bit constant for sticky bit; pure constant.
"io/fs.ModeSymlink", // 🟢 file mode bit constant for symlinks; pure constant.
"io/fs.ReadDirFile", // 🟢 read-only directory handle interface; no write capability.
"math/big.Float", // 🟢 arbitrary-precision float type; pure in-memory arithmetic.
"math/big.Int", // 🟢 arbitrary-precision integer type; pure in-memory arithmetic.
"math/big.NewInt", // 🟢 constructs an in-memory integer value; pure function, no I/O.
"math.Ceil", // 🟢 pure arithmetic; no side effects.
"math.Floor", // 🟢 pure arithmetic; no side effects.
"math.Inf", // 🟢 returns positive or negative infinity; pure function, no I/O.
Expand All @@ -585,6 +601,7 @@ var builtinAllowedSymbols = []string{
"math.MinInt64", // 🟢 integer constant; no side effects.
"math.Mod", // 🟢 pure arithmetic modulo; no side effects.
"math.NaN", // 🟢 returns IEEE 754 NaN value; pure function, no I/O.
"math.Trunc", // 🟢 pure arithmetic truncation toward zero; no side effects.
"net.DefaultResolver", // 🔴 default system DNS resolver; used for context-aware address lookup; network I/O is the explicit purpose of the ping builtin.
"net.FlagBroadcast", // 🟢 interface flag constant: broadcast capability; pure constant, no network connections.
"net.IPAddr", // 🟢 resolved IP address struct (IP + Zone); pure data type, no I/O.
Expand Down Expand Up @@ -639,12 +656,14 @@ var builtinAllowedSymbols = []string{
"strings.Cut", // 🟢 splits a string around the first separator; pure function, no I/O.
"strings.Fields", // 🟢 splits a string on whitespace into a slice; pure function, no I/O.
"strings.HasPrefix", // 🟢 pure function for prefix matching; no I/O.
"strings.Index", // 🟢 substring search; pure function, no I/O.
"strings.IndexByte", // 🟢 finds byte in string; pure function, no I/O.
"strings.Join", // 🟢 concatenates a slice of strings with a separator; pure function, no I/O.
"strings.NewReader", // 🟢 wraps a string as an io.Reader; pure in-memory, no I/O.
"strings.ReplaceAll", // 🟢 replaces all occurrences of a substring; pure function, no I/O.
"strings.Split", // 🟢 splits a string by separator into a slice; pure function, no I/O.
"strings.ToLower", // 🟢 converts string to lowercase; pure function, no I/O.
"strings.ToUpper", // 🟢 converts string to uppercase; pure function, no I/O.
"strings.TrimPrefix", // 🟢 removes a leading prefix from a string; pure function, no I/O.
"strings.TrimSpace", // 🟢 removes leading/trailing whitespace; pure function.
"syscall.ByHandleFileInformation", // 🟢 Windows file info struct for extracting nlink; read-only type, no I/O.
Expand Down
25 changes: 25 additions & 0 deletions builtins/awk/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,24 @@ type printStmt struct {

func (*printStmt) stmtNode() {}

// printfStmt is the statement node for an awk printf statement. args holds
// the argument expressions in source order: when the statement is executed,
// the first one is evaluated as the format string and the remaining ones
// supply the values to format.
type printfStmt struct {
args []expr
}

// stmtNode is an empty marker method. NOTE(review): presumably this is what
// lets *printfStmt satisfy the stmt interface — confirm against its declaration.
func (*printfStmt) stmtNode() {}

// ifStmt is the statement node for an awk if/else statement. When executed,
// cond is evaluated and thenStmts run if it is truthy; otherwise elseStmts
// run, provided the slice is non-empty.
// NOTE(review): elseStmts is presumably left empty when the source has no
// else branch — confirm against the parser.
type ifStmt struct {
cond expr
thenStmts []stmt
elseStmts []stmt
}

// stmtNode is an empty marker method. NOTE(review): presumably this is what
// lets *ifStmt satisfy the stmt interface — confirm against its declaration.
func (*ifStmt) stmtNode() {}

// nextStmt is the statement node for the awk next statement. It carries no
// data; executing it makes execStatements return errNextRecord.
type nextStmt struct{}

// stmtNode is an empty marker method. NOTE(review): presumably this is what
// lets *nextStmt satisfy the stmt interface — confirm against its declaration.
func (*nextStmt) stmtNode() {}

// exprStmt is a statement that consists of a single expression. Executing it
// evaluates x and discards the resulting value; only an evaluation error is
// propagated.
type exprStmt struct {
x expr
}
Expand Down Expand Up @@ -110,3 +128,10 @@ type incDecExpr struct {
}

func (*incDecExpr) exprNode() {}

// callExpr is the expression node for a function call. name is the function
// name as written in the program and args holds the argument expressions in
// source order. Evaluation is handled by runtime.evalCall, which recognizes
// the built-ins length, substr, index, tolower, toupper, and int.
type callExpr struct {
name string
args []expr
}

// exprNode is an empty marker method. NOTE(review): presumably this is what
// lets *callExpr satisfy the expr interface — confirm against its declaration.
func (*callExpr) exprNode() {}
17 changes: 9 additions & 8 deletions builtins/awk/awk.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,17 @@
//
// awk [OPTION]... -f program-file [FILE]...
//
// Phase 1 implements a practical, intentionally restricted awk profile:
// program loading from an inline argument or -f files, -F one-character field
// separators, -v scalar variables, BEGIN/main/END rules, print, scalar
// assignment, arithmetic/comparison/boolean expressions, regex patterns and
// match operators, string concatenation, and read-only fields/built-in
// variables such as $0, $1, NF, NR, FNR, FILENAME, FS, OFS, and ORS.
// This implements a practical, intentionally restricted awk profile: program
// loading from an inline argument or -f files, -F one-character field
// separators, -v scalar variables, BEGIN/main/END rules, print and printf,
// scalar assignment, if/else, next, arithmetic/comparison/boolean expressions,
// regex patterns and match operators, string concatenation, scalar built-in
// functions, and read-only fields/built-in variables such as $0, $1, NF, NR,
// FNR, FILENAME, FS, OFS, and ORS.
//
// Blocked or deferred features include system(), command pipes, output
// redirection, getline, arrays, control flow statements, printf, user-defined
// functions, regex FS, and field mutation/$0 rebuilding.
// redirection, getline, arrays, loops, user-defined functions, regex FS, and
// field mutation/$0 rebuilding.
package awk

import (
Expand Down
113 changes: 113 additions & 0 deletions builtins/awk/eval.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@
package awk

import (
"errors"
"fmt"
"math"
"strings"
)

// errNextRecord is the sentinel error execStatements returns when it executes
// a next statement.
// NOTE(review): callers presumably detect it (for example with errors.Is) and
// move on to the next input record instead of reporting a failure — confirm
// at the call sites.
var errNextRecord = errors.New("next record")

func (rt *runtime) execStatements(stmts []stmt) error {
for _, st := range stmts {
switch s := st.(type) {
Expand All @@ -30,6 +33,39 @@ func (rt *runtime) execStatements(stmts []stmt) error {
if err := rt.printValues(vals); err != nil {
return err
}
case *printfStmt:
if len(s.args) == 0 {
return fmt.Errorf("printf requires a format expression")
}
vals := make([]value, 0, len(s.args))
for _, arg := range s.args {
v, err := rt.eval(arg)
if err != nil {
return err
}
vals = append(vals, v)
}
out, err := formatPrintf(vals[0].String(), vals[1:])
if err != nil {
return err
}
rt.callCtx.Out(out)
case *ifStmt:
cond, err := rt.eval(s.cond)
if err != nil {
return err
}
if cond.Bool() {
if err := rt.execStatements(s.thenStmts); err != nil {
return err
}
} else if len(s.elseStmts) > 0 {
if err := rt.execStatements(s.elseStmts); err != nil {
return err
}
}
case *nextStmt:
return errNextRecord
case *exprStmt:
if _, err := rt.eval(s.x); err != nil {
return err
Expand All @@ -41,6 +77,26 @@ func (rt *runtime) execStatements(stmts []stmt) error {
return nil
}

// substrStart converts an awk substr() start position into a 0-based index
// into a sequence of length elements.
//
// n is the 1-based start position as an awk number. Fractional positions are
// truncated toward zero. Positions at or before the first element (including
// negative values, -Inf, and NaN) map to 0. Positions past the last element
// (including +Inf) map to length, which evalCall treats as "nothing left" and
// answers with an empty string.
func substrStart(n float64, length int) int {
	if math.IsNaN(n) || n <= 1 {
		return 0
	}
	// Compare against length+1 rather than length: a fractional position that
	// falls inside the final element (for example 5.5 with length 5) must
	// truncate to that element like every other fractional position, instead
	// of jumping past the end. The bound also keeps the int conversion below
	// in range for huge or infinite n.
	if n >= float64(length)+1 {
		return length
	}
	return int(n) - 1
}

// substrEnd returns the exclusive 0-based end index for an awk substr() call
// that begins at index start within a sequence of length elements and asks
// for count elements.
//
// A count that is zero, negative, or NaN produces an empty range (end equals
// start). A count that reaches or exceeds the elements remaining after start
// is clamped to length. Any other count is truncated toward zero and added to
// start.
func substrEnd(start, length int, count float64) int {
	remaining := length - start
	switch {
	case math.IsNaN(count), count <= 0:
		return start
	case count >= float64(remaining):
		return length
	default:
		return start + int(count)
	}
}

func (rt *runtime) printValues(vals []value) error {
parts := make([]string, len(vals))
for i, v := range vals {
Expand Down Expand Up @@ -101,11 +157,68 @@ func (rt *runtime) eval(x expr) (value, error) {
return rt.evalAssign(e)
case *incDecExpr:
return rt.evalIncDec(e)
case *callExpr:
return rt.evalCall(e)
default:
return value{}, fmt.Errorf("unknown expression")
}
}

func (rt *runtime) evalCall(e *callExpr) (value, error) {
args := make([]value, 0, len(e.args))
for _, arg := range e.args {
v, err := rt.eval(arg)
if err != nil {
return value{}, err
}
args = append(args, v)
}
if err := validateBuiltinCallArity(e.name, len(args)); err != nil {
return value{}, err
}
switch e.name {
case "length":
s := rt.field(0).String()
if len(args) == 1 {
s = args[0].String()
}
return numberValue(float64(len([]rune(s)))), nil
case "substr":
s := []rune(args[0].String())
start := substrStart(args[1].Number(), len(s))
if start >= len(s) {
return stringValue(""), nil
}
end := len(s)
if len(args) == 3 {
end = substrEnd(start, len(s), args[2].Number())
}
return stringValue(string(s[start:end])), nil
case "index":
haystack := args[0].String()
needle := args[1].String()
if needle == "" {
return numberValue(1), nil
}
Comment thread
matt-dz marked this conversation as resolved.
pos := strings.Index(haystack, needle)
if pos < 0 {
return numberValue(0), nil
}
return numberValue(float64(len([]rune(haystack[:pos])) + 1)), nil
case "tolower":
s := args[0].String()
return stringValue(strings.ToLower(s)), nil
case "toupper":
s := args[0].String()
return stringValue(strings.ToUpper(s)), nil
case "int":
v := args[0]
return numberValue(math.Trunc(v.Number())), nil
default:
return value{}, fmt.Errorf("function calls are not supported")
}
}

func (rt *runtime) evalBinary(e *binaryExpr) (value, error) {
if e.op == "&&" {
left, err := rt.eval(e.left)
Expand Down
2 changes: 1 addition & 1 deletion builtins/awk/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ func (l *lexer) scanRegex(start int) (token, error) {
}

func canStartRegex(prev tokenKind, prevLit string) bool {
if prev == tokIdent && prevLit == "print" {
if prev == tokIdent && (prevLit == "print" || prevLit == "printf") {
return true
}
switch prev {
Expand Down
Loading