diff --git a/.claude/skills/implement-awk/SKILL.md b/.claude/skills/implement-awk/SKILL.md index 8092a4a02..815eca3e7 100644 --- a/.claude/skills/implement-awk/SKILL.md +++ b/.claude/skills/implement-awk/SKILL.md @@ -74,6 +74,7 @@ Run this sequence after every coherent implementation step: ```bash make fmt go test ./... +RSHELL_BIN=./rshell AWK_UNDER_TEST=tools/awk-harness/rshell-awk tools/awk-harness/run.sh rewritten RSHELL_BIN=./rshell AWK_UNDER_TEST=tools/awk-harness/rshell-awk tools/awk-harness/run.sh gawk RSHELL_BIN=./rshell AWK_UNDER_TEST=tools/awk-harness/rshell-awk tools/awk-harness/run.sh onetrueawk ``` @@ -104,6 +105,8 @@ make build - safety rejection behavior - runtime or resource limit 7. Add or update original rshell tests for the intended behavior. + Prefer `tests/awk_scenarios` for GNU awk behavior that came from upstream + AWK coverage, and include upstream metadata for traceability. 8. Implement the smallest code change that addresses the cluster. 9. Run `make fmt`. 10. Run focused tests. 
diff --git a/.github/workflows/awk-harness.yml b/.github/workflows/awk-harness.yml index eda48c1e1..c903c7adb 100644 --- a/.github/workflows/awk-harness.yml +++ b/.github/workflows/awk-harness.yml @@ -18,6 +18,8 @@ jobs: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Install pinned GNU awk oracle run: tools/awk-harness/run.sh install-gawk + - name: Run rewritten AWK scenarios against pinned GNU awk + run: tools/awk-harness/run.sh rewritten - name: Fetch One True Awk tests env: AWK_HARNESS_BOOTSTRAP: "1" diff --git a/Makefile b/Makefile index f3a158121..80377dac2 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: build fmt test test_all test_against_bash compliance +.PHONY: build fmt test test_all test_against_bash test_awk_rewritten compliance build: go build -o rshell ./cmd/rshell @@ -15,5 +15,8 @@ test_all: test_against_bash: RSHELL_BASH_TEST=1 go test -v ./tests/ -run TestShellScenariosAgainstBash -count=1 +test_awk_rewritten: + tools/awk-harness/run.sh rewritten + compliance: RSHELL_COMPLIANCE_TEST=1 go test -v ./tests/ -run TestCompliance -count=1 diff --git a/tests/awk_scenarios/README.md b/tests/awk_scenarios/README.md new file mode 100644 index 000000000..ff340a033 --- /dev/null +++ b/tests/awk_scenarios/README.md @@ -0,0 +1,39 @@ +# AWK Scenario Rewrites + +This directory contains rshell-owned AWK tests rewritten from upstream behavior +coverage. Do not copy upstream test bodies, helper scripts, comments, fixtures, +or expected output into this directory. + +Each scenario is a small GNU awk behavior case with metadata that identifies +which upstream suite or coverage area it belongs to and what behavior it covers. +The tests run through the AWK-specific Go runner in +`tests/awk_scenarios_test.go`. + +`enabled.txt` is the only run list. 
Each non-comment line is a path relative to +this directory: + +```text +gawk/basic/begin_end_records.yaml +onetrueawk/basic/pattern_action.yaml +``` + +`upstream-map.yaml` is an audit ledger for rewrite progress. It does not decide +which tests run. + +Run the rewritten scenarios against the pinned GNU awk oracle: + +```bash +tools/awk-harness/run.sh install-gawk +tools/awk-harness/run.sh rewritten +``` + +Run the same scenarios against rshell's `awk` adapter once the builtin exists: + +```bash +make build +RSHELL_BIN=./rshell AWK_UNDER_TEST=tools/awk-harness/rshell-awk tools/awk-harness/run.sh rewritten +``` + +The runner still compares rshell output to the pinned GNU awk oracle when +`GAWK_ORACLE` is set, so expected output in these files and live GNU awk +behavior must stay aligned. diff --git a/tests/awk_scenarios/enabled.txt b/tests/awk_scenarios/enabled.txt new file mode 100644 index 000000000..59145e3cc --- /dev/null +++ b/tests/awk_scenarios/enabled.txt @@ -0,0 +1,10 @@ +# Each non-comment line is a path relative to tests/awk_scenarios. 
+gawk/basic/begin_end_records.yaml +gawk/basic/field_separator.yaml +gawk/cli/variable_assignment.yaml +gawk/expressions/arithmetic_comparison.yaml +gawk/fields/assign_rebuilds_record.yaml +gawk/functions/split.yaml +gawk/output/print_separators.yaml +gawk/regex/pattern_match.yaml +onetrueawk/basic/pattern_action.yaml diff --git a/tests/awk_scenarios/gawk/basic/begin_end_records.yaml b/tests/awk_scenarios/gawk/basic/begin_end_records.yaml new file mode 100644 index 000000000..7df15f602 --- /dev/null +++ b/tests/awk_scenarios/gawk/basic/begin_end_records.yaml @@ -0,0 +1,25 @@ +description: BEGIN and END blocks wrap per-record actions +upstream: + suite: gawk + id: test/beginfile1.awk + ref: gawk-5.4.0 +covers: + - BEGIN actions run before input records are processed + - END actions run after all input records are processed + - NR is incremented for each input record + - $1 and $NF read the first and last fields of the current record +input: + program: | + BEGIN { print "start" } + { print NR ":" $1 "-" $NF } + END { print "count=" NR } + stdin: | + alpha beta + gamma delta epsilon +expect: + stdout: | + start + 1:alpha-beta + 2:gamma-epsilon + count=2 + exit_code: 0 diff --git a/tests/awk_scenarios/gawk/basic/field_separator.yaml b/tests/awk_scenarios/gawk/basic/field_separator.yaml new file mode 100644 index 000000000..a499b53d7 --- /dev/null +++ b/tests/awk_scenarios/gawk/basic/field_separator.yaml @@ -0,0 +1,20 @@ +description: FS controls field splitting and NF for comma-separated records +upstream: + suite: gawk + id: test/fieldwdth.awk + ref: gawk-5.4.0 +covers: + - FS controls field splitting for subsequent records + - NF reflects the number of fields in the current record +input: + program: | + BEGIN { FS = "," } + { print $2 ":" NF } + stdin: | + a,b,c + x,y +expect: + stdout: | + b:3 + y:2 + exit_code: 0 diff --git a/tests/awk_scenarios/gawk/cli/variable_assignment.yaml b/tests/awk_scenarios/gawk/cli/variable_assignment.yaml new file mode 100644 index 
000000000..80c7aae76 --- /dev/null +++ b/tests/awk_scenarios/gawk/cli/variable_assignment.yaml @@ -0,0 +1,30 @@ +description: -v assignments are visible before BEGIN and file arguments set FILENAME and FNR +upstream: + suite: gawk + id: test/argtest.awk + ref: gawk-5.4.0 +covers: + - -v assignments are visible before BEGIN runs + - FILENAME is set to the current input file path + - FNR counts records within the current input file +setup: + files: + - path: records.txt + content: | + red + blue +input: + awk_args: + - -v + - label=color + program: | + BEGIN { print label } + { print FILENAME ":" FNR ":" $0 } + args: + - records.txt +expect: + stdout: | + color + records.txt:1:red + records.txt:2:blue + exit_code: 0 diff --git a/tests/awk_scenarios/gawk/expressions/arithmetic_comparison.yaml b/tests/awk_scenarios/gawk/expressions/arithmetic_comparison.yaml new file mode 100644 index 000000000..da3970ff2 --- /dev/null +++ b/tests/awk_scenarios/gawk/expressions/arithmetic_comparison.yaml @@ -0,0 +1,30 @@ +description: Arithmetic and comparison expressions drive conditional actions +upstream: + suite: gawk + id: test/compare.awk + ref: gawk-5.4.0 +covers: + - numeric addition updates an accumulator + - modulo participates in equality comparisons + - if and else choose actions from numeric comparisons +input: + program: | + { + total += $1 + if ($1 % 2 == 0) + print $1, "even" + else + print $1, "odd" + } + END { print "total", total } + stdin: | + 3 + 4 + 9 +expect: + stdout: | + 3 odd + 4 even + 9 odd + total 16 + exit_code: 0 diff --git a/tests/awk_scenarios/gawk/fields/assign_rebuilds_record.yaml b/tests/awk_scenarios/gawk/fields/assign_rebuilds_record.yaml new file mode 100644 index 000000000..1e117866e --- /dev/null +++ b/tests/awk_scenarios/gawk/fields/assign_rebuilds_record.yaml @@ -0,0 +1,27 @@ +description: Assigning a numbered field rebuilds $0 using OFS +upstream: + suite: gawk + id: test/assignnumfield.awk + ref: gawk-5.4.0 +covers: + - assigning to a 
numbered field changes that field + - rebuilding $0 after a field assignment uses OFS + - NF remains the number of fields when assigning an existing field +input: + program: | + BEGIN { OFS = "|" } + { + $2 = "patched" + print $0 + print NF + } + stdin: | + alpha beta gamma + solo pair +expect: + stdout: | + alpha|patched|gamma + 3 + solo|patched + 2 + exit_code: 0 diff --git a/tests/awk_scenarios/gawk/functions/split.yaml b/tests/awk_scenarios/gawk/functions/split.yaml new file mode 100644 index 000000000..724b03dbe --- /dev/null +++ b/tests/awk_scenarios/gawk/functions/split.yaml @@ -0,0 +1,23 @@ +description: split populates array elements and returns the element count +upstream: + suite: gawk + id: test/splitargv.awk + ref: gawk-5.4.0 +covers: + - split returns the number of elements it created + - split stores array elements using one-based numeric indexes + - split accepts a string field separator argument +input: + program: | + BEGIN { + n = split("north:south:east", parts, ":") + print n + for (i = 1; i <= n; i++) print i "=" parts[i] + } +expect: + stdout: | + 3 + 1=north + 2=south + 3=east + exit_code: 0 diff --git a/tests/awk_scenarios/gawk/output/print_separators.yaml b/tests/awk_scenarios/gawk/output/print_separators.yaml new file mode 100644 index 000000000..e845a5240 --- /dev/null +++ b/tests/awk_scenarios/gawk/output/print_separators.yaml @@ -0,0 +1,20 @@ +description: print uses OFS between arguments and ORS after each record +upstream: + suite: gawk + id: test/ofs1.awk + ref: gawk-5.4.0 +covers: + - print inserts OFS between arguments + - print appends ORS after the output record + - numeric arguments are formatted for print output +input: + program: | + BEGIN { + OFS = "::" + ORS = "\n" + print "left", "right", 7 + } +expect: + stdout: | + left::right::7 + exit_code: 0 diff --git a/tests/awk_scenarios/gawk/regex/pattern_match.yaml b/tests/awk_scenarios/gawk/regex/pattern_match.yaml new file mode 100644 index 000000000..43867e3d2 --- /dev/null +++ 
b/tests/awk_scenarios/gawk/regex/pattern_match.yaml @@ -0,0 +1,26 @@ +description: Regular expression patterns and !~ select records +upstream: + suite: gawk + id: test/re_test.awk + ref: gawk-5.4.0 +covers: + - regex patterns select matching input records + - anchors constrain regex matches to the whole record + - "!~ negates a regex match expression" +input: + program: | + /^[[:alpha:]]+[0-9]$/ { print NR ":" $0 } + $0 !~ /[aeiou]/ { print "no-vowel:" $0 } + stdin: | + abc1 + sky + lake2 + B7 +expect: + stdout: | + 1:abc1 + no-vowel:sky + 3:lake2 + 4:B7 + no-vowel:B7 + exit_code: 0 diff --git a/tests/awk_scenarios/onetrueawk/basic/pattern_action.yaml b/tests/awk_scenarios/onetrueawk/basic/pattern_action.yaml new file mode 100644 index 000000000..6823046da --- /dev/null +++ b/tests/awk_scenarios/onetrueawk/basic/pattern_action.yaml @@ -0,0 +1,25 @@ +description: Pattern-only and action-only rules compose across records +upstream: + suite: onetrueawk + id: testdir/t.3 + ref: 3c2e168a8f794ed61c93131b05fb998d79d155df + notes: Also covers the default action behavior represented by testdir/t.0. +covers: + - pattern-only rules print the current record + - action-only rules run for every record + - END actions can observe accumulated values +input: + program: | + /keep/ + { total += $2 } + END { print "total=" total } + stdin: | + keep 4 + drop 7 + keep 2 +expect: + stdout: | + keep 4 + keep 2 + total=13 + exit_code: 0 diff --git a/tests/awk_scenarios/upstream-map.yaml b/tests/awk_scenarios/upstream-map.yaml new file mode 100644 index 000000000..e43e29609 --- /dev/null +++ b/tests/awk_scenarios/upstream-map.yaml @@ -0,0 +1,101 @@ +# Tracks which upstream AWK tests have been represented by original rshell tests. +# This file is an audit ledger only; tests/awk_scenarios/enabled.txt controls +# which rewritten tests are executed. 
+entries: + - suite: gawk + id: test/beginfile1.awk + ref: gawk-5.4.0 + status: rewritten + tests: + - gawk/basic/begin_end_records.yaml + covers: + - BEGIN and END action ordering + - record processing updates NR + + - suite: gawk + id: test/fieldwdth.awk + ref: gawk-5.4.0 + status: rewritten + tests: + - gawk/basic/field_separator.yaml + covers: + - field splitting + - NF updates for the current record + + - suite: gawk + id: test/argtest.awk + ref: gawk-5.4.0 + status: rewritten + tests: + - gawk/cli/variable_assignment.yaml + covers: + - -v assignment timing + - FILENAME and FNR for file input + + - suite: gawk + id: test/splitargv.awk + ref: gawk-5.4.0 + status: rewritten + tests: + - gawk/functions/split.yaml + covers: + - split return value + - split array indexing + + - suite: gawk + id: test/assignnumfield.awk + ref: gawk-5.4.0 + status: rewritten + tests: + - gawk/fields/assign_rebuilds_record.yaml + covers: + - numbered field assignment + - record rebuilding after field assignment + + - suite: gawk + id: test/compare.awk + ref: gawk-5.4.0 + status: rewritten + tests: + - gawk/expressions/arithmetic_comparison.yaml + covers: + - arithmetic expressions + - numeric comparisons + + - suite: gawk + id: test/ofs1.awk + ref: gawk-5.4.0 + status: rewritten + tests: + - gawk/output/print_separators.yaml + covers: + - OFS in print output + - ORS in print output + + - suite: gawk + id: test/re_test.awk + ref: gawk-5.4.0 + status: rewritten + tests: + - gawk/regex/pattern_match.yaml + covers: + - regex pattern matching + - negated regex matching + + - suite: onetrueawk + id: testdir/t.3 + ref: 3c2e168a8f794ed61c93131b05fb998d79d155df + status: rewritten + tests: + - onetrueawk/basic/pattern_action.yaml + covers: + - pattern-only rules + + - suite: onetrueawk + id: testdir/t.0 + ref: 3c2e168a8f794ed61c93131b05fb998d79d155df + status: rewritten + tests: + - onetrueawk/basic/pattern_action.yaml + covers: + - action-only rules diff --git a/tests/awk_scenarios_test.go 
b/tests/awk_scenarios_test.go new file mode 100644 index 000000000..d82a2bffc --- /dev/null +++ b/tests/awk_scenarios_test.go @@ -0,0 +1,359 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2026-present Datadog, Inc. + +package tests + +import ( + "bytes" + "context" + "errors" + "os" + "os/exec" + "path/filepath" + "sort" + "strconv" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "gopkg.in/yaml.v3" +) + +type awkScenario struct { + Description string `yaml:"description"` + Upstream awkUpstreamMetadata `yaml:"upstream"` + Covers []string `yaml:"covers"` + Skip string `yaml:"skip"` + Setup setup `yaml:"setup"` + Input awkInput `yaml:"input"` + Expect awkExpected `yaml:"expect"` +} + +type awkUpstreamMetadata struct { + Suite string `yaml:"suite"` + ID string `yaml:"id"` + Ref string `yaml:"ref"` + Notes string `yaml:"notes"` +} + +type awkInput struct { + AwkArgs []string `yaml:"awk_args"` + Program string `yaml:"program"` + ProgramFile string `yaml:"program_file"` + Args []string `yaml:"args"` + Stdin string `yaml:"stdin"` + Envs map[string]string `yaml:"envs"` +} + +type awkExpected struct { + Stdout string `yaml:"stdout"` + StdoutContains []string `yaml:"stdout_contains"` + Stderr string `yaml:"stderr"` + StderrContains []string `yaml:"stderr_contains"` + ExitCode int `yaml:"exit_code"` +} + +type awkResult struct { + stdout string + stderr string + exitCode int +} + +type awkUpstreamMap struct { + Entries []awkUpstreamMapEntry `yaml:"entries"` +} + +type awkUpstreamMapEntry struct { + Suite string `yaml:"suite"` + ID string `yaml:"id"` + Ref string `yaml:"ref"` + Status string `yaml:"status"` + Tests []string `yaml:"tests"` + Covers []string `yaml:"covers"` + Reason string `yaml:"reason"` +} + +func 
TestAwkScenarioMetadata(t *testing.T) { + scenariosDir := filepath.Join("awk_scenarios") + enabledPaths := loadEnabledAwkScenarios(t, filepath.Join(scenariosDir, "enabled.txt"), scenariosDir) + mapEntries := loadAwkUpstreamMap(t, filepath.Join(scenariosDir, "upstream-map.yaml"), scenariosDir) + + mappedTests := map[string]bool{} + for _, entry := range mapEntries { + for _, testPath := range entry.Tests { + mappedTests[filepath.Clean(filepath.FromSlash(testPath))] = true + } + } + for _, enabledPath := range enabledPaths { + require.True(t, mappedTests[enabledPath], "enabled awk scenario %s is missing from upstream-map.yaml", enabledPath) + loadAwkScenario(t, filepath.Join(scenariosDir, enabledPath)) + } +} + +func TestAwkScenarios(t *testing.T) { + if os.Getenv("RSHELL_AWK_TEST") == "" { + t.Skip("skipping awk scenario tests (set RSHELL_AWK_TEST=1 to enable)") + } + + scenariosDir := filepath.Join("awk_scenarios") + enabledPaths := loadEnabledAwkScenarios(t, filepath.Join(scenariosDir, "enabled.txt"), scenariosDir) + + candidate := os.Getenv("AWK_UNDER_TEST") + oracle := os.Getenv("GAWK_ORACLE") + if candidate == "" { + candidate = oracle + } + if candidate == "" { + t.Fatal("AWK_UNDER_TEST or GAWK_ORACLE must point to the awk binary under test") + } + + candidate = resolveAwkExecutable(t, candidate) + if oracle != "" { + oracle = resolveAwkExecutable(t, oracle) + } + timeout := awkScenarioTimeout(t) + + groups := groupAwkScenarioPaths(enabledPaths) + for _, group := range sortedMapKeys(groups) { + paths := groups[group] + t.Run(group, func(t *testing.T) { + for _, scenarioPath := range paths { + path := filepath.Join(scenariosDir, scenarioPath) + sc := loadAwkScenario(t, path) + name := strings.TrimSuffix(filepath.Base(scenarioPath), filepath.Ext(scenarioPath)) + t.Run(name, func(t *testing.T) { + if sc.Skip != "" { + t.Skip(sc.Skip) + } + + got := runAwkScenario(t, candidate, sc, timeout) + assertAwkExpectations(t, sc, got) + + if oracle != "" && candidate != 
oracle { + want := runAwkScenario(t, oracle, sc, timeout) + assert.Equal(t, want.exitCode, got.exitCode, "exit code mismatch against GNU awk oracle") + assert.Equal(t, want.stdout, got.stdout, "stdout mismatch against GNU awk oracle") + assert.Equal(t, want.stderr, got.stderr, "stderr mismatch against GNU awk oracle") + } + }) + } + }) + } +} + +func loadAwkUpstreamMap(t *testing.T, path, scenariosDir string) []awkUpstreamMapEntry { + t.Helper() + data, err := os.ReadFile(path) + require.NoError(t, err, "failed to read awk upstream map %s", path) + + var upstreamMap awkUpstreamMap + err = yaml.Unmarshal(data, &upstreamMap) + require.NoError(t, err, "failed to parse awk upstream map %s", path) + require.NotEmpty(t, upstreamMap.Entries, "awk upstream map %s must contain entries", path) + + for index, entry := range upstreamMap.Entries { + require.NotEmpty(t, entry.Suite, "awk upstream map entry %d must identify a suite", index) + require.NotEmpty(t, entry.ID, "awk upstream map entry %d must identify an upstream id", index) + require.NotEmpty(t, entry.Ref, "awk upstream map entry %d must identify an upstream ref", index) + require.NotEmpty(t, entry.Status, "awk upstream map entry %d must identify a status", index) + if entry.Status == "rewritten" || entry.Status == "policy" { + require.NotEmpty(t, entry.Tests, "awk upstream map entry %d must list local tests", index) + require.NotEmpty(t, entry.Covers, "awk upstream map entry %d must describe covered behavior", index) + } + if entry.Status == "deferred" { + require.NotEmpty(t, entry.Reason, "awk upstream map entry %d must explain deferral", index) + } + for _, testPath := range entry.Tests { + require.False(t, filepath.IsAbs(testPath), "awk upstream map entry %d test path must be relative: %s", index, testPath) + cleaned := filepath.Clean(filepath.FromSlash(testPath)) + require.False(t, cleaned == "." 
|| strings.HasPrefix(cleaned, ".."+string(os.PathSeparator)) || cleaned == "..", "awk upstream map entry %d test path escapes scenarios dir: %s", index, testPath) + if entry.Status == "rewritten" { + require.FileExists(t, filepath.Join(scenariosDir, cleaned), "awk upstream map entry %d test path does not exist: %s", index, testPath) + } + } + } + return upstreamMap.Entries +} + +func loadAwkScenario(t *testing.T, path string) awkScenario { + t.Helper() + data, err := os.ReadFile(path) + require.NoError(t, err, "failed to read awk scenario file %s", path) + + var sc awkScenario + err = yaml.Unmarshal(data, &sc) + require.NoError(t, err, "failed to parse awk scenario file %s", path) + require.NotEmpty(t, sc.Description, "awk scenario %s must have a description", path) + require.NotEmpty(t, sc.Upstream.Suite, "awk scenario %s must identify an upstream suite", path) + require.NotEmpty(t, sc.Upstream.ID, "awk scenario %s must identify an upstream test id or coverage id", path) + require.NotEmpty(t, sc.Covers, "awk scenario %s must describe the behavior it covers", path) + return sc +} + +func loadEnabledAwkScenarios(t *testing.T, enabledPath, scenariosDir string) []string { + t.Helper() + + data, err := os.ReadFile(enabledPath) + require.NoError(t, err, "failed to read enabled awk scenario list %s", enabledPath) + + seen := map[string]int{} + var paths []string + for lineNumber, rawLine := range strings.Split(string(data), "\n") { + line := strings.TrimSpace(rawLine) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + require.False(t, filepath.IsAbs(line), "enabled awk scenario %s:%d must be relative", enabledPath, lineNumber+1) + cleaned := filepath.Clean(filepath.FromSlash(line)) + require.False(t, cleaned == "." 
|| strings.HasPrefix(cleaned, ".."+string(os.PathSeparator)) || cleaned == "..", "enabled awk scenario %s:%d escapes scenarios dir: %s", enabledPath, lineNumber+1, line) + require.Contains(t, []string{".yaml", ".yml"}, filepath.Ext(cleaned), "enabled awk scenario %s:%d must point to a YAML file", enabledPath, lineNumber+1) + if previous, ok := seen[cleaned]; ok { + t.Fatalf("enabled awk scenario %s:%d duplicates line %d: %s", enabledPath, lineNumber+1, previous, line) + } + seen[cleaned] = lineNumber + 1 + require.FileExists(t, filepath.Join(scenariosDir, cleaned), "enabled awk scenario %s:%d does not exist", enabledPath, lineNumber+1) + paths = append(paths, cleaned) + } + require.NotEmpty(t, paths, "enabled awk scenario list %s is empty", enabledPath) + return paths +} + +func groupAwkScenarioPaths(paths []string) map[string][]string { + groups := make(map[string][]string) + for _, path := range paths { + group := filepath.ToSlash(filepath.Dir(path)) + groups[group] = append(groups[group], path) + } + for _, paths := range groups { + sort.Strings(paths) + } + return groups +} + +func runAwkScenario(t *testing.T, awkBin string, sc awkScenario, timeout time.Duration) awkResult { + t.Helper() + + dir := setupTestDir(t, scenario{Setup: sc.Setup}) + args := append([]string{}, sc.Input.AwkArgs...) + if sc.Input.ProgramFile != "" { + if sc.Input.Program != "" { + programPath := filepath.Join(dir, sc.Input.ProgramFile) + require.NoError(t, os.MkdirAll(filepath.Dir(programPath), 0755), "failed to create directories for %s", sc.Input.ProgramFile) + require.NoError(t, os.WriteFile(programPath, []byte(sc.Input.Program), 0644), "failed to write awk program %s", sc.Input.ProgramFile) + } + args = append(args, "-f", sc.Input.ProgramFile) + } else { + require.NotEmpty(t, sc.Input.Program, "awk scenario must provide program or program_file") + args = append(args, sc.Input.Program) + } + args = append(args, sc.Input.Args...) 
+ + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + cmd := exec.CommandContext(ctx, awkBin, args...) + cmd.Dir = dir + cmd.Stdin = strings.NewReader(sc.Input.Stdin) + cmd.Env = append(os.Environ(), "LC_ALL=C", "TZ=UTC") + for k, v := range sc.Input.Envs { + cmd.Env = append(cmd.Env, k+"="+v) + } + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + err := cmd.Run() + if ctx.Err() != nil { + t.Fatalf("awk scenario timed out after %s", timeout) + } + + exitCode := 0 + if err != nil { + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + exitCode = exitErr.ExitCode() + } else { + t.Fatalf("failed to run awk candidate %s: %v", awkBin, err) + } + } + + return awkResult{ + stdout: stdout.String(), + stderr: stderr.String(), + exitCode: exitCode, + } +} + +func assertAwkExpectations(t *testing.T, sc awkScenario, got awkResult) { + t.Helper() + + assert.Equal(t, sc.Expect.ExitCode, got.exitCode, "exit code mismatch") + if len(sc.Expect.StdoutContains) > 0 { + for _, substr := range sc.Expect.StdoutContains { + assert.Contains(t, got.stdout, substr, "stdout should contain %q", substr) + } + } else { + assert.Equal(t, sc.Expect.Stdout, got.stdout, "stdout mismatch") + } + + if len(sc.Expect.StderrContains) > 0 { + for _, substr := range sc.Expect.StderrContains { + assert.Contains(t, got.stderr, substr, "stderr should contain %q", substr) + } + } else { + assert.Equal(t, sc.Expect.Stderr, got.stderr, "stderr mismatch") + } +} + +func resolveAwkExecutable(t *testing.T, value string) string { + t.Helper() + + if filepath.IsAbs(value) { + require.FileExists(t, value, "awk executable does not exist") + return value + } + + if strings.ContainsRune(value, os.PathSeparator) { + root := repoRoot(t) + candidate := filepath.Join(root, value) + if _, err := os.Stat(candidate); err == nil { + return candidate + } + wd, err := os.Getwd() + require.NoError(t, err) + return filepath.Join(wd, value) + } + + 
resolved, err := exec.LookPath(value) + require.NoError(t, err, "awk executable %q not found on PATH", value) + return resolved +} + +func awkScenarioTimeout(t *testing.T) time.Duration { + t.Helper() + + value := os.Getenv("RSHELL_AWK_SCENARIO_TIMEOUT") + if value == "" { + return 10 * time.Second + } + if seconds, err := strconv.Atoi(value); err == nil { + return time.Duration(seconds) * time.Second + } + timeout, err := time.ParseDuration(value) + require.NoError(t, err, "invalid RSHELL_AWK_SCENARIO_TIMEOUT") + return timeout +} + +func sortedMapKeys[V any](m map[string]V) []string { + keys := make([]string, 0, len(m)) + for key := range m { + keys = append(keys, key) + } + sort.Strings(keys) + return keys +} diff --git a/tools/awk-harness/README.md b/tools/awk-harness/README.md index 8fc993c6e..c4fba72a5 100644 --- a/tools/awk-harness/README.md +++ b/tools/awk-harness/README.md @@ -62,9 +62,17 @@ The harness rejects a `GAWK_ORACLE` whose `gawk --version` does not match ## Usage +Run the rshell-owned rewritten AWK scenarios against the pinned GNU awk oracle: + +```bash +tools/awk-harness/run.sh install-gawk +tools/awk-harness/run.sh rewritten +``` + Point `AWK_UNDER_TEST` at the candidate binary to test: ```bash +AWK_UNDER_TEST=/path/to/awk tools/awk-harness/run.sh rewritten AWK_UNDER_TEST=/path/to/awk tools/awk-harness/run.sh gawk AWK_UNDER_TEST=/path/to/awk tools/awk-harness/run.sh onetrueawk AWK_UNDER_TEST=/path/to/awk tools/awk-harness/run.sh all @@ -74,6 +82,7 @@ For rshell, use the adapter that turns awk argv into an rshell `-c` command: ```bash make build +RSHELL_BIN=./rshell AWK_UNDER_TEST=tools/awk-harness/rshell-awk tools/awk-harness/run.sh rewritten RSHELL_BIN=./rshell AWK_UNDER_TEST=tools/awk-harness/rshell-awk tools/awk-harness/run.sh all ``` @@ -152,6 +161,24 @@ GAWK_TEST_LIMIT=25 AWK_UNDER_TEST=/path/to/awk tools/awk-harness/run.sh gawk `GAWK_TEST_MODE=make-check` is available for experiments with gawk's native test harness. 
It may require GNU build tools and is not the default. +## Rewritten Local Scenarios + +`tools/awk-harness/run.sh rewritten` runs the local AWK scenario rewrites listed +in `tests/awk_scenarios/enabled.txt`. These files are rshell-owned tests, not +vendored upstream tests. Each scenario carries upstream metadata and a `covers` +list so we can track which GNU awk or One True Awk behavior it rewrites. + +`tests/awk_scenarios/upstream-map.yaml` is an audit ledger for upstream rewrite +coverage. It is not a run list; `enabled.txt` is the single source of truth for +which rewritten tests execute. + +When `AWK_UNDER_TEST` is unset, `rewritten` runs the scenarios against the +pinned GNU awk oracle. This lets CI validate the local test definitions before +rshell has an `awk` builtin. + +When `AWK_UNDER_TEST` is set, `rewritten` runs the candidate and compares it to +both the scenario's expected result and the pinned GNU awk oracle. + ## Outputs Results and logs are written under: diff --git a/tools/awk-harness/run-rewritten.sh b/tools/awk-harness/run-rewritten.sh new file mode 100755 index 000000000..490870fc1 --- /dev/null +++ b/tools/awk-harness/run-rewritten.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$SCRIPT_DIR/lib.sh" + +oracle="$(resolve_gawk_oracle)" +if [ -z "${AWK_UNDER_TEST:-}" ]; then + AWK_UNDER_TEST="$oracle" +else + AWK_UNDER_TEST="$(resolve_awk_under_test)" +fi + +export GAWK_ORACLE="$oracle" +export AWK_UNDER_TEST +export RSHELL_AWK_TEST=1 + +log "running rewritten AWK scenarios" +log "using candidate: $AWK_UNDER_TEST" +log "using GNU awk oracle: $GAWK_ORACLE ($("$GAWK_ORACLE" --version | sed -n '1p'))" + +(cd "$REPO_ROOT" && go test -v ./tests -run TestAwkScenarios -count=1) diff --git a/tools/awk-harness/run.sh b/tools/awk-harness/run.sh index 9ffae074d..8aadf84e6 100755 --- a/tools/awk-harness/run.sh +++ b/tools/awk-harness/run.sh @@ -9,6 +9,7 @@ usage() { Usage: 
tools/awk-harness/run.sh TARGET Targets: + rewritten Run rshell-owned AWK scenario rewrites. onetrueawk Fetch and run One True Awk tests against the GNU awk oracle. gawk Fetch and run GNU awk tests against the GNU awk oracle. all Run gawk, then onetrueawk. @@ -26,6 +27,7 @@ Oracle: Useful environment variables: AWK_HARNESS_BOOTSTRAP=1 Fetch and summarize upstream tests only. AWK_HARNESS_CACHE=DIR Cache external repos and results. + RSHELL_AWK_SCENARIO_TIMEOUT=D Duration or seconds for local rewritten tests. ONETRUEAWK_REF=REF One True Awk commit, tag, or branch. ONETRUEAWK_SUITE=core|all|... One True Awk suites to run. GAWK_ORACLE=/path/to/gawk Trusted GNU awk binary used as oracle. @@ -44,6 +46,9 @@ if [ -z "$target" ] || [ "$target" = "-h" ] || [ "$target" = "--help" ]; then fi case "$target" in + rewritten) + exec "$SCRIPT_DIR/run-rewritten.sh" + ;; onetrueawk) exec "$SCRIPT_DIR/run-onetrueawk.sh" ;;