From 0811b0d98be229d5055783a4ce3d92cb442883e3 Mon Sep 17 00:00:00 2001 From: "lqw@128C" Date: Thu, 7 May 2026 17:17:23 +0800 Subject: [PATCH] fix(batch_clone_build): place extgen.sh in separate directory place extgen.sh in separate directory instead of repository directory. pass PROJROOT, REPO_DIR, OUTPUT_DIR, DB_EXT_DIR to genScript Fixes #20. --- README.md | 4 +-- cmd/batch_clone_build/extgen.go | 33 ++++++++++++++++++++++++- config/repo.go | 1 + demo.yaml | 9 ++++++- doc/adapters/escape_analysis.md | 5 ++++ repos/test/batchmalloc/extgen.sh | 8 ------ repos/test/malloc_test/extgen.sh | 9 ------- yaml-examples/doltdb.yaml | 2 +- yaml-examples/extgens/batchmalloc.sh | 6 +++++ yaml-examples/extgens/dolt.sh | 5 ++++ yaml-examples/extgens/kitex-examples.sh | 8 ++++++ yaml-examples/extgens/malloc_test.sh | 7 ++++++ yaml-examples/kitex.yaml | 2 +- yaml-examples/malloc_test.yaml | 4 ++- yaml-examples/profile_test.yaml | 2 +- 15 files changed, 80 insertions(+), 25 deletions(-) delete mode 100755 repos/test/batchmalloc/extgen.sh delete mode 100755 repos/test/malloc_test/extgen.sh create mode 100755 yaml-examples/extgens/batchmalloc.sh create mode 100755 yaml-examples/extgens/dolt.sh create mode 100755 yaml-examples/extgens/kitex-examples.sh create mode 100755 yaml-examples/extgens/malloc_test.sh diff --git a/README.md b/README.md index a81aa6a..8e61f1d 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ Create your `stat.yaml` config file according to [`example.yaml`](./example.yaml - `sources`: Define repository sources with prefixes and specific repositories - `language`: Specify the programming language for analysis (e.g., go) - `buildGrps`: Configure build groups with timeout and build commands -- `externalGenGrps`: Generate external predicates (like escape analysis data) +- `externalGenGrps`: Generate external predicates (like escape analysis, profiling data) - `queryconfig`: Set up query execution with parallelization options - `queryGrps`: Define query groups 
with specific queries and target repositories @@ -82,7 +82,7 @@ QLStat supports extending CodeQL with escape analysis data through the escape ad 1. `goescape` is actually the command `go build -a -gcflags=all=-m=2 .` 2. You can also specify your own script with only one constraint: Generate `m2.log` in `$logRoot/extgen/path/to/repo/m2.log` 2. This generates escape analysis data during the build phase -3. Reference the external predicate in your query group with `externals: - movedToHeap` +3. Reference the external predicate in your query group with `externals: [movedToHeap, newEscapesToHeap]` 4. Use the external predicate in your CodeQL queries For more details about how the escape analysis extension works, see [Escape Analysis Documentation](doc/adapters/escape_analysis.md). diff --git a/cmd/batch_clone_build/extgen.go b/cmd/batch_clone_build/extgen.go index 99a152b..4819b57 100644 --- a/cmd/batch_clone_build/extgen.go +++ b/cmd/batch_clone_build/extgen.go @@ -2,6 +2,8 @@ package main import ( "fmt" + "log" + "os" "os/exec" "path/filepath" "strings" @@ -86,6 +88,35 @@ func adaptEscape(cfg *config.Artifact, repo config.Repo) { cmd.Stdout, cmd.Stderr = outFile, errFile _ = cmd.Run() } + +func abspath(path string) string { + p, err := filepath.Abs(path) + if err != nil { + log.Fatalf("Fail to get absolute path: %v", err) + } + return p +} + +/* +genScriptEnv generates the environment variables passed to a custom genScript + + REPO_DIR is the root directory of the repository + + OUTPUT_DIR is the directory to store intermediate results for generating the external predicates + + PROJROOT is the root directory of the project + + DB_EXT_DIR is the directory to store the external predicate database +*/ +func genScriptEnv(cfg *config.Artifact, repo config.Repo) []string { + return []string{ + "REPO_DIR=" + abspath(repo.DirPath(cfg.RepoRoot)), + "OUTPUT_DIR=" + abspath(repo.DirPath(extgenLogDir(cfg))), + "PROJROOT=" + abspath(utils.ProjectRoot()), + "DB_EXT_DIR=" + abspath(repo.DBExtDir(cfg.DBRoot)), 
+ } +} + func genscript(cfg *config.Artifact, repo config.Repo, script string) { outFile, errFile := utils.CreateOutAndErr(filepath.Join(repo.DirPath(extgenLogDir(cfg)), "runscript")) defer outFile.Close() @@ -97,7 +128,7 @@ func genscript(cfg *config.Artifact, repo config.Repo, script string) { } else { cmd = exec.Command(elems[0], elems[1:]...) } - cmd.Dir = repo.DirPath(cfg.RepoRoot) // run genscript in $repoRoot/path/to/repo + cmd.Env = append(os.Environ(), genScriptEnv(cfg, repo)...) cmd.Stdout, cmd.Stderr = outFile, errFile fmt.Printf("cwd: %s, out: %s, err: %s, cmd: %s\n", cmd.Dir, outFile.Name(), errFile.Name(), cmd.String()) _ = cmd.Run() diff --git a/config/repo.go b/config/repo.go index 3717503..efecdad 100644 --- a/config/repo.go +++ b/config/repo.go @@ -26,6 +26,7 @@ func (r *Repo) RemoteURL() string { return url } +// DirPath returns the repository directory under root: HostDir(root)/DirBaseName. func (r *Repo) DirPath(root string) string { return filepath.Join(r.GitSource.HostDir(root), r.DirBaseName) } diff --git a/demo.yaml b/demo.yaml index be67b92..bea67d7 100644 --- a/demo.yaml +++ b/demo.yaml @@ -32,6 +32,11 @@ buildGrps: # generate external predicates predicate # For repositories in each group, same genScript will be applied in the root directory of repositories # "goescape" means `go build -a -gcflags=-m=2 ./...`. The stderr will be redirected to $logRoot/path/to/repo/m2.log. Then escape_adapter is used to generate databases. The external predicate database is generated in $dbRoot/path/to/repo/ext/$external.csv. 
+# If you use a custom genScript, the following 4 environment variables are set for it: +# - PROJROOT: the root directory of the project +# - REPO_DIR: the root directory of the repository +# - OUTPUT_DIR: the directory to store intermediate results/logs used to generate the external predicate database +# - DB_EXT_DIR: the directory to store the external predicate database externalGenGrps: - genRepos: - rclone/rclone @@ -39,8 +44,10 @@ externalGenGrps: genScript: goescape - genRepos: - Lslightly/dolt + genScript: yaml-examples/extgens/dolt.sh + - genRepos: - Lslightly/kitex-examples - genScript: ./extgen.sh + genScript: yaml-examples/extgens/kitex-examples.sh # query queryconfig: diff --git a/doc/adapters/escape_analysis.md b/doc/adapters/escape_analysis.md index a1ff4d6..97f0832 100644 --- a/doc/adapters/escape_analysis.md +++ b/doc/adapters/escape_analysis.md @@ -39,6 +39,11 @@ queryconfig: In this configuration: - `genScript: goescape` instructs the system to compile repositories with escape analysis enabled + - Note that if you use a custom script, 4 environment variables are set, and the script can read them: + - `REPO_DIR`: The directory path of the repository + - `OUTPUT_DIR`: The directory that stores intermediate results and logs (e.g. `m2.log`) used to generate the external predicates + - `PROJROOT`: The project root directory + - `DB_EXT_DIR`: The directory to store the external predicate database - `externals: [movedToHeap]` makes the escape analysis data available to the specified queries. The external data table is located in `$dbRoot//ext/movedToHeap.csv`. - Convinient Option: `externalFiles: [yaml-template/escape.yaml]` specifies the YAML file that defines the escape analysis predicates. It will be automatically loaded and added to `externals`. 
- The query [`escape_ext/moved_to_heap_var_test.ql`](../../qlsrc/escape_ext/moved_to_heap_var_test.ql) can then use the `movedToHeap` predicate to identify variables that escape to the heap diff --git a/repos/test/batchmalloc/extgen.sh b/repos/test/batchmalloc/extgen.sh deleted file mode 100755 index 15874bb..0000000 --- a/repos/test/batchmalloc/extgen.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -ROOT=$(pwd) -PROJROOT=$ROOT/../../.. -logdir=$PROJROOT/logs/extgen/test/batchmalloc -go build -a -gcflags=-m=2 . &> $logdir/m2.log -cd $PROJROOT -go run ./cmd/escape_adapter -dir codeql-db/test/batchmalloc/ext -src=$ROOT -movedToHeap -newEscapesToHeap $logdir/m2.log - diff --git a/repos/test/malloc_test/extgen.sh b/repos/test/malloc_test/extgen.sh deleted file mode 100755 index b904031..0000000 --- a/repos/test/malloc_test/extgen.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -ROOT=$(pwd) -PROJROOT=$ROOT/../../.. -logdir=$PROJROOT/logs/extgen/test/malloc_test -go test -c -a -gcflags=-m=2 . &> $logdir/m2.log -go test -run ^$ -bench . -cpuprofile $logdir/cpu.out &> $logdir/bench.log -cd $PROJROOT -go run ./cmd/escape_adapter -dir codeql-db/test/malloc_test/ext -src=$ROOT -movedToHeap -newEscapesToHeap $logdir/m2.log -go run ./cmd/pprof2qlcsv/ -dir codeql-db/test/malloc_test/ext $logdir/cpu.out diff --git a/yaml-examples/doltdb.yaml b/yaml-examples/doltdb.yaml index b4963e1..ba9c560 100644 --- a/yaml-examples/doltdb.yaml +++ b/yaml-examples/doltdb.yaml @@ -23,7 +23,7 @@ buildGrps: externalGenGrps: - genRepos: - dolthub/dolt - genScript: ./extgen.sh + genScript: yaml-examples/extgens/dolt.sh # query queryconfig: diff --git a/yaml-examples/extgens/batchmalloc.sh b/yaml-examples/extgens/batchmalloc.sh new file mode 100755 index 0000000..f5e08fe --- /dev/null +++ b/yaml-examples/extgens/batchmalloc.sh @@ -0,0 +1,6 @@ +#!/bin/bash +cd $REPO_DIR +go build -a -gcflags=-m=2 . 
&> $OUTPUT_DIR/m2.log +cd $PROJROOT +go run ./cmd/escape_adapter -dir $DB_EXT_DIR -src=$REPO_DIR -movedToHeap -newEscapesToHeap $OUTPUT_DIR/m2.log + diff --git a/yaml-examples/extgens/dolt.sh b/yaml-examples/extgens/dolt.sh new file mode 100755 index 0000000..74bc01e --- /dev/null +++ b/yaml-examples/extgens/dolt.sh @@ -0,0 +1,5 @@ +#!/bin/bash +cd $REPO_DIR/go/ +go build -a -gcflags=-m=2 ./... 2> $OUTPUT_DIR/m2.log +cd $PROJROOT +go run ./cmd/escape_adapter -dir $DB_EXT_DIR -src=$REPO_DIR -movedToHeap $OUTPUT_DIR/m2.log diff --git a/yaml-examples/extgens/kitex-examples.sh b/yaml-examples/extgens/kitex-examples.sh new file mode 100755 index 0000000..4968412 --- /dev/null +++ b/yaml-examples/extgens/kitex-examples.sh @@ -0,0 +1,8 @@ +#!/bin/bash +cd $REPO_DIR/hello +> $OUTPUT_DIR/m2.log +go build -a -gcflags=all=-m=2 . 2>> $OUTPUT_DIR/m2.log +go build -a -gcflags=all=-m=2 -o ./client-bin ./client 2>> $OUTPUT_DIR/m2.log +cd $PROJROOT +go run ./cmd/escape_adapter -dir $DB_EXT_DIR -src=$REPO_DIR/hello -movedToHeap $OUTPUT_DIR/m2.log + diff --git a/yaml-examples/extgens/malloc_test.sh b/yaml-examples/extgens/malloc_test.sh new file mode 100755 index 0000000..f1a078a --- /dev/null +++ b/yaml-examples/extgens/malloc_test.sh @@ -0,0 +1,7 @@ +#!/bin/bash +cd $REPO_DIR +go test -c -a -gcflags=-m=2 . &> $OUTPUT_DIR/m2.log +go test -run ^$ -bench . 
-cpuprofile $OUTPUT_DIR/cpu.out &> $OUTPUT_DIR/bench.log +cd $PROJROOT +go run ./cmd/escape_adapter -dir $DB_EXT_DIR -src=$REPO_DIR -movedToHeap -newEscapesToHeap $OUTPUT_DIR/m2.log +go run ./cmd/pprof2qlcsv/ -dir $DB_EXT_DIR $OUTPUT_DIR/cpu.out diff --git a/yaml-examples/kitex.yaml b/yaml-examples/kitex.yaml index 24ebef4..4e598a0 100644 --- a/yaml-examples/kitex.yaml +++ b/yaml-examples/kitex.yaml @@ -22,7 +22,7 @@ buildGrps: externalGenGrps: - genRepos: - "-" - genScript: ./extgen.sh + genScript: yaml-examples/extgens/kitex-examples.sh # query queryconfig: diff --git a/yaml-examples/malloc_test.yaml b/yaml-examples/malloc_test.yaml index 78d2ae5..4fcaf71 100644 --- a/yaml-examples/malloc_test.yaml +++ b/yaml-examples/malloc_test.yaml @@ -39,8 +39,10 @@ buildGrps: externalGenGrps: - genRepos: - malloc_test + genScript: yaml-examples/extgens/malloc_test.sh + - genRepos: - batchmalloc - genScript: ./extgen.sh + genScript: yaml-examples/extgens/batchmalloc.sh # query queryconfig: diff --git a/yaml-examples/profile_test.yaml b/yaml-examples/profile_test.yaml index 7043570..b2a5085 100644 --- a/yaml-examples/profile_test.yaml +++ b/yaml-examples/profile_test.yaml @@ -35,7 +35,7 @@ buildGrps: externalGenGrps: - genRepos: - malloc_test - genScript: ./extgen.sh + genScript: yaml-examples/extgens/malloc_test.sh # query queryconfig: