Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion .github/workflows/regression.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest
#if: "!contains(github.event.pull_request.title, '[NO-REGRESSION-TEST]')"
env:
LANGS: "go rust python typescript"
LANGS: "go rust python typescript cxx cpp"
# ignore package version for Go e.g. 'a.b/c@506fb8ece467f3a71c29322169bef9b0bc92d554'
DIFFJSON_IGNORE: >
['id']
Expand Down Expand Up @@ -64,6 +64,11 @@ jobs:
with:
node-version: '22'

- name: Setup clangd-18
run: |
sudo apt-get update
sudo apt-get install -y clangd-18

- name: Compile both binaries
run: |
(cd main_repo && go build -o ../abcoder_old)
Expand Down
63 changes: 0 additions & 63 deletions go.sum

Large diffs are not rendered by default.

51 changes: 39 additions & 12 deletions lang/collect/collect.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (

sitter "github.com/smacker/go-tree-sitter"

"github.com/cloudwego/abcoder/lang/cpp"
"github.com/cloudwego/abcoder/lang/cxx"
"github.com/cloudwego/abcoder/lang/java"
javaipc "github.com/cloudwego/abcoder/lang/java/ipc"
Expand Down Expand Up @@ -124,6 +125,8 @@ func switchSpec(l uniast.Language, repo string) LanguageSpec {
return python.NewPythonSpec()
case uniast.Java:
return java.NewJavaSpec(repo)
case uniast.Cpp:
return cpp.NewCppSpec()
default:
panic(fmt.Sprintf("unsupported language %s", l))
}
Expand Down Expand Up @@ -1836,11 +1839,13 @@ func (c *Collector) getSymbolByLocation(ctx context.Context, loc Location, depth
// return sym, nil
// }

// 1. already loaded
// Optimization: only search in symbols of the same file
if fileSyms, ok := c.symsByFile[loc.URI]; ok {
if sym := c.findMatchingSymbolIn(loc, fileSyms); sym != nil {
return sym, nil
if !(from.Type == "typeParameter" && c.Language == uniast.Cpp) {
// 1. already loaded
// Optimization: only search in symbols of the same file
if fileSyms, ok := c.symsByFile[loc.URI]; ok {
if sym := c.findMatchingSymbolIn(loc, fileSyms); sym != nil {
return sym, nil
}
}
}

Expand Down Expand Up @@ -2070,11 +2075,11 @@ func (c *Collector) processSymbol(ctx context.Context, sym *DocumentSymbol, dept

// function info: type params, inputs, outputs, receiver (if !needImpl)
if sym.Kind == SKFunction || sym.Kind == SKMethod {
var rsym *dependency
var rd *dependency
rec, tps, ips, ops := c.spec.FunctionSymbol(*sym)

if !hasImpl && rec >= 0 {
if (!hasImpl || c.Language == uniast.Cpp) && rec >= 0 {
rsym, err := c.getSymbolByTokenWithLimit(ctx, sym.Tokens[rec], depth)
rd = &dependency{sym.Tokens[rec].Location, rsym}
if err != nil || rsym == nil {
log.Error("get receiver symbol for token %v failed: %v\n", rec, err)
}
Expand All @@ -2083,6 +2088,18 @@ func (c *Collector) processSymbol(ctx context.Context, sym *DocumentSymbol, dept
ipsyms, is := c.getDepsWithLimit(ctx, sym, ips, depth-1)
opsyms, os := c.getDepsWithLimit(ctx, sym, ops, depth-1)

// filter out dependencies whose symbol is missing or is itself a type parameter
if c.Language == uniast.Cpp {
tsFiltered := make([]dependency, 0, len(ts))
for _, d := range ts {
if d.Symbol == nil || d.Symbol.Kind == SKTypeParameter {
continue
}
tsFiltered = append(tsFiltered, d)
}
ts = tsFiltered
}

//get last token of params for get signature
lastToken := rec
for _, t := range tps {
Expand All @@ -2101,18 +2118,28 @@ func (c *Collector) processSymbol(ctx context.Context, sym *DocumentSymbol, dept
}
}

c.updateFunctionInfo(sym, tsyms, ipsyms, opsyms, ts, is, os, rsym, lastToken)
c.updateFunctionInfo(sym, tsyms, ipsyms, opsyms, ts, is, os, rd, lastToken)
}

// variable info: type
if sym.Kind == SKVariable || sym.Kind == SKConstant {
i := c.spec.DeclareTokenOfSymbol(*sym)
// in cpp, search from back to front (starting just before the declare token) to find the first entity token
// find first entity token
for i = i + 1; i < len(sym.Tokens); i++ {
if c.spec.IsEntityToken(sym.Tokens[i]) {
break
if c.Language == uniast.Cpp {
for i = i - 1; i >= 0; i-- {
if c.spec.IsEntityToken(sym.Tokens[i]) {
break
}
}
} else {
for i = i + 1; i < len(sym.Tokens); i++ {
if c.spec.IsEntityToken(sym.Tokens[i]) {
break
}
}
}

if i < 0 || i >= len(sym.Tokens) {
log.Error("get type token of variable symbol %s failed\n", sym)
return
Expand Down
172 changes: 170 additions & 2 deletions lang/collect/export.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"strings"

"github.com/cloudwego/abcoder/lang/log"
"github.com/cloudwego/abcoder/lang/lsp"
. "github.com/cloudwego/abcoder/lang/lsp"
"github.com/cloudwego/abcoder/lang/uniast"
"github.com/cloudwego/abcoder/lang/utils"
Expand Down Expand Up @@ -336,6 +337,21 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
// NOTICE: use refName as id when symbol name is missing
name = refName
}

if c.Language == uniast.Cpp {
// for function overloads, use the call signature as id
if symbol.Kind == SKMethod || symbol.Kind == SKFunction {
name = c.extractCppCallSig(symbol)
}

// join name with namespace
if ns := c.scopePrefix(symbol); ns != "" {
if !strings.HasPrefix(name, ns+"::") {
name = ns + "::" + name
}
}
}

tmp := uniast.NewIdentity(mod, path, name)
id = &tmp
// Save to visited ONLY WHEN no errors occur
Expand Down Expand Up @@ -461,7 +477,22 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
id.Name = iid.Name + "<" + id.Name + ">"
}
}
if k == SKFunction {

// cpp get method name without class name and namespace
if c.Language == uniast.Cpp && rid != nil {
p := strings.IndexByte(name, '(')
head, tail := name, ""
if p >= 0 {
head, tail = name[:p], name[p:]
}

if idx := strings.LastIndex(head, "::"); idx >= 0 {
head = head[idx+2:]
}
name = head + tail
}

if k == SKFunction || c.Language == uniast.Cpp {
// NOTICE: class static method name is: type::method
id.Name += "::" + name
} else {
Expand Down Expand Up @@ -555,7 +586,17 @@ func (c *Collector) exportSymbol(repo *uniast.Repository, symbol *DocumentSymbol
continue
}
// NOTICE: use method name as key here
obj.Methods[method.Name] = *mid
if c.Language == uniast.Cpp {
methodName := c.cppBaseName(method.Name)
_, methodExist := obj.Methods[methodName]
isHeaderMethod := strings.HasSuffix(method.Location.URI.File(), ".h")
if methodExist && isHeaderMethod {
continue
}
obj.Methods[methodName] = *mid
} else {
obj.Methods[method.Name] = *mid
}
}
}
obj.Identity = *id
Expand Down Expand Up @@ -604,3 +645,130 @@ func mapKind(kind SymbolKind) uniast.TypeKind {
panic(fmt.Sprintf("unexpected kind %v", kind))
}
}

// scopePrefix walks the parent chain of sym (via c.cli.GetParent) and joins
// the names of all enclosing namespace symbols, outermost first, with "::".
// Parents that are not namespaces, or namespaces with an empty name, are
// skipped. It returns "" when no named namespace encloses sym.
func (c *Collector) scopePrefix(sym *DocumentSymbol) string {
	// Collect innermost-first while climbing towards the root.
	var names []string
	for parent := c.cli.GetParent(sym); parent != nil; parent = c.cli.GetParent(parent) {
		if parent.Kind == lsp.SKNamespace && parent.Name != "" {
			names = append(names, parent.Name)
		}
	}

	// Flip to outermost-first so the result reads like "a::b".
	for lo, hi := 0, len(names)-1; lo < hi; lo, hi = lo+1, hi-1 {
		names[lo], names[hi] = names[hi], names[lo]
	}
	return strings.Join(names, "::")
}

// cppBaseName reduces a (possibly qualified) C++ name to its bare identifier:
// everything up to and including the last "::" is dropped, then anything from
// the first '<' onwards is dropped (foo<T> -> foo). Surrounding whitespace is
// trimmed at every step.
//
// NOTE(review): the qualifier is located with a plain last-"::" search before
// template arguments are removed, so a "::" that appears inside template
// arguments is also treated as a qualifier separator.
func (c *Collector) cppBaseName(n string) string {
	base := strings.TrimSpace(n)

	// Strip the namespace/class qualifier, if any.
	if sep := strings.LastIndex(base, "::"); sep >= 0 {
		base = strings.TrimSpace(base[sep+len("::"):])
	}

	// Strip template arguments attached to the name itself.
	if lt := strings.Index(base, "<"); lt >= 0 {
		base = base[:lt]
	}
	return strings.TrimSpace(base)
}

// extractCppCallSig returns "<name>(<params>)", where <name> is the trimmed
// sym.Name and "(<params>)" is the parenthesised parameter list copied
// verbatim from sym.Text.
//
// Only the declaration header is inspected, i.e. sym.Text up to the first '{'
// or ';'. Within it, the first stand-alone occurrence of the symbol's base
// name (see cppBaseName) is located, then the first '(' after it, then the
// ')' that balances that '('.
//
// It returns "" when the trimmed name is empty, and "<name>()" when sym.Text
// is empty or any of the lookup steps above fails.
func (c *Collector) extractCppCallSig(sym *lsp.DocumentSymbol) (ret string) {
	name := strings.TrimSpace(sym.Name)
	if name == "" {
		return ""
	}
	fallback := name + "()"
	if sym.Text == "" {
		return fallback
	}

	// Identifier to look for in the header; fall back to the full name when
	// stripping qualifiers/template arguments leaves nothing.
	want := c.cppBaseName(name)
	if want == "" {
		want = name
	}

	isIdentByte := func(b byte) bool {
		switch {
		case 'a' <= b && b <= 'z', 'A' <= b && b <= 'Z', '0' <= b && b <= '9', b == '_':
			return true
		}
		return false
	}

	// Keep only the part of the text before the body or the terminating ';'.
	header := sym.Text
	if cut := strings.IndexAny(header, "{;"); cut >= 0 {
		header = header[:cut]
	}

	// Find the first occurrence of want that is not embedded in a longer
	// identifier (neither neighbour may be an identifier byte).
	namePos := -1
	for from := 0; from+len(want) <= len(header); {
		rel := strings.Index(header[from:], want)
		if rel < 0 {
			break
		}
		start := from + rel
		end := start + len(want)
		leftOK := start == 0 || !isIdentByte(header[start-1])
		rightOK := end == len(header) || !isIdentByte(header[end])
		if leftOK && rightOK {
			namePos = start
			break
		}
		from = start + 1
	}
	if namePos < 0 {
		return fallback
	}

	// First '(' following the name opens the parameter list.
	afterName := namePos + len(want)
	rel := strings.IndexByte(header[afterName:], '(')
	if rel < 0 {
		return fallback
	}
	openIdx := afterName + rel

	// Scan for the ')' that balances the opening '(' (nested parens allowed).
	depth := 0
	for i := openIdx; i < len(header); i++ {
		switch header[i] {
		case '(':
			depth++
		case ')':
			depth--
			if depth == 0 {
				return name + header[openIdx:i+1]
			}
		}
	}
	return fallback
}
46 changes: 46 additions & 0 deletions lang/cpp/lib.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright 2025 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cpp

import (
"fmt"
"time"

"github.com/cloudwego/abcoder/lang/uniast"
"github.com/cloudwego/abcoder/lang/utils"
)

const MaxWaitDuration = 5 * time.Minute

// InstallLanguageServer does not install anything: it always returns an empty
// string together with an error telling the user to install clangd-18
// manually.
func InstallLanguageServer() (string, error) {
	err := fmt.Errorf("please install clangd-18 manually. See https://releases.llvm.org/ (clangd is in clang-extra)")
	return "", err
}

// GetDefaultLSP returns the language handled by this package (uniast.Cpp)
// and the default clangd-18 command line, including its flags, used as the
// language server for it.
func GetDefaultLSP() (lang uniast.Language, name string) {
	lang = uniast.Cpp
	name = "clangd-18 --background-index=false -j=2 --clang-tidy=false"
	return lang, name
}

// CheckRepo computes how long to wait for the language server on repo.
//
// The first result (a file to open) is always empty. The second is a wait
// duration of 2s plus 1s per 1024 units of the size reported by
// utils.CountFiles for ".cpp" files, capped at MaxWaitDuration.
func CheckRepo(repo string) (string, time.Duration) {
	// TODO: check if the project compiles.

	// NOTICE: the wait time scales with the size of the C++ code files.
	// "build/" is passed as the third argument — presumably an excluded
	// directory; confirm against utils.CountFiles.
	_, size := utils.CountFiles(repo, ".cpp", "build/")
	wait := 2*time.Second + time.Duration(size/1024)*time.Second
	if wait > MaxWaitDuration {
		return "", MaxWaitDuration
	}
	return "", wait
}
Loading
Loading