diff --git a/internal/cbm/lsp/go_lsp.h b/internal/cbm/lsp/go_lsp.h index e13f8f52..8ab3e268 100644 --- a/internal/cbm/lsp/go_lsp.h +++ b/internal/cbm/lsp/go_lsp.h @@ -82,6 +82,7 @@ typedef struct { const char* method_names_str; // "|"-separated method names for interfaces (e.g. "Get|Put|Delete") bool is_interface; CBMLanguage lang; // language of the file that defined this — used by Tier 2 per-language registry build to filter all_defs + const char* namespace_name; // declared namespace/package for source-root-independent JVM filtering } CBMLSPDef; // Parse source, build registry from defs + stdlib, run LSP. diff --git a/internal/cbm/lsp/kotlin_lsp.c b/internal/cbm/lsp/kotlin_lsp.c index 7a3d843a..cd4de270 100644 --- a/internal/cbm/lsp/kotlin_lsp.c +++ b/internal/cbm/lsp/kotlin_lsp.c @@ -4136,6 +4136,85 @@ void cbm_run_kotlin_lsp(CBMArena *arena, CBMFileResult *result, const char *sour * a call site in another file resolves to the right node. Types and functions * keep their full project-qualified QN; functions carry receiver_type so the * sole-definer fallback can tell a top-level fun from a method. */ +static const char *kt_cross_builtin_return_qn(const char *name) { + if (!name) { + return NULL; + } + if (strcmp(name, "String") == 0) { + return "kotlin.String"; + } + if (strcmp(name, "Int") == 0 || strcmp(name, "Integer") == 0) { + return "kotlin.Int"; + } + if (strcmp(name, "Long") == 0) { + return "kotlin.Long"; + } + if (strcmp(name, "Float") == 0) { + return "kotlin.Float"; + } + if (strcmp(name, "Double") == 0) { + return "kotlin.Double"; + } + if (strcmp(name, "Boolean") == 0 || strcmp(name, "Bool") == 0) { + return "kotlin.Boolean"; + } + if (strcmp(name, "Char") == 0 || strcmp(name, "Character") == 0) { + return "kotlin.Char"; + } + if (strcmp(name, "Byte") == 0) { + return "kotlin.Byte"; + } + if (strcmp(name, "Short") == 0) { + return "kotlin.Short"; + } + if (strcmp(name, "Unit") == 0 || strcmp(name, "Void") == 0 || strcmp(name, "void") == 0) { + return "kotlin.Unit"; + } + if (strcmp(name, "Any") == 0 || strcmp(name, "Object") == 0) { + return "kotlin.Any"; + } + return NULL; +} + +static const CBMType *kt_cross_return_type(CBMArena *arena, const CBMLSPDef *d) { + if (!arena || !d || !d->return_types || !d->return_types[0]) { + return NULL; + } + const char *text = d->return_types; + const char *bar = strchr(text, '|'); + const char *first = bar ? cbm_arena_strndup(arena, text, (size_t)(bar - text)) : text; + if (!first || !first[0]) { + return NULL; + } + if (!strchr(first, '.')) { + const char *builtin = kt_cross_builtin_return_qn(first); + if (builtin) { + first = builtin; + } else if (d->namespace_name && d->namespace_name[0]) { + first = kt_join_dot(arena, d->namespace_name, first); + } + } + return cbm_type_named(arena, first); +} + +static const CBMType *kt_cross_func_sig_with_return(CBMArena *arena, const CBMLSPDef *d) { + const CBMType *ret = kt_cross_return_type(arena, d); + if (!ret || cbm_type_is_unknown(ret)) { + return NULL; + } + const char **empty_pn = (const char **)cbm_arena_alloc(arena, sizeof(*empty_pn)); + const CBMType **empty_pt = (const CBMType **)cbm_arena_alloc(arena, sizeof(*empty_pt)); + const CBMType **rets = (const CBMType **)cbm_arena_alloc(arena, 2 * sizeof(*rets)); + if (!empty_pn || !empty_pt || !rets) { + return NULL; + } + empty_pn[0] = NULL; + empty_pt[0] = NULL; + rets[0] = ret; + rets[1] = NULL; + return cbm_type_func(arena, empty_pn, empty_pt, rets); +} + static void kt_register_cross_def(CBMTypeRegistry *reg, CBMArena *arena, const CBMLSPDef *d) { if (!d->qualified_name || !d->short_name || !d->label) { return; @@ -4185,6 +4264,7 @@ static void kt_register_cross_def(CBMTypeRegistry *reg, CBMArena *arena, const C /* receiver_type distinguishes a top-level fun (NULL) from a method * (set) — the sole-definer fallback only matches top-level funs. */ rf.receiver_type = d->receiver_type; + rf.signature = kt_cross_func_sig_with_return(arena, d); cbm_registry_add_func(reg, rf); } } diff --git a/src/pipeline/lsp_resolve.h b/src/pipeline/lsp_resolve.h index 5c66863d..d408e035 100644 --- a/src/pipeline/lsp_resolve.h +++ b/src/pipeline/lsp_resolve.h @@ -64,15 +64,51 @@ static inline const char *cbm_lsp_bare_segment(const char *name) { return seg; } +/* Tail helper: return the start of the final two dot-separated segments + * ("Class.method") or NULL when the QN is too short. */ +static inline const char *cbm_pipeline_qn_class_method_tail(const char *qn) { + if (!qn) { + return NULL; + } + const char *last = strrchr(qn, '.'); + if (!last || last == qn) { + return NULL; + } + const char *second = last; + while (second > qn) { + second--; + if (*second == '.') { + if (second == qn) { + return qn; + } + return second + 1; + } + } + return qn; +} + +static inline const char *cbm_pipeline_call_callee_leaf(const char *callee_name) { + return cbm_lsp_bare_segment(callee_name); +} + +static inline int cbm_pipeline_qn_class_method_tail_eq(const char *qn, const char *tail) { + const char *qt = cbm_pipeline_qn_class_method_tail(qn); + return qt && tail && strcmp(qt, tail) == 0; +} + /* Look up the highest-confidence LSP-resolved call entry whose caller QN * matches the textual call's enclosing function and whose callee QN * short-name matches the textual callee. Returns a pointer into `arr` * or NULL if no qualifying entry exists. * - * Match rule: the LSP emits CBMResolvedCall entries whose caller_qn - * matches the call's enclosing function and whose callee_qn ends with - * the textual callee_name as the last dot-separated segment. The - * pointer returned aliases into `arr` and stays valid as long as the + * Match rule: + * 1. exact caller_qn + callee short-name match wins first; + * 2. if no exact caller match exists, a unique Class.method tail + * match between rc->caller_qn and call->enclosing_func_qn may win; + * 3. ambiguous tails return NULL so the registry fallback stays in + * control. + * + * The pointer returned aliases into `arr` and stays valid as long as the * underlying CBMFileResult is alive. */ static inline const CBMResolvedCall *cbm_pipeline_find_lsp_resolution( const CBMResolvedCallArray *arr, const CBMCall *call) { @@ -82,7 +118,8 @@ static inline const CBMResolvedCall *cbm_pipeline_find_lsp_resolution( if (!call->enclosing_func_qn || !call->callee_name) { return NULL; } - const CBMResolvedCall *best = NULL; + + const CBMResolvedCall *best_exact = NULL; for (int i = 0; i < arr->count; i++) { const CBMResolvedCall *rc = &arr->items[i]; if (!rc->caller_qn || !rc->callee_qn) { @@ -124,26 +161,55 @@ static inline const CBMResolvedCall *cbm_pipeline_find_lsp_resolution( continue; } } - if (!best || rc->confidence > best->confidence) { - best = rc; + if (!best_exact || rc->confidence > best_exact->confidence) { + best_exact = rc; + } + } + if (best_exact) { + return best_exact; + } + + const char *call_tail = cbm_pipeline_qn_class_method_tail(call->enclosing_func_qn); + if (!call_tail) { + return NULL; + } + + const CBMResolvedCall *best_tail = NULL; + for (int i = 0; i < arr->count; i++) { + const CBMResolvedCall *rc = &arr->items[i]; + if (!rc->caller_qn || !rc->callee_qn) { + continue; + } + if (rc->confidence < CBM_LSP_CONFIDENCE_FLOOR) { + continue; + } + const char *short_name = strrchr(rc->callee_qn, '.'); + short_name = short_name ? short_name + SKIP_ONE : rc->callee_qn; + const char *call_leaf = cbm_pipeline_call_callee_leaf(call->callee_name); + if (!call_leaf || strcmp(short_name, call_leaf) != 0) { + continue; + } + if (!cbm_pipeline_qn_class_method_tail_eq(rc->caller_qn, call_tail)) { + continue; } + if (best_tail) { + return NULL; + } + best_tail = rc; } - return best; + return best_tail; } /* Resolve an LSP-emitted callee_qn to a graph-buffer node. * - * Per-file LSPs (notably py_lsp) sometimes emit `callee_qn` as the raw - * import-module path the source code uses (e.g. `greeter.Greeter` from - * `from greeter import Greeter`) rather than the project-qualified QN - * the gbuf actually stores (`.greeter.Greeter`). This is - * unavoidable at the per-file LSP layer: the LSP cannot tell in-project - * imports (qualify) from external imports (don't qualify, e.g. `os.path`) - * without consulting the gbuf, which is built downstream. - * - * The fallback rule: try the LSP-emitted QN as-is first; on miss, retry - * with `.`. If that also misses, the target is - * external/unknown and the caller drops the edge — same as today. + * Per-file LSPs sometimes emit `callee_qn` as the raw package-shaped + * import path the source code uses rather than the project-qualified QN + * the gbuf actually stores. The fallback rule is: + * 1. try the LSP-emitted QN as-is; + * 2. retry with `.` when needed; + * 3. if both fail, use the exact node-name index to narrow candidates + * by short method name and accept exactly one Function/Method whose + * qualified_name has the same Class.method tail. * * Returns the matching node, or NULL if neither lookup hits. */ static inline const cbm_gbuf_node_t *cbm_pipeline_lsp_target_node(const cbm_gbuf_t *gbuf, @@ -156,21 +222,50 @@ static inline const cbm_gbuf_node_t *cbm_pipeline_lsp_target_node(const cbm_gbuf if (direct) { return direct; } - if (!project_name || !project_name[0]) { - return NULL; + if (project_name && project_name[0]) { + size_t proj_len = strlen(project_name); + if (!(strncmp(callee_qn, project_name, proj_len) == 0 && callee_qn[proj_len] == '.')) { + char buf[CBM_SZ_1K]; + int written = snprintf(buf, sizeof(buf), "%s.%s", project_name, callee_qn); + if (written > 0 && (size_t)written < sizeof(buf)) { + const cbm_gbuf_node_t *prefixed = cbm_gbuf_find_by_qn(gbuf, buf); + if (prefixed) { + return prefixed; + } + } + } } - /* Skip the prefix retry if callee_qn is already project-qualified — - * avoids producing nonsense like `proj.proj.foo.Bar`. */ - size_t proj_len = strlen(project_name); - if (strncmp(callee_qn, project_name, proj_len) == 0 && callee_qn[proj_len] == '.') { + + const char *short_name = strrchr(callee_qn, '.'); + short_name = short_name ? short_name + SKIP_ONE : callee_qn; + const char *callee_tail = cbm_pipeline_qn_class_method_tail(callee_qn); + if (!callee_tail) { return NULL; } - char buf[CBM_SZ_1K]; - int written = snprintf(buf, sizeof(buf), "%s.%s", project_name, callee_qn); - if (written < 0 || (size_t)written >= sizeof(buf)) { + const cbm_gbuf_node_t **hits = NULL; + int hit_count = 0; + if (cbm_gbuf_find_by_name(gbuf, short_name, &hits, &hit_count) != 0 || hit_count == 0) { return NULL; } - return cbm_gbuf_find_by_qn(gbuf, buf); + + const cbm_gbuf_node_t *match = NULL; + for (int i = 0; i < hit_count; i++) { + const cbm_gbuf_node_t *cand = hits[i]; + if (!cand || !cand->label || !cand->qualified_name) { + continue; + } + if (strcmp(cand->label, "Function") != 0 && strcmp(cand->label, "Method") != 0) { + continue; + } + if (!cbm_pipeline_qn_class_method_tail_eq(cand->qualified_name, callee_tail)) { + continue; + } + if (match) { + return NULL; + } + match = cand; + } + return match; } #endif /* CBM_PIPELINE_LSP_RESOLVE_H */ diff --git a/src/pipeline/pass_lsp_cross.c b/src/pipeline/pass_lsp_cross.c index 31a7500a..ca378b87 100644 --- a/src/pipeline/pass_lsp_cross.c +++ b/src/pipeline/pass_lsp_cross.c @@ -138,20 +138,121 @@ static const char *pxc_join_pipe(CBMArena *arena, const char *const *items) { return buf; } +static bool pxc_is_jvm_lang(CBMLanguage lang); + +static const char *pxc_last_component(const char *qn) { + if (!qn) { + return NULL; + } + const char *dot = strrchr(qn, '.'); + return dot ? dot + 1 : qn; +} + +static const char *pxc_jvm_type_qn(CBMArena *arena, const char *namespace_name, + const char *type_qn_or_name) { + if (!arena || !namespace_name || !namespace_name[0] || !type_qn_or_name) { + return type_qn_or_name; + } + const char *short_name = pxc_last_component(type_qn_or_name); + if (!short_name || !short_name[0]) { + return type_qn_or_name; + } + return cbm_arena_sprintf(arena, "%s.%s", namespace_name, short_name); +} + +static const char *pxc_jvm_def_qn(CBMArena *arena, const CBMDefinition *src, + const char *namespace_name, const char *label) { + if (!arena || !src || !namespace_name || !namespace_name[0]) { + return src ? src->qualified_name : NULL; + } + if (strcmp(label, "Method") == 0 || strcmp(label, "Function") == 0 || + strcmp(label, "Constructor") == 0) { + if (src->parent_class && src->parent_class[0]) { + return cbm_arena_sprintf(arena, "%s.%s.%s", namespace_name, + pxc_last_component(src->parent_class), src->name); + } + return cbm_arena_sprintf(arena, "%s.%s", namespace_name, src->name); + } + return cbm_arena_sprintf(arena, "%s.%s", namespace_name, src->name); +} + +static const char *pxc_infer_jvm_namespace(CBMArena *arena, const char *rel_path, + CBMLanguage lang) { + if (!arena || !rel_path || !pxc_is_jvm_lang(lang)) { + return NULL; + } + const char *root = NULL; + const char *lang_root = lang == CBM_LANG_KOTLIN ? "kotlin/" : "java/"; + if (strncmp(rel_path, "src/main/", 9) == 0 && + strncmp(rel_path + 9, lang_root, strlen(lang_root)) == 0) { + root = rel_path + 9 + strlen(lang_root); + } else if (strncmp(rel_path, "src/test/", 9) == 0 && + strncmp(rel_path + 9, lang_root, strlen(lang_root)) == 0) { + root = rel_path + 9 + strlen(lang_root); + } else { + const char *needle = lang == CBM_LANG_KOTLIN ? "/kotlin/" : "/java/"; + root = strstr(rel_path, needle); + if (root) { + root += strlen(needle); + } else if (strncmp(rel_path, "src/", 4) == 0) { + root = rel_path + 4; + } else { + root = strstr(rel_path, "/src/"); + if (root) { + root += strlen("/src/"); + } + } + } + if (!root || !root[0]) { + return NULL; + } + if (strncmp(root, "main/", 5) == 0 || strncmp(root, "test/", 5) == 0) { + root += 5; + } + if (strncmp(root, "java/", 5) == 0) { + root += 5; + } else if (strncmp(root, "kotlin/", 7) == 0) { + root += 7; + } + const char *slash = strrchr(root, '/'); + if (!slash || slash <= root) { + return NULL; + } + size_t len = (size_t)(slash - root); + char *ns = (char *)cbm_arena_alloc(arena, len + 1); + if (!ns) { + return NULL; + } + memcpy(ns, root, len); + ns[len] = '\0'; + for (size_t i = 0; i < len; i++) { + if (ns[i] == '/') { + ns[i] = '.'; + } + } + return ns; +} + /* Convert one CBMDefinition into a CBMLSPDef. Returns 0 on success, -1 * to skip (unsupported label or missing required field). dst gets borrowed * pointers into src and into `arena` for synthesised composites. */ static int pxc_build_lsp_def(CBMArena *arena, const CBMDefinition *src, const char *module_qn, - CBMLanguage lang, CBMLSPDef *dst) { + const char *namespace_name, CBMLanguage lang, CBMLSPDef *dst) { const char *label = pxc_map_label(src->label); if (!label || !src->qualified_name || !src->name) return -1; memset(dst, 0, sizeof(*dst)); - dst->qualified_name = src->qualified_name; + if (pxc_is_jvm_lang(lang) && namespace_name && namespace_name[0]) { + dst->qualified_name = pxc_jvm_def_qn(arena, src, namespace_name, label); + dst->receiver_type = pxc_jvm_type_qn(arena, namespace_name, src->parent_class); + } else { + dst->qualified_name = src->qualified_name; + dst->receiver_type = src->parent_class; + } dst->short_name = src->name; dst->label = label; - dst->receiver_type = src->parent_class; dst->def_module_qn = module_qn; + dst->namespace_name = namespace_name; dst->is_interface = (strcmp(label, "Interface") == 0 || strcmp(label, "Protocol") == 0); /* Single return-type string. The per-language registrars split on '|' * for multi-return languages (Go); single-return languages just see one @@ -190,9 +291,17 @@ CBMLSPDef *cbm_pxc_collect_all_defs(CBMFileResult **cache, const cbm_file_info_t def_modules[fi] = cbm_pipeline_fqn_module_dir(project_name, files[fi].rel_path, pxc_module_is_dir(files[fi].language)); } + const char *namespace_name = cache[fi]->namespace_name; + if ((!namespace_name || !namespace_name[0]) && files[fi].rel_path) { + namespace_name = + pxc_infer_jvm_namespace(&cache[fi]->arena, files[fi].rel_path, files[fi].language); + if (namespace_name && namespace_name[0]) { + cache[fi]->namespace_name = namespace_name; + } + } for (int di = 0; di < cache[fi]->defs.count; di++) { if (pxc_build_lsp_def(&cache[fi]->arena, &cache[fi]->defs.items[di], def_modules[fi], - files[fi].language, &defs[idx]) == 0) { + namespace_name, files[fi].language, &defs[idx]) == 0) { idx++; } } @@ -644,7 +753,9 @@ typedef struct { } pxc_module_entry_t; struct CBMModuleDefIndex { - CBMHashTable *ht; /* module_qn → pxc_module_entry_t* */ + CBMHashTable *ht; /* module_qn → pxc_module_entry_t* */ + CBMHashTable *namespace_ht; /* declared package/namespace → pxc_module_entry_t* */ + int def_count; /* total entries in the all_defs[] array */ }; /* cbm_ht_foreach callback: free each pxc_module_entry_t. */ @@ -657,119 +768,187 @@ static void pxc_module_entry_free_cb(const char *key, void *value, void *userdat free(e->indices); free(e); } +static pxc_module_entry_t *pxc_module_entry_get_or_create(CBMHashTable *ht, const char *key) { + if (!ht || !key || !key[0]) { + return NULL; + } + pxc_module_entry_t *e = (pxc_module_entry_t *)cbm_ht_get(ht, key); + if (e) { + return e; + } + e = (pxc_module_entry_t *)calloc(1, sizeof(*e)); + if (!e) { + return NULL; + } + e->cap = 8; + e->indices = (int *)calloc((size_t)e->cap, sizeof(*e->indices)); + if (!e->indices) { + free(e); + return NULL; + } + cbm_ht_set(ht, key, e); + return e; +} + +static void pxc_module_entry_add_index(pxc_module_entry_t *e, int index) { + if (!e) { + return; + } + if (e->count >= e->cap) { + int new_cap = e->cap * 2; + int *new_indices = (int *)realloc(e->indices, (size_t)new_cap * sizeof(*new_indices)); + if (!new_indices) { + return; + } + e->indices = new_indices; + e->cap = new_cap; + } + e->indices[e->count++] = index; +} + +static bool pxc_is_jvm_lang(CBMLanguage lang); +static bool pxc_def_lang_matches(CBMLanguage caller_lang, CBMLanguage def_lang); + +static int pxc_mark_entry_defs(bool *selected, const pxc_module_entry_t *e, + const CBMLSPDef *all_defs, CBMLanguage caller_lang) { + if (!selected || !e) { + return 0; + } + int added = 0; + for (int j = 0; j < e->count; j++) { + int idx = e->indices[j]; + const CBMLSPDef *def = &all_defs[idx]; + if (!pxc_def_lang_matches(caller_lang, def->lang) || selected[idx]) { + continue; + } + selected[idx] = true; + added++; + } + return added; +} + +static bool pxc_is_jvm_lang(CBMLanguage lang) { + return lang == CBM_LANG_JAVA || lang == CBM_LANG_KOTLIN; +} + +static bool pxc_def_lang_matches(CBMLanguage caller_lang, CBMLanguage def_lang) { + if (pxc_is_jvm_lang(caller_lang)) { + return pxc_is_jvm_lang(def_lang); + } + return true; +} + +static void pxc_mark_module_defs(const CBMModuleDefIndex *idx, bool *selected, + const CBMLSPDef *all_defs, CBMLanguage caller_lang, + const char *module_qn, int *total) { + if (!idx || !idx->ht || !module_qn || !module_qn[0]) { + return; + } + pxc_module_entry_t *e = (pxc_module_entry_t *)cbm_ht_get(idx->ht, module_qn); + int added = pxc_mark_entry_defs(selected, e, all_defs, caller_lang); + if (total) { + *total += added; + } +} CBMModuleDefIndex *cbm_pxc_build_module_def_index(CBMLSPDef *all_defs, int def_count) { - if (!all_defs || def_count <= 0) + if (!all_defs || def_count <= 0) { return NULL; + } CBMHashTable *ht = cbm_ht_create(64); - if (!ht) + CBMHashTable *namespace_ht = cbm_ht_create(64); + if (!ht || !namespace_ht) { + cbm_ht_free(ht); + cbm_ht_free(namespace_ht); return NULL; + } - /* Single pass: append each def's index into its module's dynamic array. */ + /* Single pass: index each def by file module and by declared package. + * JVM mixed roots (`src/main/java` + `src/main/kotlin`) share the + * declared package, not the path-derived module prefix. */ for (int i = 0; i < def_count; i++) { - const char *mod = all_defs[i].def_module_qn; - if (!mod) - continue; - pxc_module_entry_t *e = (pxc_module_entry_t *)cbm_ht_get(ht, mod); - if (!e) { - e = (pxc_module_entry_t *)calloc(1, sizeof(*e)); - if (!e) - continue; - e->cap = 16; - e->indices = (int *)malloc((size_t)e->cap * sizeof(int)); - if (!e->indices) { - free(e); - continue; - } - cbm_ht_set(ht, mod, e); - } - if (e->count >= e->cap) { - int new_cap = e->cap * 2; - int *new_indices = (int *)realloc(e->indices, (size_t)new_cap * sizeof(int)); - if (!new_indices) - continue; /* drop this entry, keep going */ - e->indices = new_indices; - e->cap = new_cap; - } - e->indices[e->count++] = i; + pxc_module_entry_add_index(pxc_module_entry_get_or_create(ht, all_defs[i].def_module_qn), + i); + pxc_module_entry_add_index( + pxc_module_entry_get_or_create(namespace_ht, all_defs[i].namespace_name), i); } CBMModuleDefIndex *idx = (CBMModuleDefIndex *)calloc(1, sizeof(*idx)); if (!idx) { cbm_ht_foreach(ht, pxc_module_entry_free_cb, NULL); cbm_ht_free(ht); + cbm_ht_foreach(namespace_ht, pxc_module_entry_free_cb, NULL); + cbm_ht_free(namespace_ht); return NULL; } idx->ht = ht; + idx->namespace_ht = namespace_ht; + idx->def_count = def_count; return idx; } void cbm_pxc_free_module_def_index(CBMModuleDefIndex *idx) { - if (!idx) + if (!idx) { return; + } if (idx->ht) { cbm_ht_foreach(idx->ht, pxc_module_entry_free_cb, NULL); cbm_ht_free(idx->ht); } + if (idx->namespace_ht) { + cbm_ht_foreach(idx->namespace_ht, pxc_module_entry_free_cb, NULL); + cbm_ht_free(idx->namespace_ht); + } free(idx); } CBMLSPDef *cbm_pxc_filter_defs_for_file(const CBMModuleDefIndex *idx, CBMLSPDef *all_defs, + CBMLanguage caller_lang, const char *caller_namespace, const char *own_module, const char *const *imp_qns, int imp_count, int *out_count) { - if (out_count) + if (out_count) { *out_count = 0; - if (!idx || !idx->ht || !all_defs || !out_count) + } + if (!idx || !idx->ht || !all_defs || !out_count || idx->def_count <= 0) { return NULL; - - /* Dedup module list (own_module may appear in imp_qns). For typical - * imp_count ~10 this O(N²) scan is fine and avoids registering the - * same def twice in the per-file registry. */ - const char *seen[64]; - int seen_count = 0; - if (own_module) { - seen[seen_count++] = own_module; } - for (int i = 0; i < imp_count && seen_count < (int)(sizeof(seen) / sizeof(seen[0])); i++) { - if (!imp_qns[i]) - continue; - bool dup = false; - for (int s = 0; s < seen_count; s++) { - if (strcmp(seen[s], imp_qns[i]) == 0) { - dup = true; - break; - } - } - if (!dup) - seen[seen_count++] = imp_qns[i]; + + bool *selected = (bool *)calloc((size_t)idx->def_count, sizeof(*selected)); + if (!selected) { + return NULL; } - /* Pass 1: total relevant defs. */ int total = 0; - for (int s = 0; s < seen_count; s++) { - pxc_module_entry_t *e = (pxc_module_entry_t *)cbm_ht_get(idx->ht, seen[s]); - if (e) - total += e->count; + pxc_mark_module_defs(idx, selected, all_defs, caller_lang, own_module, &total); + for (int i = 0; i < imp_count; i++) { + pxc_mark_module_defs(idx, selected, all_defs, caller_lang, imp_qns[i], &total); + } + if (pxc_is_jvm_lang(caller_lang) && caller_namespace && caller_namespace[0] && + idx->namespace_ht) { + pxc_module_entry_t *e = + (pxc_module_entry_t *)cbm_ht_get(idx->namespace_ht, caller_namespace); + total += pxc_mark_entry_defs(selected, e, all_defs, caller_lang); } - if (total == 0) + + if (total == 0) { + free(selected); return NULL; + } - /* Pass 2: copy CBMLSPDef structs (string fields stay borrowed from the - * caller's all_defs[] arena). */ CBMLSPDef *out = (CBMLSPDef *)malloc((size_t)total * sizeof(CBMLSPDef)); - if (!out) + if (!out) { + free(selected); return NULL; + } int n = 0; - for (int s = 0; s < seen_count; s++) { - pxc_module_entry_t *e = (pxc_module_entry_t *)cbm_ht_get(idx->ht, seen[s]); - if (!e) - continue; - for (int j = 0; j < e->count; j++) { - out[n++] = all_defs[e->indices[j]]; + for (int i = 0; i < idx->def_count; i++) { + if (selected[i]) { + out[n++] = all_defs[i]; } } *out_count = n; + free(selected); return out; } diff --git a/src/pipeline/pass_lsp_cross.h b/src/pipeline/pass_lsp_cross.h index adca0801..e6d53619 100644 --- a/src/pipeline/pass_lsp_cross.h +++ b/src/pipeline/pass_lsp_cross.h @@ -13,7 +13,8 @@ * file LSP picks them up. * * Languages covered: Go, C/C++/CUDA, Python, TypeScript/JavaScript/JSX/ - * TSX, PHP, C#. Anything else short-circuits via cbm_pxc_has_cross_lsp. + * TSX, PHP, C#, and JVM (Java/Kotlin via the shared filter helper). + * Anything else short-circuits via cbm_pxc_has_cross_lsp. * * Previously this work ran as a separate sequential pipeline pass * (cbm_pipeline_pass_lsp_cross) that re-read every source file from @@ -65,15 +66,13 @@ void cbm_pxc_ts_modes(CBMLanguage lang, const char *rel_path, bool *out_js, bool * modules. gopls observed the same: it builds per-package summaries * and per-file only loads the summaries the file imports. * - * cbm_pxc_build_module_def_index() builds an inverted index once - * (O(D)) mapping def_module_qn → list of indices into all_defs[]. - * cbm_pxc_filter_defs_for_file() then returns a small CBMLSPDef[] - * containing ONLY the defs from own_module + imp_qns — typically - * 50-100× smaller than the global all_defs[]. - * - * Net: per-file registry build drops from O(all_defs) to O(relevant_ - * defs). On a Go file importing 10 packages, relevant ≈ 1-2k vs - * 110k → ~50× per-file speedup on the dominant cost. */ + * cbm_pxc_build_module_def_index() builds inverted indexes once (O(D)): + * def_module_qn → defs and declared namespace/package → defs. + * cbm_pxc_filter_defs_for_file() then returns own_module + imp_qns for + * most languages. For Java/Kotlin callers it additionally returns + * same-namespace JVM defs so Gradle/Maven mixed source roots + * (`src/main/java/...` + `src/main/kotlin/...`) resolve same-package + * references without falling back to a full project registry per file. */ typedef struct CBMModuleDefIndex CBMModuleDefIndex; CBMModuleDefIndex *cbm_pxc_build_module_def_index(CBMLSPDef *all_defs, int def_count); @@ -81,12 +80,15 @@ CBMModuleDefIndex *cbm_pxc_build_module_def_index(CBMLSPDef *all_defs, int def_c void cbm_pxc_free_module_def_index(CBMModuleDefIndex *idx); /* Return a malloc'd CBMLSPDef[] containing all defs whose - * def_module_qn matches own_module OR any of imp_qns. String fields - * inside each entry are borrowed from the original all_defs[] arena - * (caller keeps it alive). Caller frees the returned array with - * free(). Writes the entry count to *out_count. Returns NULL if no - * matches (with *out_count = 0). */ + * def_module_qn matches own_module OR any of imp_qns. For Java/Kotlin + * callers, also include defs from the same declared package/namespace: + * JVM same-package references often cross `src/main/java` and + * `src/main/kotlin` roots without import statements. String fields inside + * each entry are borrowed from the original all_defs[] arena (caller keeps + * it alive). Caller frees the returned array with free(). Writes the entry + * count to *out_count. Returns NULL if no matches (with *out_count = 0). */ CBMLSPDef *cbm_pxc_filter_defs_for_file(const CBMModuleDefIndex *idx, CBMLSPDef *all_defs, + CBMLanguage caller_lang, const char *caller_namespace, const char *own_module, const char *const *imp_qns, int imp_count, int *out_count); diff --git a/src/pipeline/pass_parallel.c b/src/pipeline/pass_parallel.c index fefcf736..857f0548 100644 --- a/src/pipeline/pass_parallel.c +++ b/src/pipeline/pass_parallel.c @@ -1805,9 +1805,11 @@ static void resolve_file_calls(resolve_ctx_t *rc, resolve_worker_state_t *ws, CB const CBMResolvedCall *lsp = NULL; _rc_t0 = extract_now_ns(); if (lsp_idx && call->enclosing_func_qn) { + const char *call_leaf = cbm_pipeline_call_callee_leaf(call->callee_name); char key[1024]; - int kn = - snprintf(key, sizeof(key), "%s|%s", call->enclosing_func_qn, call->callee_name); + int kn = call_leaf + ? snprintf(key, sizeof(key), "%s|%s", call->enclosing_func_qn, call_leaf) + : -1; if (kn > 0 && kn < (int)sizeof(key)) { lsp = (const CBMResolvedCall *)cbm_ht_get(lsp_idx, key); } @@ -2212,16 +2214,17 @@ static void resolve_worker(int worker_id, void *ctx_ptr) { /* Cross-file LSP is a per-file tree-sitter re-parse + AST walk + * registry lookups — ~50-150ms per file. It can ONLY find calls * that exist in the AST. If the per-file extract found zero calls, - * cross-LSP will too: the AST is the same. And if every call is - * already resolved (resolved_calls.count >= calls.count), there's - * nothing left for cross-LSP to improve. Skip in both cases — - * pure perf win, zero semantic loss. This is the smart-pruning - * pre-condition that brings down kubernetes resolve time - * dramatically (most files have no cross-file calls left to - * resolve once per-file LSP has run). */ + * cross-LSP will too: the AST is the same. For non-JVM languages, + * skip when per-file LSP already produced at least as many resolved + * entries as textual calls. Java/Kotlin per-file LSP can fill the + * count with constructors or same-file calls while a mixed-source-root + * Java↔Kotlin call remains unresolved, so JVM callers run whenever + * calls exist. */ + bool jvm_cross_lsp = (lang == CBM_LANG_JAVA || lang == CBM_LANG_KOTLIN); bool cross_lsp_eligible = (rc->all_defs && rc->def_count > 0 && cbm_pxc_has_cross_lsp(lang) && - result->calls.count > 0 && result->resolved_calls.count < result->calls.count && + result->calls.count > 0 && + (jvm_cross_lsp || result->resolved_calls.count < result->calls.count) && !is_generated); /* Skip files with nothing else to resolve and no cross-LSP work. */ @@ -2352,9 +2355,9 @@ static void resolve_worker(int worker_id, void *ctx_ptr) { CBMLSPDef *ts_filtered = NULL; if (rc->module_def_index) { int fc = 0; - ts_filtered = - cbm_pxc_filter_defs_for_file(rc->module_def_index, rc->all_defs, - def_module, imp_vals, imp_count, &fc); + ts_filtered = cbm_pxc_filter_defs_for_file( + rc->module_def_index, rc->all_defs, lang, result->namespace_name, + def_module, imp_vals, imp_count, &fc); if (ts_filtered) { ts_defs = ts_filtered; ts_def_count = fc; @@ -2380,11 +2383,11 @@ static void resolve_worker(int worker_id, void *ctx_ptr) { /* Fallback: gopls per-file filter + per-file registry build. */ CBMLSPDef *file_defs = rc->all_defs; int file_def_count = rc->def_count; + int filtered_count = 0; if (rc->module_def_index) { - int filtered_count = 0; - filtered = cbm_pxc_filter_defs_for_file(rc->module_def_index, rc->all_defs, - def_module, imp_vals, imp_count, - &filtered_count); + filtered = cbm_pxc_filter_defs_for_file( + rc->module_def_index, rc->all_defs, lang, result->namespace_name, + def_module, imp_vals, imp_count, &filtered_count); if (filtered) { file_defs = filtered; file_def_count = filtered_count; diff --git a/tests/test_parallel.c b/tests/test_parallel.c index 746e1f2c..be412d7f 100644 --- a/tests/test_parallel.c +++ b/tests/test_parallel.c @@ -529,6 +529,163 @@ static void count_lsp_call_edges(const cbm_gbuf_edge_t *edge, void *ud) { } } +static const char *class_method_tail(const char *qn) { + if (!qn) { + return NULL; + } + const char *last = strrchr(qn, '.'); + if (!last || last == qn) { + return NULL; + } + const char *second = last; + while (second > qn) { + second--; + if (*second == '.') { + return second == qn ? qn : second + 1; + } + } + return qn; +} + +static const cbm_gbuf_node_t *find_unique_callable_node_by_tail(const cbm_gbuf_t *gbuf, + const char *tail) { + const char *method = tail ? strrchr(tail, '.') : NULL; + method = method ? method + 1 : tail; + if (!gbuf || !tail || !method) { + return NULL; + } + const cbm_gbuf_node_t **nodes = NULL; + int count = 0; + if (cbm_gbuf_find_by_name(gbuf, method, &nodes, &count) != 0) { + return NULL; + } + const cbm_gbuf_node_t *match = NULL; + for (int i = 0; i < count; i++) { + const cbm_gbuf_node_t *node = nodes[i]; + if (!node || !node->label || !node->qualified_name) { + continue; + } + if (strcmp(node->label, "Method") != 0 && strcmp(node->label, "Function") != 0) { + continue; + } + const char *node_tail = class_method_tail(node->qualified_name); + if (!node_tail || strcmp(node_tail, tail) != 0) { + continue; + } + if (match) { + return NULL; + } + match = node; + } + return match; +} + +static const cbm_gbuf_edge_t *find_calls_edge_by_tails(const cbm_gbuf_t *gbuf, + const char *source_tail, + const char *target_tail) { + const cbm_gbuf_node_t *source = find_unique_callable_node_by_tail(gbuf, source_tail); + const cbm_gbuf_node_t *target = find_unique_callable_node_by_tail(gbuf, target_tail); + if (!source || !target) { + return NULL; + } + + const cbm_gbuf_edge_t **edges = NULL; + int count = 0; + if (cbm_gbuf_find_edges_by_source_type(gbuf, source->id, "CALLS", &edges, &count) != 0) { + return NULL; + } + for (int i = 0; i < count; i++) { + if (edges[i] && edges[i]->target_id == target->id) { + return edges[i]; + } + } + return NULL; +} + +TEST(parallel_java_kotlin_lsp_override_cross_file_emits_lsp_strategy_edges) { + char tmpdir[256]; + snprintf(tmpdir, sizeof(tmpdir), "/tmp/cbm_par_jvm_XXXXXX"); + if (!cbm_mkdtemp(tmpdir)) { + FAIL("mkdtemp failed"); + } + + char jpath[512]; + snprintf(jpath, sizeof(jpath), "%s/src/main/java/com/example/Example.java", tmpdir); + char jdir[512]; + snprintf(jdir, sizeof(jdir), "%s/src/main/java/com/example", tmpdir); + cbm_mkdir_p(jdir, 0755); + FILE *jf = fopen(jpath, "w"); + if (!jf) { + FAIL("fopen example.java failed"); + } + fprintf(jf, "package com.example;\n" + "\n" + "class JavaCaller {\n" + " String call(KotlinService kotlinService) {\n" + " return kotlinService.ping(new JavaService());\n" + " }\n" + "}\n" + "\n" + "class JavaService {\n" + " String pong() {\n" + " return \"pong\";\n" + " }\n" + "}\n"); + fclose(jf); + + char kpath[512]; + snprintf(kpath, sizeof(kpath), "%s/src/main/kotlin/com/example/KotlinService.kt", tmpdir); + char kdir[512]; + snprintf(kdir, sizeof(kdir), "%s/src/main/kotlin/com/example", tmpdir); + cbm_mkdir_p(kdir, 0755); + FILE *kf = fopen(kpath, "w"); + if (!kf) { + unlink(jpath); + rmdir(tmpdir); + FAIL("fopen example.kt failed"); + } + fprintf(kf, "package com.example\n" + "\n" + "class KotlinService {\n" + " fun ping(javaService: JavaService): String {\n" + " return javaService.pong()\n" + " }\n" + "}\n"); + fclose(kf); + + cbm_file_info_t files[2] = {0}; + files[0].path = jpath; + files[0].rel_path = (char *)"src/main/java/com/example/Example.java"; + files[0].language = CBM_LANG_JAVA; + files[1].path = kpath; + files[1].rel_path = (char *)"src/main/kotlin/com/example/KotlinService.kt"; + files[1].language = CBM_LANG_KOTLIN; + + cbm_gbuf_t *gbuf = run_parallel("com", tmpdir, files, 2, 2); + ASSERT_NOT_NULL(gbuf); + + const cbm_gbuf_edge_t *java_to_kotlin = + find_calls_edge_by_tails(gbuf, "JavaCaller.call", "KotlinService.ping"); + const cbm_gbuf_edge_t *kotlin_to_java = + find_calls_edge_by_tails(gbuf, "KotlinService.ping", "JavaService.pong"); + + ASSERT_NOT_NULL(java_to_kotlin); + ASSERT_NOT_NULL(kotlin_to_java); + ASSERT_NOT_NULL(java_to_kotlin->properties_json); + ASSERT_NOT_NULL(kotlin_to_java->properties_json); + ASSERT_NOT_NULL(strstr(java_to_kotlin->properties_json, "\"strategy\":\"lsp")); + ASSERT_NOT_NULL(strstr(kotlin_to_java->properties_json, "\"strategy\":\"lsp")); + ASSERT_TRUE(strstr(java_to_kotlin->properties_json, "\"strategy\":\"callee_suffix\"") == NULL); + ASSERT_TRUE(strstr(kotlin_to_java->properties_json, "\"strategy\":\"callee_suffix\"") == NULL); + + cbm_gbuf_free(gbuf); + unlink(kpath); + unlink(jpath); + rmdir(tmpdir); + PASS(); +} + + TEST(parallel_python_lsp_override_emits_lsp_strategy_edges) { char tmpdir[256]; snprintf(tmpdir, sizeof(tmpdir), "/tmp/cbm_par_pylsp_XXXXXX"); @@ -719,6 +876,7 @@ SUITE(parallel) { RUN_TEST(parallel_node_count); RUN_TEST(parallel_python_lsp_override_emits_lsp_strategy_edges); RUN_TEST(parallel_python_lsp_override_cross_file_emits_lsp_strategy_edges); + RUN_TEST(parallel_java_kotlin_lsp_override_cross_file_emits_lsp_strategy_edges); RUN_TEST(parallel_calls_parity); RUN_TEST(parallel_defines_parity); RUN_TEST(parallel_defines_method_parity);