diff --git a/Makefile.cbm b/Makefile.cbm index f52d4fce..2f4bc7cd 100644 --- a/Makefile.cbm +++ b/Makefile.cbm @@ -385,6 +385,10 @@ TEST_SECURITY_SRCS = tests/test_security.c TEST_YAML_SRCS = tests/test_yaml.c +TEST_SEMANTIC_SRCS = tests/test_semantic.c +TEST_AST_PROFILE_SRCS = tests/test_ast_profile.c +TEST_SLAB_ALLOC_SRCS = tests/test_slab_alloc.c + TEST_SIMHASH_SRCS = tests/test_simhash.c TEST_STACK_OVERFLOW_SRCS = tests/test_stack_overflow.c @@ -447,7 +451,7 @@ TEST_REPRO_SRCS = \ tests/repro/repro_lsp_java_cs.c \ tests/repro/repro_lsp_kt_php_rust.c -ALL_TEST_SRCS =$(TEST_FOUNDATION_SRCS) $(TEST_EXTRACTION_SRCS) $(TEST_STORE_SRCS) $(TEST_CYPHER_SRCS) $(TEST_MCP_SRCS) $(TEST_DISCOVER_SRCS) $(TEST_GRAPH_BUFFER_SRCS) $(TEST_PIPELINE_SRCS) $(TEST_WATCHER_SRCS) $(TEST_LZ4_SRCS) $(TEST_ZSTD_SRCS) $(TEST_ARTIFACT_SRCS) $(TEST_SQLITE_WRITER_SRCS) $(TEST_GO_LSP_SRCS) $(TEST_C_LSP_SRCS) $(TEST_PHP_LSP_SRCS) $(TEST_CS_LSP_SRCS) $(TEST_CS_LSP_BENCH_SRCS) $(TEST_SCOPE_SRCS) $(TEST_TYPE_REP_SRCS) $(TEST_PY_LSP_SRCS) $(TEST_PY_LSP_BENCH_SRCS) $(TEST_PY_LSP_STRESS_SRCS) $(TEST_PY_LSP_SCALE_SRCS) $(TEST_TS_LSP_SRCS) $(TEST_JAVA_LSP_SRCS) $(TEST_KOTLIN_LSP_SRCS) $(TEST_RUST_LSP_SRCS) $(TEST_TRACES_SRCS) $(TEST_CLI_SRCS) $(TEST_MEM_SRCS) $(TEST_UI_SRCS) $(TEST_HTTPD_SRCS) $(TEST_SECURITY_SRCS) $(TEST_YAML_SRCS) $(TEST_SIMHASH_SRCS) $(TEST_STACK_OVERFLOW_SRCS) $(TEST_INTEGRATION_SRCS) +ALL_TEST_SRCS =$(TEST_FOUNDATION_SRCS) $(TEST_EXTRACTION_SRCS) $(TEST_STORE_SRCS) $(TEST_CYPHER_SRCS) $(TEST_MCP_SRCS) $(TEST_DISCOVER_SRCS) $(TEST_GRAPH_BUFFER_SRCS) $(TEST_PIPELINE_SRCS) $(TEST_WATCHER_SRCS) $(TEST_LZ4_SRCS) $(TEST_ZSTD_SRCS) $(TEST_ARTIFACT_SRCS) $(TEST_SQLITE_WRITER_SRCS) $(TEST_GO_LSP_SRCS) $(TEST_C_LSP_SRCS) $(TEST_PHP_LSP_SRCS) $(TEST_CS_LSP_SRCS) $(TEST_CS_LSP_BENCH_SRCS) $(TEST_SCOPE_SRCS) $(TEST_TYPE_REP_SRCS) $(TEST_PY_LSP_SRCS) $(TEST_PY_LSP_BENCH_SRCS) $(TEST_PY_LSP_STRESS_SRCS) $(TEST_PY_LSP_SCALE_SRCS) $(TEST_TS_LSP_SRCS) $(TEST_JAVA_LSP_SRCS) $(TEST_KOTLIN_LSP_SRCS) 
$(TEST_RUST_LSP_SRCS) $(TEST_TRACES_SRCS) $(TEST_CLI_SRCS) $(TEST_MEM_SRCS) $(TEST_UI_SRCS) $(TEST_HTTPD_SRCS) $(TEST_SECURITY_SRCS) $(TEST_YAML_SRCS) $(TEST_SEMANTIC_SRCS) $(TEST_AST_PROFILE_SRCS) $(TEST_SLAB_ALLOC_SRCS) $(TEST_SIMHASH_SRCS) $(TEST_STACK_OVERFLOW_SRCS) $(TEST_INTEGRATION_SRCS) # ── Build directories ──────────────────────────────────────────── diff --git a/tests/test_ast_profile.c b/tests/test_ast_profile.c new file mode 100644 index 00000000..a715ce08 --- /dev/null +++ b/tests/test_ast_profile.c @@ -0,0 +1,177 @@ +/* + * test_ast_profile.c — Unit tests for ast_profile.c (serialization/deserialization). + * + * Covers: to_str, from_str round-trip, to_vector, edge cases. + */ +#include "test_framework.h" +#include + +#include + +/* ── Helper: zeroes a profile, then sets the 25 counters that the round-trip test compares (try_count stays 0) ── */ + +static cbm_ast_profile_t make_profile(void) { + cbm_ast_profile_t p; + memset(&p, 0, sizeof(p)); + p.if_count = 5; + p.for_count = 3; + p.while_count = 1; + p.switch_count = 2; + p.try_count = 0; + p.return_count = 4; + p.max_nesting_depth = 3; + p.avg_nesting_depth_x10 = 15; + p.comparison_ops = 10; + p.arithmetic_ops = 8; + p.logical_ops = 2; + p.assignment_count = 7; + p.string_literals = 3; + p.number_literals = 5; + p.bool_literals = 1; + p.param_count = 4; + p.params_in_returns = 2; + p.params_in_conditions = 1; + p.variable_reassigns = 3; + p.unique_operators = 12; + p.unique_operands = 20; + p.total_operators = 45; + p.total_operands = 60; + p.body_lines = 30; + p.body_tokens = 200; + return p; +} + +/* ── to_str + from_str round-trip: serialize into a 200-byte buffer, parse it back, compare field by field ── */ + +TEST(ast_profile_roundtrip) { + cbm_ast_profile_t original = make_profile(); + char buf[200]; + cbm_ast_profile_to_str(&original, buf, sizeof(buf)); + + cbm_ast_profile_t decoded; + ASSERT_TRUE(cbm_ast_profile_from_str(buf, &decoded)); + + ASSERT_EQ(original.if_count, decoded.if_count); + ASSERT_EQ(original.for_count, decoded.for_count); + ASSERT_EQ(original.while_count, 
decoded.while_count); + ASSERT_EQ(original.switch_count, decoded.switch_count); + ASSERT_EQ(original.try_count, decoded.try_count); + ASSERT_EQ(original.return_count, decoded.return_count); + ASSERT_EQ(original.max_nesting_depth, decoded.max_nesting_depth); + ASSERT_EQ(original.avg_nesting_depth_x10, decoded.avg_nesting_depth_x10); + ASSERT_EQ(original.comparison_ops, decoded.comparison_ops); + ASSERT_EQ(original.arithmetic_ops, decoded.arithmetic_ops); + ASSERT_EQ(original.logical_ops, decoded.logical_ops); + ASSERT_EQ(original.assignment_count, decoded.assignment_count); + ASSERT_EQ(original.string_literals, decoded.string_literals); + ASSERT_EQ(original.number_literals, decoded.number_literals); + ASSERT_EQ(original.bool_literals, decoded.bool_literals); + ASSERT_EQ(original.param_count, decoded.param_count); + ASSERT_EQ(original.params_in_returns, decoded.params_in_returns); + ASSERT_EQ(original.params_in_conditions, decoded.params_in_conditions); + ASSERT_EQ(original.variable_reassigns, decoded.variable_reassigns); + ASSERT_EQ(original.unique_operators, decoded.unique_operators); + ASSERT_EQ(original.unique_operands, decoded.unique_operands); + ASSERT_EQ(original.total_operators, decoded.total_operators); + ASSERT_EQ(original.total_operands, decoded.total_operands); + ASSERT_EQ(original.body_lines, decoded.body_lines); + ASSERT_EQ(original.body_tokens, decoded.body_tokens); + PASS(); +} + +TEST(ast_profile_to_str_null) { + char buf[200]; + cbm_ast_profile_to_str(NULL, buf, sizeof(buf)); /* NULL profile: only checks that the call does not crash; buf is not inspected */ + PASS(); +} + +TEST(ast_profile_to_str_small_buf) { + cbm_ast_profile_t p = make_profile(); + /* NOTE(review): the size passed below is 0, so not even an empty string fits; the X placed in buf[0] is never re-checked */ + char buf[1] = {'X'}; + cbm_ast_profile_to_str(&p, buf, 0); + /* 0-length buffer: the test passes as long as the call returns without crashing */ + PASS(); +} + +TEST(ast_profile_from_str_null) { + ASSERT_FALSE(cbm_ast_profile_from_str(NULL, NULL)); + PASS(); +} + +TEST(ast_profile_from_str_invalid) { + cbm_ast_profile_t out; + 
ASSERT_FALSE(cbm_ast_profile_from_str("not,a,valid,string", &out)); + ASSERT_FALSE(cbm_ast_profile_from_str("", &out)); + PASS(); +} + +TEST(ast_profile_from_str_too_few_fields) { + cbm_ast_profile_t out; + /* Only 5 fields instead of 25 */ + ASSERT_FALSE(cbm_ast_profile_from_str("1,2,3,4,5", &out)); + PASS(); +} + +/* ── to_vector ───────────────────────────────────────────────────── */ + +TEST(ast_profile_to_vector_range) { + cbm_ast_profile_t p = make_profile(); + float vec[25]; + cbm_ast_profile_to_vector(&p, vec); + /* For this moderate fixture every component is expected to fall in [0, 1]; larger counts are expected to exceed 1 (see the extremes test below) */ + for (int i = 0; i < 25; i++) { + ASSERT_GTE(vec[i], 0.0f); + ASSERT_LTE(vec[i], 1.0f); + } + PASS(); +} + +TEST(ast_profile_to_vector_zero) { + cbm_ast_profile_t p; + memset(&p, 0, sizeof(p)); + float vec[25]; + cbm_ast_profile_to_vector(&p, vec); + /* All zeros should produce all-zero vector */ + for (int i = 0; i < 25; i++) { + ASSERT_FLOAT_EQ(vec[i], 0.0f, 0.001f); + } + PASS(); +} + +TEST(ast_profile_to_vector_null) { + float vec[25]; + memset(vec, 0xFF, sizeof(vec)); + cbm_ast_profile_to_vector(NULL, vec); /* should not crash or write; NOTE(review): the 0xFF fill is never checked afterwards, so the no-write half is not verified */ + PASS(); +} + +/* ── to_vector extremes: despite the saturates name, the test expects components above 1.0 (no clamping) ── */ + +TEST(ast_profile_to_vector_saturates) { + cbm_ast_profile_t p; + memset(&p, 0, sizeof(p)); + /* count / MAX_COUNT (100) → can exceed 1.0 (500 / 100 = 5.0); body_tokens / MAX_TOKENS (2000) = 2.5 */ + p.if_count = 500; + p.body_tokens = 5000; + float vec[25]; + cbm_ast_profile_to_vector(&p, vec); + ASSERT_TRUE(vec[0] > 1.0f); + ASSERT_TRUE(vec[24] > 1.0f); + PASS(); +} + +/* ── Suite ───────────────────────────────────────────────────────── */ + +SUITE(ast_profile) { + RUN_TEST(ast_profile_roundtrip); + RUN_TEST(ast_profile_to_str_null); + RUN_TEST(ast_profile_to_str_small_buf); + RUN_TEST(ast_profile_from_str_null); + RUN_TEST(ast_profile_from_str_invalid); + RUN_TEST(ast_profile_from_str_too_few_fields); + RUN_TEST(ast_profile_to_vector_range); + 
RUN_TEST(ast_profile_to_vector_zero); + RUN_TEST(ast_profile_to_vector_null); + RUN_TEST(ast_profile_to_vector_saturates); +} diff --git a/tests/test_main.c b/tests/test_main.c index 824b91fe..af54272e 100644 --- a/tests/test_main.c +++ b/tests/test_main.c @@ -120,6 +120,9 @@ extern void suite_grammar_probe_e(void); extern void suite_grammar_probe_f(void); extern void suite_grammar_probe_g(void); extern void suite_incremental(void); +extern void suite_semantic(void); +extern void suite_ast_profile(void); +extern void suite_slab_alloc(void); extern void suite_simhash(void); extern void suite_stack_overflow(void); extern void suite_dump_verify(void); @@ -239,6 +242,7 @@ int main(int argc, char **argv) { RUN_SELECTED_SUITE(parallel); /* mem + arena + slab integration */ + RUN_SELECTED_SUITE(slab_alloc); RUN_SELECTED_SUITE(mem); /* UI (config, embedded assets, layout) */ @@ -254,6 +258,8 @@ int main(int argc, char **argv) { RUN_SELECTED_SUITE(yaml); /* SimHash / SIMILAR_TO */ + RUN_SELECTED_SUITE(semantic); + RUN_SELECTED_SUITE(ast_profile); RUN_SELECTED_SUITE(simhash); /* Stack overflow regression (GitHub #199) */ diff --git a/tests/test_semantic.c b/tests/test_semantic.c new file mode 100644 index 00000000..d7d2066d --- /dev/null +++ b/tests/test_semantic.c @@ -0,0 +1,376 @@ +/* + * test_semantic.c — Unit tests for semantic.c (pure functions). + * + * Covers: tokenize, cosine, normalize, vec_add_scaled, random_index, + * proximity, diffuse, corpus lifecycle, get_config. 
+ */ +#include "test_framework.h" +#include + +#include +#include +#include + +/* ── Tokenize (each test releases the returned token strings with free()) ── */ + +TEST(sem_tokenize_camel) { + char *tokens[32]; + int n = cbm_sem_tokenize("parseUserInput", tokens, 32); + ASSERT_GTE(n, 3); + ASSERT_STR_EQ(tokens[0], "parse"); + ASSERT_STR_EQ(tokens[1], "user"); + ASSERT_STR_EQ(tokens[2], "input"); + for (int i = 0; i < n; i++) free(tokens[i]); + PASS(); +} + +TEST(sem_tokenize_snake) { + char *tokens[32]; + int n = cbm_sem_tokenize("handle_http_request", tokens, 32); + ASSERT_GTE(n, 3); + ASSERT_STR_EQ(tokens[0], "handle"); + ASSERT_STR_EQ(tokens[1], "http"); + ASSERT_STR_EQ(tokens[2], "request"); + for (int i = 0; i < n; i++) free(tokens[i]); + PASS(); +} + +TEST(sem_tokenize_dot) { + char *tokens[32]; + int n = cbm_sem_tokenize("net.http.client", tokens, 32); + ASSERT_GTE(n, 3); + ASSERT_STR_EQ(tokens[0], "net"); + ASSERT_STR_EQ(tokens[1], "http"); + ASSERT_STR_EQ(tokens[2], "client"); + for (int i = 0; i < n; i++) free(tokens[i]); + PASS(); +} + +TEST(sem_tokenize_null) { + int n = cbm_sem_tokenize(NULL, NULL, 0); + ASSERT_EQ(n, 0); + PASS(); +} + +TEST(sem_tokenize_max_out) { + char *tokens[3]; + int n = cbm_sem_tokenize("a_b_c_d_e_f_g", tokens, 3); + ASSERT_EQ(n, 3); + for (int i = 0; i < n; i++) free(tokens[i]); + PASS(); +} + +TEST(sem_tokenize_abbrev_expansion) { + char *tokens[32]; + int n = cbm_sem_tokenize("getCtxErrMsg", tokens, 32); + /* expected tokens: get, ctx, context, err, error, msg, message; only the ctx/context and err/error pairs are asserted below */ + ASSERT_GTE(n, 4); + bool has_ctx = false, has_context = false, has_err = false, has_error = false; + for (int i = 0; i < n; i++) { + if (strcmp(tokens[i], "ctx") == 0) has_ctx = true; + if (strcmp(tokens[i], "context") == 0) has_context = true; + if (strcmp(tokens[i], "err") == 0) has_err = true; + if (strcmp(tokens[i], "error") == 0) has_error = true; + } + ASSERT_TRUE(has_ctx && has_context && has_err && has_error); + for (int i = 0; i < n; i++) free(tokens[i]); + PASS(); +} + 
+/* ── Cosine similarity ───────────────────────────────────────────── */ + +static void fill_vec(cbm_sem_vec_t *v, float val) { + for (int i = 0; i < CBM_SEM_DIM; i++) v->v[i] = val; +} + +TEST(sem_cosine_identical) { + cbm_sem_vec_t a, b; + fill_vec(&a, 0.5f); + fill_vec(&b, 0.5f); + float sim = cbm_sem_cosine(&a, &b); + ASSERT_FLOAT_EQ(sim, 1.0f, 0.001f); + PASS(); +} + +TEST(sem_cosine_orthogonal) { + cbm_sem_vec_t a, b; + memset(&a, 0, sizeof(a)); + memset(&b, 0, sizeof(b)); + a.v[0] = 1.0f; + b.v[1] = 1.0f; + float sim = cbm_sem_cosine(&a, &b); + ASSERT_FLOAT_EQ(sim, 0.0f, 0.001f); + PASS(); +} + +TEST(sem_cosine_zero_vector) { + cbm_sem_vec_t a, b; + memset(&a, 0, sizeof(a)); + fill_vec(&b, 1.0f); + float sim = cbm_sem_cosine(&a, &b); + ASSERT_FLOAT_EQ(sim, 0.0f, 0.001f); + PASS(); +} + +TEST(sem_cosine_negative) { + cbm_sem_vec_t a, b; + memset(&a, 0, sizeof(a)); + memset(&b, 0, sizeof(b)); + a.v[0] = 1.0f; + b.v[0] = -1.0f; + float sim = cbm_sem_cosine(&a, &b); + ASSERT_FLOAT_EQ(sim, -1.0f, 0.001f); + PASS(); +} + +TEST(sem_cosine_null) { + ASSERT_FLOAT_EQ(cbm_sem_cosine(NULL, NULL), 0.0f, 0.001f); + PASS(); +} + +/* ── Normalize ───────────────────────────────────────────────────── */ + +TEST(sem_normalize_unit) { + cbm_sem_vec_t v; + memset(&v, 0, sizeof(v)); + v.v[0] = 1.0f; + cbm_sem_normalize(&v); + ASSERT_FLOAT_EQ(cbm_sem_cosine(&v, &v), 1.0f, 0.001f); + PASS(); +} + +TEST(sem_normalize_scales) { + cbm_sem_vec_t v; + fill_vec(&v, 2.0f); + cbm_sem_normalize(&v); + float mag_sq = 0.0f; + for (int i = 0; i < CBM_SEM_DIM; i++) mag_sq += v.v[i] * v.v[i]; + float mag = sqrtf(mag_sq); + ASSERT_FLOAT_EQ(mag, 1.0f, 0.01f); + PASS(); +} + +TEST(sem_normalize_zero) { + cbm_sem_vec_t v; + memset(&v, 0, sizeof(v)); + cbm_sem_normalize(&v); + /* Should remain zero (no division by zero); NOTE(review): the vector is not inspected afterwards, so only crash-freedom is tested */ + PASS(); +} + +TEST(sem_normalize_null) { + cbm_sem_normalize(NULL); /* should not crash */ + PASS(); +} + +/* ── Vec add scaled ──────────────────────────────────────────────── 
*/ + +TEST(sem_vec_add_scaled_basic) { + cbm_sem_vec_t dst; + memset(&dst, 0, sizeof(dst)); + cbm_sem_vec_t src; + fill_vec(&src, 1.0f); + cbm_sem_vec_add_scaled(&dst, &src, 0.5f); + ASSERT_FLOAT_EQ(dst.v[0], 0.5f, 0.001f); + ASSERT_FLOAT_EQ(dst.v[CBM_SEM_DIM - 1], 0.5f, 0.001f); + PASS(); +} + +TEST(sem_vec_add_scaled_null) { + cbm_sem_vec_t v; + fill_vec(&v, 1.0f); + cbm_sem_vec_add_scaled(NULL, &v, 1.0f); /* NULL dst: should not crash */ + cbm_sem_vec_add_scaled(&v, NULL, 1.0f); /* NULL src: should not crash */ + PASS(); +} + +/* ── Random index ────────────────────────────────────────────────── */ + +TEST(sem_random_index_deterministic) { + cbm_sem_vec_t a, b; + cbm_sem_random_index("hello", &a); + cbm_sem_random_index("hello", &b); + ASSERT_FLOAT_EQ(cbm_sem_cosine(&a, &b), 1.0f, 0.001f); + PASS(); +} + +TEST(sem_random_index_different_tokens) { + cbm_sem_vec_t a, b; + cbm_sem_random_index("function", &a); + cbm_sem_random_index("variable", &b); + /* Different tokens should produce different vectors (checked as cosine strictly below 1) */ + float sim = cbm_sem_cosine(&a, &b); + ASSERT_TRUE(sim < 1.0f - 1e-6f); + PASS(); +} + +TEST(sem_random_index_null) { + cbm_sem_vec_t v; + memset(&v, 0, sizeof(v)); + cbm_sem_random_index(NULL, &v); + /* Should produce zero vector for NULL token; v is pre-zeroed, so leaving it untouched also passes */ + for (int i = 0; i < CBM_SEM_DIM; i++) { + ASSERT_FLOAT_EQ(v.v[i], 0.0f, 0.001f); + } + PASS(); +} + +/* ── Proximity ───────────────────────────────────────────────────── */ + +TEST(sem_proximity_same_file) { + float p = cbm_sem_proximity("src/main.c", "src/main.c"); + ASSERT_FLOAT_EQ(p, 1.1f, 0.01f); /* CBM_SEM_UNIT_POS + CBM_SEM_PROX_MAX_BOOST */ + PASS(); +} + +TEST(sem_proximity_same_dir) { + /* Files sharing 1 of 2 directory components: ratio = 0.5 → 1.0 + 0.5*0.10 = 1.05; asserted only as strictly between 1.0 and 1.10 */ + float p = cbm_sem_proximity("src/core/a.c", "src/io/b.c"); + ASSERT_TRUE(p > 1.0f && p < 1.10f); + PASS(); +} + +TEST(sem_proximity_different_paths) { + float p = cbm_sem_proximity("src/foo/a.c", "tests/bar/b.c"); + ASSERT_FLOAT_EQ(p, 1.0f, 0.01f); + PASS(); 
+} + +TEST(sem_proximity_null) { + ASSERT_FLOAT_EQ(cbm_sem_proximity(NULL, "foo.c"), 1.0f, 0.01f); + ASSERT_FLOAT_EQ(cbm_sem_proximity("foo.c", NULL), 1.0f, 0.01f); + PASS(); +} + +/* ── Diffuse ─────────────────────────────────────────────────────── */ + +TEST(sem_diffuse_zero_neighbors) { + cbm_sem_vec_t v; + fill_vec(&v, 0.5f); + cbm_sem_diffuse(&v, NULL, 0, 0.3f); + /* With zero neighbors, vector should be unchanged (only component 0 is checked) */ + ASSERT_FLOAT_EQ(v.v[0], 0.5f, 0.001f); + PASS(); +} + +TEST(sem_diffuse_single_neighbor) { + cbm_sem_vec_t v; + memset(&v, 0, sizeof(v)); + v.v[0] = 0.5f; + v.v[1] = 0.5f; + cbm_sem_normalize(&v); /* unit-length input */ + cbm_sem_vec_t nb; + memset(&nb, 0, sizeof(nb)); + nb.v[0] = 1.0f; + cbm_sem_normalize(&nb); + cbm_sem_diffuse(&v, &nb, 1, 0.3f); + /* After diffuse+normalize, result should still be unit-length */ + float mag_sq = 0.0f; + for (int i = 0; i < CBM_SEM_DIM; i++) mag_sq += v.v[i] * v.v[i]; + ASSERT_FLOAT_EQ(sqrtf(mag_sq), 1.0f, 0.01f); + /* Component 0 should be pulled toward neighbor's strong dim-0; the assertion only requires it to stay positive */ + ASSERT_TRUE(v.v[0] > 0.0f); + PASS(); +} + +/* ── Corpus lifecycle ────────────────────────────────────────────── */ + +TEST(sem_corpus_new_free) { + cbm_sem_corpus_t *c = cbm_sem_corpus_new(); + ASSERT_NOT_NULL(c); + ASSERT_EQ(cbm_sem_corpus_doc_count(c), 0); + ASSERT_EQ(cbm_sem_corpus_token_count(c), 0); + cbm_sem_corpus_free(c); + PASS(); +} + +TEST(sem_corpus_add_one_doc) { + cbm_sem_corpus_t *c = cbm_sem_corpus_new(); + ASSERT_NOT_NULL(c); + const char *tokens[] = {"parse", "user", "input"}; + cbm_sem_corpus_add_doc(c, tokens, 3); + ASSERT_EQ(cbm_sem_corpus_doc_count(c), 1); + ASSERT_TRUE(cbm_sem_corpus_token_count(c) > 0); + cbm_sem_corpus_free(c); + PASS(); +} + +TEST(sem_corpus_idf) { + cbm_sem_corpus_t *c = cbm_sem_corpus_new(); + ASSERT_NOT_NULL(c); + const char *doc1[] = {"a", "b", "c"}; + const char *doc2[] = {"a", "d", "e"}; + cbm_sem_corpus_add_doc(c, doc1, 3); + cbm_sem_corpus_add_doc(c, doc2, 3); + /* IDF for "a" 
(appears in 2 docs): log(2/2) = log(1) = 0 */ + float idf_a = cbm_sem_corpus_idf(c, "a"); + ASSERT_TRUE(idf_a < 0.01f); + /* IDF for "b" (appears in 1 doc): log(2/1) > 0 */ + float idf_b = cbm_sem_corpus_idf(c, "b"); + ASSERT_TRUE(idf_b > 0.0f); + cbm_sem_corpus_free(c); + PASS(); +} + +TEST(sem_corpus_add_null_doc) { + cbm_sem_corpus_t *c = cbm_sem_corpus_new(); + ASSERT_NOT_NULL(c); + cbm_sem_corpus_add_doc(c, NULL, 0); + cbm_sem_corpus_add_doc(c, NULL, -1); + ASSERT_EQ(cbm_sem_corpus_doc_count(c), 0); + cbm_sem_corpus_free(c); + PASS(); +} + +TEST(sem_corpus_free_null) { + cbm_sem_corpus_free(NULL); /* should not crash */ + PASS(); +} + +/* ── Config: the returned defaults must have positive w_tfidf, w_ri, threshold and max_edges ── */ + +TEST(sem_get_config_defaults) { + cbm_sem_config_t cfg = cbm_sem_get_config(); + ASSERT_TRUE(cfg.w_tfidf > 0.0f); + ASSERT_TRUE(cfg.w_ri > 0.0f); + ASSERT_TRUE(cfg.threshold > 0.0f); + ASSERT_TRUE(cfg.max_edges > 0); + PASS(); +} + +/* ── Suite ───────────────────────────────────────────────────────── */ + +SUITE(semantic) { + RUN_TEST(sem_tokenize_camel); + RUN_TEST(sem_tokenize_snake); + RUN_TEST(sem_tokenize_dot); + RUN_TEST(sem_tokenize_null); + RUN_TEST(sem_tokenize_max_out); + RUN_TEST(sem_tokenize_abbrev_expansion); + RUN_TEST(sem_cosine_identical); + RUN_TEST(sem_cosine_orthogonal); + RUN_TEST(sem_cosine_zero_vector); + RUN_TEST(sem_cosine_negative); + RUN_TEST(sem_cosine_null); + RUN_TEST(sem_normalize_unit); + RUN_TEST(sem_normalize_scales); + RUN_TEST(sem_normalize_zero); + RUN_TEST(sem_normalize_null); + RUN_TEST(sem_vec_add_scaled_basic); + RUN_TEST(sem_vec_add_scaled_null); + RUN_TEST(sem_random_index_deterministic); + RUN_TEST(sem_random_index_different_tokens); + RUN_TEST(sem_random_index_null); + RUN_TEST(sem_proximity_same_file); + RUN_TEST(sem_proximity_same_dir); + RUN_TEST(sem_proximity_different_paths); + RUN_TEST(sem_proximity_null); + RUN_TEST(sem_diffuse_zero_neighbors); + RUN_TEST(sem_diffuse_single_neighbor); + 
RUN_TEST(sem_corpus_new_free); + RUN_TEST(sem_corpus_add_one_doc); + RUN_TEST(sem_corpus_idf); + RUN_TEST(sem_corpus_add_null_doc); + RUN_TEST(sem_corpus_free_null); + RUN_TEST(sem_get_config_defaults); +} diff --git a/tests/test_slab_alloc.c b/tests/test_slab_alloc.c new file mode 100644 index 00000000..94b22da3 --- /dev/null +++ b/tests/test_slab_alloc.c @@ -0,0 +1,195 @@ +/* + * test_slab_alloc.c — Unit tests for slab_alloc.c. + * + * Covers: slab malloc/free/calloc/realloc for ≤64B and >64B paths, + * realloc grow/shrink, zeroing, ownership, thread reset. + */ +#include "test_framework.h" +#include + +#include +#include +#include + +/* ── Constants from slab_alloc.c (local copy of the value; NOTE(review): confirm it still matches slab_alloc.c) ── */ +enum { SLAB_CHUNK_SIZE = 64 }; + +/* ── Alloc → free (small, slab path) ─────────────────────────────── */ + +TEST(slab_malloc_free_small) { + void *p = cbm_slab_test_malloc(32); + ASSERT_NOT_NULL(p); + memset(p, 0xAA, 32); + cbm_slab_test_free(p); + PASS(); +} + +TEST(slab_malloc_free_large) { + void *p = cbm_slab_test_malloc(128); + ASSERT_NOT_NULL(p); + memset(p, 0xBB, 128); + cbm_slab_test_free(p); + PASS(); +} + +TEST(slab_malloc_zero_size) { + /* malloc(0) should return non-NULL on most platforms; NOTE(review): the assertion below makes non-NULL a hard requirement */ + void *p = cbm_slab_test_malloc(0); + ASSERT_NOT_NULL(p); + cbm_slab_test_free(p); + PASS(); +} + +/* ── Calloc ──────────────────────────────────────────────────────── */ + +TEST(slab_calloc_zeros) { + void *p = cbm_slab_test_calloc(10, 8); + ASSERT_NOT_NULL(p); + unsigned char *b = p; + for (int i = 0; i < 80; i++) { + ASSERT_EQ(b[i], 0); + } + cbm_slab_test_free(p); + PASS(); +} + +TEST(slab_calloc_overflows) { + void *p = cbm_slab_test_calloc(SIZE_MAX, 2); + ASSERT_NULL(p); + PASS(); +} + +/* ── Realloc ─────────────────────────────────────────────────────── */ + +TEST(slab_realloc_null_is_malloc) { + void *p = cbm_slab_test_realloc(NULL, 64); + ASSERT_NOT_NULL(p); + cbm_slab_test_free(p); + PASS(); +} + +TEST(slab_realloc_shrink_to_zero) { + void *p = 
cbm_slab_test_malloc(32); + ASSERT_NOT_NULL(p); + void *q = cbm_slab_test_realloc(p, 0); /* frees and returns NULL */ + ASSERT_NULL(q); + PASS(); +} + +TEST(slab_realloc_small_keep_same) { + void *p = cbm_slab_test_malloc(16); + ASSERT_NOT_NULL(p); + memset(p, 0xCC, 16); + /* Realloc within slab size: same pointer, content preserved (only byte 0 is checked) */ + void *q = cbm_slab_test_realloc(p, 32); + ASSERT_EQ(p, q); + unsigned char *b = q; + ASSERT_EQ(b[0], 0xCC); + cbm_slab_test_free(q); + PASS(); +} + +TEST(slab_realloc_promote_to_heap) { + void *p = cbm_slab_test_malloc(32); + ASSERT_NOT_NULL(p); + memset(p, 0xDD, 32); + /* Realloc above slab threshold: content copied (first and last of the 32 original bytes are checked); that the pointer changes is expected but not asserted */ + void *q = cbm_slab_test_realloc(p, 128); + ASSERT_NOT_NULL(q); + unsigned char *b = q; + ASSERT_EQ(b[0], 0xDD); + ASSERT_EQ(b[31], 0xDD); + cbm_slab_test_free(q); + PASS(); +} + +/* ── Multiple allocations (free list cycling) ────────────────────── */ + +TEST(slab_many_small_allocs) { + void *ptrs[64]; + for (int i = 0; i < 64; i++) { + ptrs[i] = cbm_slab_test_malloc(SLAB_CHUNK_SIZE); + ASSERT_NOT_NULL(ptrs[i]); + memset(ptrs[i], (int)(i & 0xFF), SLAB_CHUNK_SIZE); + } + /* Free all: they go back to free list */ + for (int i = 0; i < 64; i++) { + cbm_slab_test_free(ptrs[i]); + } + /* Re-allocate after freeing everything; NOTE(review): recycling from the free list is not asserted, only that the allocation succeeds */ + void *again = cbm_slab_test_malloc(SLAB_CHUNK_SIZE); + ASSERT_NOT_NULL(again); + cbm_slab_test_free(again); + PASS(); +} + +/* ── Distinct pointer addresses ──────────────────────────────────── */ + +TEST(slab_distinct_allocations) { + void *a = cbm_slab_test_malloc(SLAB_CHUNK_SIZE); + void *b = cbm_slab_test_malloc(SLAB_CHUNK_SIZE); + void *c = cbm_slab_test_malloc(SLAB_CHUNK_SIZE); + ASSERT_NOT_NULL(a); + ASSERT_NOT_NULL(b); + ASSERT_NOT_NULL(c); + ASSERT_NEQ(a, b); + ASSERT_NEQ(b, c); + ASSERT_NEQ(a, c); + cbm_slab_test_free(a); + cbm_slab_test_free(b); + cbm_slab_test_free(c); + PASS(); +} + +/* ── Large allocation (heap path, >64B) ──────────────────────────── */ 
+ +TEST(slab_large_alloc_free) { + void *p = cbm_slab_test_malloc(1024); + ASSERT_NOT_NULL(p); + cbm_slab_test_free(p); + PASS(); +} + +TEST(slab_large_calloc_zero) { + void *p = cbm_slab_test_calloc(64, 16); + ASSERT_NOT_NULL(p); + unsigned char *b = p; + ASSERT_EQ(b[0], 0); + ASSERT_EQ(b[1023], 0); + cbm_slab_test_free(p); + PASS(); +} + +/* ── Reset thread ────────────────────────────────────────────────── */ + +TEST(slab_reset_thread) { + /* Allocate and free, then call cbm_slab_reset_thread(): should not crash */ + void *p = cbm_slab_test_malloc(32); + ASSERT_NOT_NULL(p); + cbm_slab_test_free(p); + cbm_slab_reset_thread(); + /* After reset, new allocs should still work (one 32-byte allocation is tried) */ + void *q = cbm_slab_test_malloc(32); + ASSERT_NOT_NULL(q); + cbm_slab_test_free(q); + PASS(); +} + +/* ── Suite ───────────────────────────────────────────────────────── */ + +SUITE(slab_alloc) { + RUN_TEST(slab_malloc_free_small); + RUN_TEST(slab_malloc_free_large); + RUN_TEST(slab_malloc_zero_size); + RUN_TEST(slab_calloc_zeros); + RUN_TEST(slab_calloc_overflows); + RUN_TEST(slab_realloc_null_is_malloc); + RUN_TEST(slab_realloc_shrink_to_zero); + RUN_TEST(slab_realloc_small_keep_same); + RUN_TEST(slab_realloc_promote_to_heap); + RUN_TEST(slab_many_small_allocs); + RUN_TEST(slab_distinct_allocations); + RUN_TEST(slab_large_alloc_free); + RUN_TEST(slab_large_calloc_zero); + RUN_TEST(slab_reset_thread); +}