From db534952d60a7e51efdc4fcf1e9ec1c926797598 Mon Sep 17 00:00:00 2001
From: Dmitry Ilyin <6576495+widgetii@users.noreply.github.com>
Date: Sun, 3 May 2026 21:14:00 +0300
Subject: [PATCH] ptrace wait loop: forward signals, handle FORK/VFORK, don't break on race

While investigating an empty trace from libsns_jxf22.so on hi3518ev200,
two real bugs in the wait loop turned up that are worth fixing
independently of jxf22's specific issue.

* Signal forwarding. The loop ended every iteration with
  ptrace(PTRACE_SYSCALL, pid, 1, NULL). The fourth arg is the signal to
  inject when resuming the tracee, and NULL meant "drop the signal
  entirely". So if a child stopped on a real signal (anything other than
  SIGTRAP: SIGCHLD, SIGRT*, SIGUSR*, etc.), ipctool swallowed it instead
  of forwarding it. The HiSilicon SDK uses realtime signals heavily for
  video pipeline coordination; dropping them under trace can deadlock a
  streamer. Now: if the stop signal is SIGTRAP it stays at 0 (nothing to
  forward); if it's a genuine signal-delivery stop, the original signal
  gets re-injected when the tracee resumes. The exception is
  SIGSTOP/SIGTSTP/SIGTTIN/SIGTTOU: those stops are still suppressed,
  because re-injecting the SIGSTOP the kernel sends to a newly cloned
  tracee would just stop it again.

* PTRACE_EVENT_FORK / PTRACE_EVENT_VFORK weren't handled. #152 added the
  matching PTRACE_O_TRACEFORK/VFORK options but the wait loop only
  matched PTRACE_EVENT_CLONE. So a forked child fired PTRACE_EVENT_FORK
  in its parent (ignored), then on its first syscall stop the lookup
  against `pids` returned NULL and we hit the "BAD lookup" branch which
  `break`'d out of the wait loop - killing the whole trace. Now: the
  same CLONE handling block runs for any of the three events (CLONE,
  FORK or VFORK). Plus the BAD-lookup case no longer breaks - it just
  continues, since under TRACEFORK there's a brief window where a child
  can hit a syscall stop before its parent's EVENT_FORK arrives and we
  register it.

* Exit handling for unknown PIDs no longer breaks the loop either. If a
  child exits before we observed its creation event, we just skip the
  bookkeeping and keep tracing the rest.

tools/sns_init_probe.c added: a tiny dlopen+dlsym wrapper that loads a libsns_*.so directly and calls its sensor init function. Lets a future researcher exercise sensor I/O paths in isolation from the streamer (handy for narrowing down "empty trace" issues to the .so vs the surrounding application). Build instructions in the file header. Verified: * SC2315E + Majestic regression: 100/100/100% diff against widgetii/smart_sc2315e unchanged. * hi3518ev200 + jxf22 still produces an empty trace despite the signal/fork fixes. Strace confirms the streamer DOES make 79 write() calls of 2 bytes to a /dev/i2c-0 fd (opened TWICE: first at fd 18 by the probe code, then a second open at fd 25 by libsns_jxf22.so itself - that second open is what we're missing). The bug is somewhere else in the trace path on this specific camera/build combo; tracked separately. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ptrace.c | 62 +++++++++++++++++++--------- tools/sns_init_probe.c | 92 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+), 20 deletions(-) create mode 100644 tools/sns_init_probe.c diff --git a/src/ptrace.c b/src/ptrace.c index 025a13c..1ebd02c 100644 --- a/src/ptrace.c +++ b/src/ptrace.c @@ -1200,18 +1200,28 @@ static void do_trace(pid_t tracee) { if (child_waited == -1) break; + // Signal to forward when we resume the tracee. Stays 0 for syscall + // stops and ptrace events; gets set to the actual signal number for + // genuine signal-delivery stops, so the streamer's signal-driven + // logic (HiSilicon SDK uses realtime signals heavily) still works + // under trace. + int sig_to_inject = 0; + if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { - if (((status >> 16) & 0xffff) == PTRACE_EVENT_CLONE) { + int event = (status >> 16) & 0xffff; + // CLONE/FORK/VFORK all create a new tracee that needs the same + // bookkeeping: pull its pid from the kernel, look up the parent's + // process_t, copy its fd state, register the new tracee. 
+ if (event == PTRACE_EVENT_CLONE || event == PTRACE_EVENT_FORK || + event == PTRACE_EVENT_VFORK) { pid_t new_child; if (ptrace(PTRACE_GETEVENTMSG, child_waited, 0, &new_child) != -1) { pid_t ppid = -1; if (!ht_contains(&pids, &new_child)) { ppid = get_process_parent_id(new_child); - // TODO: review if (ppid == tracer) ppid = tracee; - // process_t *thread = &(process_t){.pid = new_child}; process_t *parent = ht_lookup(&pids, &ppid); if (parent) { @@ -1239,34 +1249,46 @@ static void do_trace(pid_t tracee) { printf("\nchild %d killed by signal %d\n", child_waited, WTERMSIG(status)); process_t *proc = ht_lookup(&pids, &child_waited); - if (proc == NULL) { - fprintf(stderr, "Cannot lookup PID %d\n", child_waited); - break; + if (proc != NULL) { + free_fds(proc); + ht_erase(&pids, &child_waited); } - free_fds(proc); - ht_erase(&pids, &child_waited); - if (ht_is_empty(&pids)) break; + continue; // don't try to restart a dead pid } else if (WIFSTOPPED(status)) { int stopCode = WSTOPSIG(status); if (stopCode == SIGTRAP) { process_t *proc = ht_lookup(&pids, &child_waited); - if (proc == NULL) { - printf("BAD lookup for %d\n", child_waited); - break; - } - - if (!proc->syscall_num) { - enter_syscall(proc); - } else { - exit_syscall(proc); - proc->syscall_num = 0; + if (proc != NULL) { + if (!proc->syscall_num) { + enter_syscall(proc); + } else { + exit_syscall(proc); + proc->syscall_num = 0; + } } + // If proc is NULL here it means a child was created with + // a fork-family event we didn't observe yet (TRACEFORK/ + // TRACEVFORK race). Continue tracing rather than killing + // the whole loop - it will get added on its first observed + // event. + } else if (stopCode == SIGSTOP || stopCode == SIGTSTP || + stopCode == SIGTTIN || stopCode == SIGTTOU) { + // Group-stop / post-clone init-stop. 
The kernel SIGSTOPs a + // newly cloned tracee as part of TRACECLONE bookkeeping; + // forwarding it back would re-stop the child and we'd + // never see its syscalls (this exact bug surfaced as + // "empty trace" on hi3518ev200 + libsns_jxf22.so where + // sensor I/O happens in a clone'd thread). Suppress. + } else { + // Real signal delivery - forward to the tracee. + sig_to_inject = stopCode; } } - ptrace(PTRACE_SYSCALL, child_waited, 1, NULL); + ptrace(PTRACE_SYSCALL, child_waited, 0, + (void *)(intptr_t)sig_to_inject); } } diff --git a/tools/sns_init_probe.c b/tools/sns_init_probe.c new file mode 100644 index 0000000..4fd4ece --- /dev/null +++ b/tools/sns_init_probe.c @@ -0,0 +1,92 @@ +// Tiny wrapper that loads a libsns_*.so directly and calls its sensor +// init function, without pulling in the rest of the streamer. Lets us +// exercise the .so's I2C path under ipctool trace in isolation. +// +// Build for ARM with the OpenIPC toolchain: +// arm-openipc-linux-musleabi-gcc -O2 -static \ +// tools/sns_init_probe.c -ldl -lpthread \ +// -o sns_init_probe +// +// Run on the camera (after killing the streamer so the I2C bus is free): +// killall majestic +// sns_init_probe /usr/lib/sensors/libsns_jxf22.so sensor_linear_1080p30_init +// +// Or under trace: +// ipctool trace --output=cap.log sns_init_probe /usr/lib/sensors/libsns_jxf22.so sensor_init +// +// We accept either symbol name; if neither is exported we fall back to +// sensor_init (the SDK glue most drivers expose). 
+// NOTE(review): the header names of these four includes were missing.
+// <dlfcn.h> and <stdio.h> are required by the calls below; <stdlib.h> and
+// <string.h> are a best guess - confirm against the original file.
+#include <dlfcn.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Signatures assumed for the symbols the probe calls: one int argument
+// (always passed as 0 here), returning either int or nothing.
+typedef int (*init_fn_int)(int);
+typedef void (*init_fn_void)(int);
+
+// Usage: sns_init_probe <libsns.so> [symbol]
+//
+// Loads argv[1] with dlopen(RTLD_NOW), resolves one init symbol and calls
+// it with argument 0.
+//
+// Symbol selection: if argv[2] is given, only that name is looked up.
+// Otherwise sensor_linear_1080p30_init, sensor_init and sensor_i2c_init
+// are tried in that order and the first one the library exports is used.
+// There is no further fallback: if none resolves, the probe fails.
+//
+// Around the chosen symbol, sensor_i2c_init is called first (when exported
+// and not itself the chosen symbol) and sensor_i2c_exit is called last
+// (when exported).
+//
+// Exit status: 2 on bad usage, 1 if the library or the symbol cannot be
+// resolved, 0 otherwise. The init function's return value is only logged.
+int main(int argc, char **argv)
+{
+    if (argc < 2) {
+        fprintf(stderr,
+                "usage: %s <libsns.so> [symbol]\n"
+                " default symbol search: sensor_linear_1080p30_init,\n"
+                " sensor_init, sensor_i2c_init\n",
+                argv[0]);
+        return 2;
+    }
+
+    const char *so = argv[1];
+    void *h = dlopen(so, RTLD_NOW);
+    if (!h) {
+        // NOTE(review): musl does not support dlopen() from a fully static
+        // binary, so the `-static` build suggested in the file header would
+        // presumably always end up here - confirm the build flags.
+        fprintf(stderr, "dlopen(%s) failed: %s\n", so, dlerror());
+        return 1;
+    }
+
+    // Candidate symbol names, in lookup order.
+    const char *try_syms[4];
+    int nsyms = 0;
+    if (argc >= 3) {
+        try_syms[nsyms++] = argv[2];
+    } else {
+        try_syms[nsyms++] = "sensor_linear_1080p30_init";
+        try_syms[nsyms++] = "sensor_init";
+        try_syms[nsyms++] = "sensor_i2c_init";
+    }
+
+    void *fn = NULL;
+    const char *sym = NULL;
+    for (int i = 0; i < nsyms; i++) {
+        dlerror(); // clear any stale error state before the lookup
+        fn = dlsym(h, try_syms[i]);
+        if (fn) {
+            sym = try_syms[i];
+            break;
+        }
+    }
+    if (!fn) {
+        fprintf(stderr, "no init symbol found in %s\n", so);
+        dlclose(h); // release the library on the error path too
+        return 1;
+    }
+
+    fprintf(stderr, "[probe] %s @ %p — calling\n", sym, fn);
+
+    // First always do sensor_i2c_init to set up the bus, in case the user
+    // asked for sensor_linear_1080p30_init directly (which doesn't open
+    // the i2c device).
+    void *i2c_init = dlsym(h, "sensor_i2c_init");
+    if (i2c_init && i2c_init != fn) {
+        ((init_fn_void)i2c_init)(0);
+    }
+
+    // Some symbols return int, some void. Calling void as int is harmless
+    // on ARM EABI (return value just goes unused).
+    // NOTE(review): calling through a mismatched function type is undefined
+    // behaviour in ISO C; for a void symbol the logged value is meaningless.
+    int ret = ((init_fn_int)fn)(0);
+    fprintf(stderr, "[probe] %s returned %d\n", sym, ret);
+
+    void *i2c_exit = dlsym(h, "sensor_i2c_exit");
+    if (i2c_exit) {
+        ((init_fn_void)i2c_exit)(0);
+    }
+
+    dlclose(h);
+    return 0;
+}