Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion caddy/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,13 @@ func (admin *FrankenPHPAdmin) restartWorkers(w http.ResponseWriter, r *http.Requ
return admin.error(http.StatusMethodNotAllowed, fmt.Errorf("method not allowed"))
}

frankenphp.RestartWorkers()
if err := frankenphp.RestartWorkers(); err != nil {
// Restart is incomplete: at least one worker thread was stuck in
// an uninterruptible blocking call and did not reload code. Do
// not let the admin endpoint lie to automation with a 200.
caddy.Log().Sugar().Errorf("workers restart incomplete: %v", err)
return admin.error(http.StatusInternalServerError, err)
}
caddy.Log().Info("workers restarted from admin api")
admin.success(w, "workers restarted successfully\n")

Expand Down
223 changes: 212 additions & 11 deletions frankenphp.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,148 @@ static bool is_forked_child = false;
static void frankenphp_fork_child(void) { is_forked_child = true; }
#endif

/* Best-effort force-kill for PHP threads after the graceful-drain grace
* period. Each thread captures pointers to its own executor_globals'
* vm_interrupt and timed_out atomic bools at boot and hands them back to
* Go via go_frankenphp_store_force_kill_slot. From any goroutine, the
* Go side passes that slot back to frankenphp_force_kill_thread, which
* stores true into both bools, waking the VM at the next opcode boundary
* and unwinding the thread through zend_timeout().
*
* On platforms with POSIX realtime signals (Linux, FreeBSD), force-kill
* also delivers SIGRTMIN+3 to the target thread so any in-flight blocking
* syscall (select, sleep, nanosleep, blocking I/O without SA_RESTART)
* returns EINTR and the VM gets a chance to observe the atomic bools on
* the next opcode. On Windows, CancelSynchronousIo + QueueUserAPC does
* the equivalent for alertable I/O and SleepEx. Non-alertable Sleep()
* (including PHP's usleep on Windows) stays uninterruptible - the VM
* must wait for it to return naturally before bailing.
*
* macOS has no realtime signals exposed to user-space, so the atomic
* bool path is the only mechanism there: threads busy-looping in PHP
* are killed promptly, threads stuck in blocking syscalls wait to
* return on their own.
*
* JIT caveat: when the OPcache JIT is enabled, some hot code paths do
* not check vm_interrupt between opcodes. A thread stuck in a
* JIT-compiled busy loop may not observe the atomic-bool store at all
* (see https://github.com/php/php-src/issues/21267). The syscall-
* interruption path (signal -> EINTR) still works since the kernel
* wakes the thread regardless of JIT state, so the regression surface
* is pure-PHP busy loops under JIT. Those fall through to the abandon
* path after forceKillDeadline.
*
* Signal number reservation: SIGRTMIN+3 is reserved by FrankenPHP for
* force-kill. If a PHP user script registers its own handler via
* pcntl_signal(SIGRTMIN+3, ...), it clobbers ours and force-kill stops
* working for threads it runs on. Projects embedding FrankenPHP
* alongside their own Go code that also uses that signal must choose a
* different one here. glibc's NPTL reserves SIGRTMIN..SIGRTMIN+2 for
* its own use, so do not move this offset downward.
*
* The slot lives in the Go-side phpThread struct - there is no C-side
* array or init/destroy dance. Signal handler installation happens once
* via pthread_once the first time a thread registers. */
#ifdef PHP_WIN32
/* Empty APC routine handed to QueueUserAPC by frankenphp_force_kill_thread.
 * It does no work itself; it only exists so that an APC can be queued to
 * the target thread. */
static void CALLBACK frankenphp_noop_apc(ULONG_PTR param) {
  (void)param; /* unused */
}
#endif

#ifdef FRANKENPHP_HAS_KILL_SIGNAL
/* No-op handler: signal delivery is sufficient on its own because it
 * forces the in-flight syscall to return EINTR. The VM then observes
 * vm_interrupt on the next opcode and unwinds via zend_timeout(). */
static void frankenphp_kill_signal_handler(int sig) { (void)sig; }

/* Once-guard passed to pthread_once together with
 * install_kill_signal_handler so the handler is installed a single time. */
static pthread_once_t kill_signal_handler_installed = PTHREAD_ONCE_INIT;

/* pthread_once init routine that installs frankenphp_kill_signal_handler
 * for FRANKENPHP_KILL_SIGNAL.
 *
 * The no-op handler is installed process-wide without SA_RESTART so
 * blocking syscalls return EINTR when the signal is delivered rather than
 * being transparently restarted by libc. SA_ONSTACK is set defensively:
 * the signal targets non-Go pthreads via pthread_kill, but if it's ever
 * delivered to a Go-managed thread (e.g. through accidental process-level
 * raise), Go requires the handler to run on the alternate signal stack to
 * avoid corrupting the goroutine's. */
static void install_kill_signal_handler(void) {
  struct sigaction sa;

  memset(&sa, 0, sizeof(sa));
  sa.sa_handler = frankenphp_kill_signal_handler;
  sigemptyset(&sa.sa_mask);
  sa.sa_flags = SA_ONSTACK; /* intentionally no SA_RESTART, see above */

  /* Result deliberately discarded: a pthread_once init routine returns
   * void and has no channel to report a failure to its caller. */
  (void)sigaction(FRANKENPHP_KILL_SIGNAL, &sa, NULL);
}
#endif

/* shutdown_in_progress is toggled by the Go side through
* frankenphp_set_shutdown_in_progress(). It is the only honest signal the
* unhealthy-thread restart path has to tell "we are tearing the runtime
* down, do not respawn" apart from normal operation - thread_metrics is
* never NULL anymore because Shutdown intentionally leaves it allocated
* for abandoned threads still writing into it. */
static zend_atomic_bool shutdown_in_progress;

/* Atomically publishes the shutdown state into shutdown_in_progress.
 *
 * v: true while the runtime is being torn down, false otherwise. */
void frankenphp_set_shutdown_in_progress(bool v) {
  const bool in_progress = v;

  zend_atomic_bool_store(&shutdown_in_progress, in_progress);
}

/* Called by each PHP thread at boot, from its own TSRM context, so that
* the EG-backed addresses resolve to the thread's private executor_globals
* and the captured thread identity refers to itself. Hands the slot to
* the Go side via go_frankenphp_store_force_kill_slot; the slot's
* lifetime is the phpThread's. */
/* Builds the force-kill slot for the calling thread and hands it to Go.
 *
 * idx: index of the PHP thread, forwarded unchanged to
 *      go_frankenphp_store_force_kill_slot.
 *
 * The slot always carries the addresses of EG(vm_interrupt) and
 * EG(timed_out). Depending on the platform it additionally carries the
 * caller's pthread_t or a duplicated Windows thread handle. */
void frankenphp_register_thread_for_kill(uintptr_t idx) {
#ifdef FRANKENPHP_HAS_KILL_SIGNAL
  /* Make sure the signal handler is installed (once) before this thread
   * becomes a force-kill target. */
  pthread_once(&kill_signal_handler_installed, install_kill_signal_handler);
#endif

  force_kill_slot slot;
  memset(&slot, 0, sizeof(slot));

  slot.timed_out = &EG(timed_out);
  slot.vm_interrupt = &EG(vm_interrupt);

#ifdef FRANKENPHP_HAS_KILL_SIGNAL
  slot.tid = pthread_self();
#elif defined(PHP_WIN32)
  /* GetCurrentThread() yields a pseudo-handle, so duplicate it into a
   * handle that can be stored in the slot. DuplicateHandle can fail under
   * resource pressure; in that case thread_handle keeps the NULL left by
   * the memset above and force_kill_thread falls back to the atomic-bool
   * path only. */
  HANDLE self = NULL;
  if (DuplicateHandle(GetCurrentProcess(), GetCurrentThread(),
                      GetCurrentProcess(), &self, 0, FALSE,
                      DUPLICATE_SAME_ACCESS)) {
    slot.thread_handle = self;
  }
#endif

  go_frankenphp_store_force_kill_slot(idx, slot);
}

/* Best-effort interruption of the PHP thread described by `slot`.
 *
 * slot: force-kill slot previously produced by
 *       frankenphp_register_thread_for_kill. A slot whose vm_interrupt is
 *       NULL is ignored.
 *
 * Stores true into the thread's timed_out and vm_interrupt flags, then
 * nudges the thread with the platform mechanism (pthread_kill with
 * FRANKENPHP_KILL_SIGNAL, or CancelSynchronousIo + QueueUserAPC on
 * Windows) when one is available. */
void frankenphp_force_kill_thread(force_kill_slot slot) {
  if (slot.vm_interrupt != NULL) {
    /* Flags go first so that by the time the thread wakes up - whether
     * from our signal/APC or naturally - the VM sees them and routes
     * through zend_timeout() -> "Maximum execution time exceeded". */
    zend_atomic_bool_store(slot.timed_out, true);
    zend_atomic_bool_store(slot.vm_interrupt, true);

#ifdef FRANKENPHP_HAS_KILL_SIGNAL
    /* Return value intentionally ignored: ESRCH (thread already exited)
     * and EINVAL are both benign - there is simply nothing to unblock. */
    pthread_kill(slot.tid, FRANKENPHP_KILL_SIGNAL);
#elif defined(PHP_WIN32)
    if (slot.thread_handle == NULL) {
      /* No duplicated handle: only the atomic-bool path is available. */
      return;
    }
    CancelSynchronousIo(slot.thread_handle);
    QueueUserAPC((PAPCFUNC)frankenphp_noop_apc, slot.thread_handle, 0);
#endif
  }
  /* else: thread never reached register_thread_for_kill (aborted during
   * boot), nothing to do. */
}

/* Releases any OS resource tied to the slot (currently: CloseHandle on
* Windows). Called by the Go side when a phpThread is torn down. */
/* Frees the OS resource owned by `slot`, if any.
 *
 * slot: force-kill slot being discarded. On Windows a non-NULL
 *       thread_handle is closed; on every other platform the slot owns
 *       nothing and the call is a no-op. */
void frankenphp_release_thread_for_kill(force_kill_slot slot) {
#ifndef PHP_WIN32
  (void)slot;
#else
  if (slot.thread_handle == NULL) {
    return;
  }
  CloseHandle(slot.thread_handle);
#endif
}

void frankenphp_update_local_thread_context(bool is_worker) {
is_worker_thread = is_worker;

Expand Down Expand Up @@ -1065,6 +1207,23 @@ static void *php_thread(void *arg) {
snprintf(thread_name, 16, "php-%" PRIxPTR, thread_index);
set_thread_name(thread_name);

/* Tell the Go side a new native thread is entering the main loop so
* initPHPThreads can Wait() for abandoned threads from a previous
* Init cycle to fully unwind before reassigning phpThreads. Paired
* with go_frankenphp_thread_exited() at the single exit: label below. */
go_frankenphp_thread_spawned();

#ifdef FRANKENPHP_HAS_KILL_SIGNAL
/* pthread_create inherits the caller's signal mask. frankenphp_new_php_thread
* is typically called from a goroutine pinned to a Go-managed M whose mask
* may block realtime signals. Explicitly unblock FRANKENPHP_KILL_SIGNAL so
* force-kill delivery is not silently discarded on this thread. */
sigset_t unblock;
sigemptyset(&unblock);
sigaddset(&unblock, FRANKENPHP_KILL_SIGNAL);
pthread_sigmask(SIG_UNBLOCK, &unblock, NULL);
#endif

/* Initial allocation of all global PHP memory for this thread */
#ifdef ZTS
(void)ts_resource(0);
Expand All @@ -1073,6 +1232,11 @@ static void *php_thread(void *arg) {
#endif
#endif

/* Register this thread's vm_interrupt/timed_out addresses so the Go side
* can force-kill it after the graceful-drain grace period if it gets stuck
* in a busy PHP loop. */
frankenphp_register_thread_for_kill(thread_index);

bool thread_is_healthy = true;
bool has_attempted_shutdown = false;

Expand Down Expand Up @@ -1150,6 +1314,15 @@ static void *php_thread(void *arg) {
}
zend_end_try();

/* Clear the force-kill slot BEFORE ts_free_thread: that call frees
* the TSRM storage that &EG(vm_interrupt) / &EG(timed_out) point at.
* Clearing afterwards (even under a write lock) would leave a window
* where a concurrent delivery reads the still-populated slot and
* writes into freed memory. Applies to both the healthy exit and the
* unhealthy-restart path below so every call to force_kill_thread
* sees either a valid or a zero-valued slot. */
go_frankenphp_clear_force_kill_slot(thread_index);

/* free all global PHP memory reserved for this thread */
#ifdef ZTS
ts_free_thread();
Expand All @@ -1158,19 +1331,33 @@ static void *php_thread(void *arg) {
/* Thread is healthy, signal to Go that the thread has shut down */
if (thread_is_healthy) {
go_frankenphp_on_thread_shutdown(thread_index);

return NULL;
goto exit;
}

/* Thread is unhealthy, PHP globals might be in a bad state after a bailout,
* restart the entire thread */
* restart the entire thread - unless the Go side has already declared the
* runtime to be shutting down via frankenphp_set_shutdown_in_progress().
* Respawning past that point would hand a fresh pthread a phpThreads
* slice that drainPHPThreads has already stopped tracking. */
if (zend_atomic_bool_load(&shutdown_in_progress)) {
frankenphp_log_message(
"Unhealthy thread unwinding after Shutdown; not restarting",
LOG_WARNING);
goto exit;
}
frankenphp_log_message("Restarting unhealthy thread", LOG_WARNING);

if (!frankenphp_new_php_thread(thread_index)) {
/* probably unreachable */
frankenphp_log_message("Failed to restart an unhealthy thread", LOG_ERR);
}

exit:
/* Single exit point: every path above that took the spawned() Add must
* route through here so lingeringThreads.Wait() in initPHPThreads can
* observe termination. Adding a new return above without going through
* exit would leak one Add across Init cycles. */
go_frankenphp_thread_exited();
return NULL;
}

Expand Down Expand Up @@ -1265,17 +1452,25 @@ static void *php_main(void *arg) {

go_frankenphp_main_thread_is_ready();

/* channel closed, shutdown gracefully */
frankenphp_sapi_module.shutdown(&frankenphp_sapi_module);

sapi_shutdown();
/* channel closed, shutdown gracefully. If an abandoned PHP thread is
* still alive in a blocked syscall (RestartWorkers/Shutdown gave up
* after the force-kill deadline), wait a bounded window for it to
* unwind before running SAPI/TSRM teardown. On timeout, skip teardown
* entirely so the late-unwinding thread cannot touch freed state via
* ts_free_thread / php_request_shutdown (zend_catch) / SAPI callbacks.
* Process exit will reclaim the leaked state. */
if (go_frankenphp_can_teardown()) {
frankenphp_sapi_module.shutdown(&frankenphp_sapi_module);

sapi_shutdown();
#ifdef ZTS
tsrm_shutdown();
tsrm_shutdown();
#endif

if (frankenphp_sapi_module.ini_entries) {
free((char *)frankenphp_sapi_module.ini_entries);
frankenphp_sapi_module.ini_entries = NULL;
if (frankenphp_sapi_module.ini_entries) {
free((char *)frankenphp_sapi_module.ini_entries);
frankenphp_sapi_module.ini_entries = NULL;
}
}

go_frankenphp_shutdown_main_thread();
Expand Down Expand Up @@ -1470,6 +1665,12 @@ int frankenphp_reset_opcache(void) {
/* Returns PG(memory_limit), converted to int by the return type.
 * NOTE(review): presumably PG(memory_limit) is wider than int; confirm
 * that values above INT_MAX are not expected by callers.
 *
 * Declared with (void) so the definition is a real prototype, matching
 * the other parameterless functions in this file. */
int frankenphp_get_current_memory_limit(void) { return PG(memory_limit); }

/* (Re)allocates the zero-initialised thread_metrics array.
 *
 * max_threads: number of frankenphp_thread_metrics entries to allocate.
 *
 * thread_metrics is left as whatever calloc returns; the result is not
 * checked here. */
void frankenphp_init_thread_metrics(int max_threads) {
  /* Drop whatever a previous Init left behind. Shutdown no longer calls
   * frankenphp_destroy_thread_metrics (abandoned threads may still be
   * writing into the array when the blocked syscall unwinds), but
   * initPHPThreads waits on lingeringThreads before reaching us, so any
   * such abandoned thread has already exited by the time we reallocate. */
  free(thread_metrics);

  const size_t entry_count = max_threads;
  thread_metrics = calloc(entry_count, sizeof(frankenphp_thread_metrics));
}

Expand Down
10 changes: 10 additions & 0 deletions frankenphp.go
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,16 @@ func Init(options ...Option) error {

mainThread, err := initPHPThreads(opt.numThreads, opt.maxThreads, opt.phpIni)
if err != nil {
// ErrTeardownSkipped means a prior Shutdown already tore down
// everything it could and latched the teardown-skipped flag;
// nothing new has been started in this Init call, so calling
// Shutdown here would re-enter drainPHPThreads and double-close
// the previous generation's already-closed mainThread.done. Just
// reset the running flag so ServeHTTP returns ErrNotRunning.
if errors.Is(err, ErrTeardownSkipped) {
isRunning = false
return err
}
Shutdown()
return err
}
Expand Down
35 changes: 35 additions & 0 deletions frankenphp.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,28 @@ static inline HRESULT LongLongSub(LONGLONG llMinuend, LONGLONG llSubtrahend,
#include <stdbool.h>
#include <stdint.h>

#ifndef PHP_WIN32
#include <pthread.h>
#include <signal.h>
#endif

/* Platform capabilities for the force-kill primitive; declared in the
* header so Go (via CGo) gets the correct struct layout too. */
#if !defined(PHP_WIN32) && defined(SIGRTMIN)
#define FRANKENPHP_HAS_KILL_SIGNAL 1
#define FRANKENPHP_KILL_SIGNAL (SIGRTMIN + 3)
#endif

/* Per-thread handle used to interrupt a PHP thread from outside.
 * Filled in by frankenphp_register_thread_for_kill and passed by value to
 * frankenphp_force_kill_thread / frankenphp_release_thread_for_kill.
 * An all-zero slot (vm_interrupt == NULL) is treated by
 * frankenphp_force_kill_thread as "thread never registered". */
typedef struct {
/* Address of the owning thread's EG(vm_interrupt); NULL if unregistered. */
zend_atomic_bool *vm_interrupt;
/* Address of the owning thread's EG(timed_out). */
zend_atomic_bool *timed_out;
#ifdef FRANKENPHP_HAS_KILL_SIGNAL
/* pthread_self() of the owning thread; target of pthread_kill. */
pthread_t tid;
#elif defined(PHP_WIN32)
/* Duplicated handle of the owning thread, or NULL if duplication failed.
 * Closed by frankenphp_release_thread_for_kill. */
HANDLE thread_handle;
#endif
} force_kill_slot;

#ifndef FRANKENPHP_VERSION
#define FRANKENPHP_VERSION dev
#endif
Expand Down Expand Up @@ -193,6 +215,19 @@ void frankenphp_init_thread_metrics(int max_threads);
void frankenphp_destroy_thread_metrics(void);
size_t frankenphp_get_thread_memory_usage(uintptr_t thread_index);

/* Best-effort force-kill primitives. The slot is populated by each PHP
* thread at boot (frankenphp_register_thread_for_kill calls back into Go
* via go_frankenphp_store_force_kill_slot) and lives in the Go-side
* phpThread. force_kill_thread interrupts the Zend VM at the next opcode
* boundary; on POSIX it also delivers SIGRTMIN+3 to the target thread,
* on Windows it calls CancelSynchronousIo + QueueUserAPC. release_thread
* drops any OS-owned resource tied to the slot (currently the Windows
* thread handle). */
void frankenphp_set_shutdown_in_progress(bool v);
void frankenphp_register_thread_for_kill(uintptr_t thread_index);
void frankenphp_force_kill_thread(force_kill_slot slot);
void frankenphp_release_thread_for_kill(force_kill_slot slot);

void register_extensions(zend_module_entry **m, int len);

#endif
Loading
Loading