Fixing isolated process PID manipulation.

Registering an isolated PID in the global PID hash is wrong
because it may duplicate a PID that is already registered there.
Isolated processes are now stored only in the children list until
the response to the WHOAMI message is processed and the global PID
is discovered.

To remove isolated siblings, a pointer to the children list is
introduced in the nxt_process_init_t struct.

This closes issue #633 on GitHub.
This commit is contained in:
Max Romanov
2022-02-17 12:33:46 +00:00
committed by Andrew Clayton
parent d37b76232e
commit 900828cc4b
6 changed files with 108 additions and 25 deletions

View File

@@ -93,6 +93,13 @@ the ruby application process could crash if it's interrupted by SIGTERM signal.
</para> </para>
</change> </change>
<change type="bugfix">
<para>
when isolated PID numbers reached the prototype process host PID,
the prototype crashed.
</para>
</change>
</changes> </changes>

View File

@@ -23,6 +23,13 @@
#endif #endif
#ifdef WCOREDUMP
#define NXT_WCOREDUMP(s) WCOREDUMP(s)
#else
#define NXT_WCOREDUMP(s) 0
#endif
typedef struct { typedef struct {
nxt_app_type_t type; nxt_app_type_t type;
nxt_str_t version; nxt_str_t version;
@@ -636,6 +643,8 @@ nxt_proto_start_process_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg)
process->data.app = nxt_app_conf; process->data.app = nxt_app_conf;
process->stream = msg->port_msg.stream; process->stream = msg->port_msg.stream;
init->siblings = &nxt_proto_children;
ret = nxt_process_start(task, process); ret = nxt_process_start(task, process);
if (nxt_slow_path(ret == NXT_ERROR)) { if (nxt_slow_path(ret == NXT_ERROR)) {
nxt_process_use(task, process, -1); nxt_process_use(task, process, -1);
@@ -711,15 +720,19 @@ nxt_proto_process_created_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg)
nxt_debug(task, "app process %PI (aka %PI) is created", isolated_pid, nxt_debug(task, "app process %PI (aka %PI) is created", isolated_pid,
pid); pid);
nxt_runtime_process_remove(task->thread->runtime, process);
process->pid = pid; process->pid = pid;
nxt_runtime_process_add(task, process);
} else { } else {
nxt_debug(task, "app process %PI is created", isolated_pid); nxt_debug(task, "app process %PI is created", isolated_pid);
} }
if (!process->registered) {
nxt_assert(!nxt_queue_is_empty(&process->ports));
nxt_runtime_process_add(task, process);
nxt_port_use(task, nxt_process_port_first(process), -1);
}
} }
@@ -753,7 +766,11 @@ nxt_proto_sigchld_handler(nxt_task_t *task, void *obj, void *data)
int status; int status;
nxt_err_t err; nxt_err_t err;
nxt_pid_t pid; nxt_pid_t pid;
nxt_port_t *port;
nxt_process_t *process; nxt_process_t *process;
nxt_runtime_t *rt;
rt = task->thread->runtime;
nxt_debug(task, "proto sigchld handler signo:%d (%s)", nxt_debug(task, "proto sigchld handler signo:%d (%s)",
(int) (uintptr_t) obj, data); (int) (uintptr_t) obj, data);
@@ -783,32 +800,58 @@ nxt_proto_sigchld_handler(nxt_task_t *task, void *obj, void *data)
return; return;
} }
process = nxt_proto_process_remove(task, pid);
if (WTERMSIG(status)) { if (WTERMSIG(status)) {
#ifdef WCOREDUMP if (rt->is_pid_isolated) {
nxt_alert(task, "app process (isolated %PI) exited on signal %d%s", nxt_alert(task, "app process %PI (isolated %PI) "
pid, WTERMSIG(status), "exited on signal %d%s",
WCOREDUMP(status) ? " (core dumped)" : ""); process != NULL ? process->pid : 0,
#else pid, WTERMSIG(status),
nxt_alert(task, "app process (isolated %PI) exited on signal %d", NXT_WCOREDUMP(status) ? " (core dumped)" : "");
pid, WTERMSIG(status));
#endif } else {
nxt_alert(task, "app process %PI exited on signal %d%s",
pid, WTERMSIG(status),
NXT_WCOREDUMP(status) ? " (core dumped)" : "");
}
} else { } else {
nxt_trace(task, "app process (isolated %PI) exited with code %d", if (rt->is_pid_isolated) {
pid, WEXITSTATUS(status)); nxt_trace(task, "app process %PI (isolated %PI) "
"exited with code %d",
process != NULL ? process->pid : 0,
pid, WEXITSTATUS(status));
} else {
nxt_trace(task, "app process %PI exited with code %d",
pid, WEXITSTATUS(status));
}
} }
process = nxt_proto_process_remove(task, pid);
if (process == NULL) { if (process == NULL) {
continue; continue;
} }
if (process->registered) {
port = NULL;
} else {
nxt_assert(!nxt_queue_is_empty(&process->ports));
port = nxt_process_port_first(process);
}
if (process->state != NXT_PROCESS_STATE_CREATING) { if (process->state != NXT_PROCESS_STATE_CREATING) {
nxt_port_remove_notify_others(task, process); nxt_port_remove_notify_others(task, process);
} }
nxt_process_close_ports(task, process); nxt_process_close_ports(task, process);
if (port != NULL) {
nxt_port_use(task, port, -1);
}
if (nxt_proto_exiting && nxt_queue_is_empty(&nxt_proto_children)) { if (nxt_proto_exiting && nxt_queue_is_empty(&nxt_proto_children)) {
nxt_process_quit(task, 0); nxt_process_quit(task, 0);
return; return;
@@ -1122,7 +1165,7 @@ nxt_proto_process_add(nxt_task_t *task, nxt_process_t *process)
break; break;
default: default:
nxt_debug(task, "process (isolated %PI) failed to add", nxt_alert(task, "process (isolated %PI) failed to add",
process->isolated_pid); process->isolated_pid);
break; break;
} }

View File

@@ -296,6 +296,16 @@ nxt_process_child_fixup(nxt_task_t *task, nxt_process_t *process)
} nxt_runtime_process_loop; } nxt_runtime_process_loop;
if (init->siblings != NULL) {
nxt_queue_each(p, init->siblings, nxt_process_t, link) {
nxt_debug(task, "remove sibling process %PI", p->pid);
nxt_process_close_ports(task, p);
} nxt_queue_loop;
}
return NXT_OK; return NXT_OK;
} }
@@ -303,8 +313,9 @@ nxt_process_child_fixup(nxt_task_t *task, nxt_process_t *process)
static nxt_pid_t static nxt_pid_t
nxt_process_create(nxt_task_t *task, nxt_process_t *process) nxt_process_create(nxt_task_t *task, nxt_process_t *process)
{ {
nxt_int_t ret; nxt_int_t ret;
nxt_pid_t pid; nxt_pid_t pid;
nxt_runtime_t *rt;
#if (NXT_HAVE_CLONE) #if (NXT_HAVE_CLONE)
pid = nxt_clone(SIGCHLD | process->isolation.clone.flags); pid = nxt_clone(SIGCHLD | process->isolation.clone.flags);
@@ -352,7 +363,20 @@ nxt_process_create(nxt_task_t *task, nxt_process_t *process)
process->pid = pid; process->pid = pid;
process->isolated_pid = pid; process->isolated_pid = pid;
nxt_runtime_process_add(task, process); rt = task->thread->runtime;
if (rt->is_pid_isolated) {
/*
* Do not register process in runtime with isolated pid.
* Only global pid can be the key to avoid clash.
*/
nxt_assert(!nxt_queue_is_empty(&process->ports));
nxt_port_use(task, nxt_process_port_first(process), 1);
} else {
nxt_runtime_process_add(task, process);
}
return pid; return pid;
} }
@@ -960,6 +984,8 @@ nxt_process_close_ports(nxt_task_t *task, nxt_process_t *process)
{ {
nxt_port_t *port; nxt_port_t *port;
nxt_process_use(task, process, 1);
nxt_process_port_each(process, port) { nxt_process_port_each(process, port) {
nxt_port_close(task, port); nxt_port_close(task, port);
@@ -967,6 +993,8 @@ nxt_process_close_ports(nxt_task_t *task, nxt_process_t *process)
nxt_runtime_port_remove(task, port); nxt_runtime_port_remove(task, port);
} nxt_process_port_loop; } nxt_process_port_loop;
nxt_process_use(task, process, -1);
} }

View File

@@ -148,6 +148,8 @@ typedef struct {
const nxt_port_handlers_t *port_handlers; const nxt_port_handlers_t *port_handlers;
const nxt_sig_event_t *signals; const nxt_sig_event_t *signals;
nxt_queue_t *siblings;
} nxt_process_init_t; } nxt_process_init_t;

View File

@@ -1408,6 +1408,7 @@ nxt_runtime_process_release(nxt_runtime_t *rt, nxt_process_t *process)
nxt_assert(process->use_count == 0); nxt_assert(process->use_count == 0);
nxt_assert(process->registered == 0); nxt_assert(process->registered == 0);
nxt_assert(nxt_queue_is_empty(&process->ports));
nxt_port_mmaps_destroy(&process->incoming, 1); nxt_port_mmaps_destroy(&process->incoming, 1);
@@ -1579,11 +1580,11 @@ nxt_runtime_process_add(nxt_task_t *task, nxt_process_t *process)
process->registered = 1; process->registered = 1;
nxt_thread_log_debug("process %PI added", process->pid); nxt_debug(task, "process %PI added", process->pid);
break; break;
default: default:
nxt_thread_log_debug("process %PI failed to add", process->pid); nxt_alert(task, "process %PI failed to add", process->pid);
break; break;
} }
@@ -1597,6 +1598,8 @@ nxt_runtime_process_remove(nxt_runtime_t *rt, nxt_process_t *process)
nxt_pid_t pid; nxt_pid_t pid;
nxt_lvlhsh_query_t lhq; nxt_lvlhsh_query_t lhq;
nxt_assert(process->registered != 0);
pid = process->pid; pid = process->pid;
nxt_runtime_process_lhq_pid(&lhq, &pid); nxt_runtime_process_lhq_pid(&lhq, &pid);
@@ -1608,9 +1611,9 @@ nxt_runtime_process_remove(nxt_runtime_t *rt, nxt_process_t *process)
switch (nxt_lvlhsh_delete(&rt->processes, &lhq)) { switch (nxt_lvlhsh_delete(&rt->processes, &lhq)) {
case NXT_OK: case NXT_OK:
rt->nprocesses--; nxt_assert(lhq.value == process);
process = lhq.value; rt->nprocesses--;
process->registered = 0; process->registered = 0;
@@ -1618,7 +1621,7 @@ nxt_runtime_process_remove(nxt_runtime_t *rt, nxt_process_t *process)
break; break;
default: default:
nxt_thread_log_debug("process %PI remove failed", pid); nxt_thread_log_alert("process %PI remove failed", pid);
break; break;
} }

View File

@@ -616,7 +616,7 @@ basicConstraints = critical,CA:TRUE"""
subprocess.check_output(['kill', '-9', app_id]) subprocess.check_output(['kill', '-9', app_id])
skip_alert(r'process .* %s.* exited on signal 9' % app_id) skip_alert(r'process %s exited on signal 9' % app_id)
self.wait_for_record( self.wait_for_record(
r' (?!' + app_id + r'#)(\d+)#\d+ "mirror" application started' r' (?!' + app_id + r'#)(\d+)#\d+ "mirror" application started'