Initial applications isolation support using Linux namespaces.

This commit is contained in:
Tiago de Bem Natel de Moura
2019-09-19 15:25:23 +03:00
parent 6346e641ee
commit c554941b4f
21 changed files with 1467 additions and 201 deletions

View File

@@ -88,6 +88,8 @@ struct nxt_common_app_conf_s {
char *working_directory;
nxt_conf_value_t *environment;
nxt_conf_value_t *isolation;
union {
nxt_external_app_conf_t external;
nxt_python_app_conf_t python;

104
src/nxt_capability.c Normal file
View File

@@ -0,0 +1,104 @@
/*
* Copyright (C) Igor Sysoev
* Copyright (C) NGINX, Inc.
*/
#include <nxt_main.h>
#if (NXT_HAVE_LINUX_CAPABILITY)
#include <linux/capability.h>
#include <sys/syscall.h>
#define nxt_capget(hdrp, datap) \
syscall(SYS_capget, hdrp, datap)
#define nxt_capset(hdrp, datap) \
syscall(SYS_capset, hdrp, datap)
#endif /* NXT_HAVE_LINUX_CAPABILITY */
static nxt_int_t nxt_capability_specific_set(nxt_task_t *task,
nxt_capabilities_t *cap);
/*
 * Determines whether the current process may change its user and group
 * identity and records the answer in cap->setid.
 *
 * An effective uid of 0 is accepted as sufficient on its own; any other
 * uid defers the decision to nxt_capability_specific_set().  The caller
 * must hand in a structure whose setid field is still zero.
 *
 * Returns NXT_OK, or whatever nxt_capability_specific_set() returns.
 */
nxt_int_t
nxt_capability_set(nxt_task_t *task, nxt_capabilities_t *cap)
{
    nxt_assert(cap->setid == 0);

    if (geteuid() != 0) {
        /* Not root: ask the platform-specific probe. */
        return nxt_capability_specific_set(task, cap);
    }

    cap->setid = 1;

    return NXT_OK;
}
#if (NXT_HAVE_LINUX_CAPABILITY)
/*
 * Probes the capability ABI version by issuing capget() with a NULL data
 * pointer and returns whatever hdr.version holds after the call.
 *
 * The result of capget() is intentionally not checked: only the version
 * field of the header is of interest here.
 * NOTE(review): this relies on the kernel rewriting hdr.version when it
 * prefers a different version than _LINUX_CAPABILITY_VERSION; confirm
 * against capget(2).
 */
static uint32_t
nxt_capability_linux_get_version(void)
{
    struct __user_cap_header_struct  hdr;

    hdr.version = _LINUX_CAPABILITY_VERSION;
    hdr.pid = nxt_pid;

    (void) nxt_capget(&hdr, NULL);

    return hdr.version;
}
/*
 * Linux probe: sets cap->setid to 1 only when both CAP_SETUID and
 * CAP_SETGID are present in the effective capability set of the process.
 *
 * Returns NXT_ERROR if the capabilities cannot be read, NXT_OK otherwise
 * (including when one of the two capabilities is missing).
 */
static nxt_int_t
nxt_capability_specific_set(nxt_task_t *task, nxt_capabilities_t *cap)
{
    uint32_t                         setid_caps;
    struct __user_cap_data_struct    caps[2];
    struct __user_cap_header_struct  hdr;

    /*
     * Capability v1 fills a single 32-bit structure, while v2 and v3 fill
     * two of them.  Two elements are always reserved so any version fits;
     * on v1 the second element is simply unused.
     *
     * Only the first element is inspected, because CAP_SETUID and
     * CAP_SETGID both live in the first 32-bit chunk.
     */

    /*
     * The version is taken from the running kernel rather than from the
     * _LINUX_CAPABILITY_VERSION header constant, which is safer for
     * a Unit binary compiled on a different machine.
     */
    hdr.version = nxt_capability_linux_get_version();
    hdr.pid = nxt_pid;

    if (nxt_slow_path(nxt_capget(&hdr, caps) == -1)) {
        nxt_alert(task, "failed to get process capabilities: %E", nxt_errno);
        return NXT_ERROR;
    }

    setid_caps = (1 << CAP_SETUID) | (1 << CAP_SETGID);

    if ((caps[0].effective & setid_caps) == setid_caps) {
        cap->setid = 1;
    }

    return NXT_OK;
}
#else
/*
 * Fallback compiled when NXT_HAVE_LINUX_CAPABILITY is not set: no probing
 * is done and cap is left untouched, so cap->setid keeps the value the
 * caller passed in.  Always returns NXT_OK.
 */
static nxt_int_t
nxt_capability_specific_set(nxt_task_t *task, nxt_capabilities_t *cap)
{
return NXT_OK;
}
#endif

17
src/nxt_capability.h Normal file
View File

@@ -0,0 +1,17 @@
/*
* Copyright (C) Igor Sysoev
* Copyright (C) NGINX, Inc.
*/
#ifndef _NXT_CAPABILITY_INCLUDED_
#define _NXT_CAPABILITY_INCLUDED_
/*
 * Privileges detected for the running process.
 * setid is set to 1 by nxt_capability_set() when the process is allowed
 * to change its user and group identity, and stays 0 otherwise.
 */
typedef struct {
uint8_t setid; /* 1 bit */
} nxt_capabilities_t;
NXT_EXPORT nxt_int_t nxt_capability_set(nxt_task_t *task,
nxt_capabilities_t *cap);
#endif /* _NXT_CAPABILITY_INCLUDED_ */

263
src/nxt_clone.c Normal file
View File

@@ -0,0 +1,263 @@
/*
* Copyright (C) Igor Sysoev
* Copyright (C) NGINX, Inc.
*/
#include <nxt_main.h>
#include <sys/types.h>
#include <nxt_conf.h>
#include <nxt_clone.h>
#if (NXT_HAVE_CLONE)
/*
 * Thin wrapper around the raw clone system call.
 *
 * "flags" is passed through unchanged and NULL is passed as the other
 * argument.  The position of the two arguments is swapped on s390x, s390
 * and CRIS.
 *
 * Returns the value of syscall() converted to pid_t.
 */
pid_t
nxt_clone(nxt_int_t flags)
{
    long  ret;

#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
    ret = syscall(__NR_clone, NULL, flags);
#else
    ret = syscall(__NR_clone, flags, NULL);
#endif

    return ret;
}
#endif
#if (NXT_HAVE_CLONE_NEWUSER)
/* map uid 65534 to unit pid */
#define NXT_DEFAULT_UNPRIV_MAP "65534 %d 1"
nxt_int_t nxt_clone_proc_setgroups(nxt_task_t *task, pid_t child_pid,
const char *str);
nxt_int_t nxt_clone_proc_map_set(nxt_task_t *task, const char* mapfile,
pid_t pid, nxt_int_t defval, nxt_conf_value_t *mapobj);
nxt_int_t nxt_clone_proc_map_write(nxt_task_t *task, const char *mapfile,
pid_t pid, u_char *mapinfo);
/*
 * Holds the three integers of one id-map entry, using the same field
 * names as the "uidmap"/"gidmap" configuration objects.
 * NOTE(review): not referenced by any code visible in this file; confirm
 * it is still needed.
 */
typedef struct {
nxt_int_t container;
nxt_int_t host;
nxt_int_t size;
} nxt_clone_procmap_t;
/*
 * Writes "str" to /proc/<child_pid>/setgroups (the caller in this file
 * passes "allow" or "deny").
 *
 * Returns NXT_OK on success and also when the file does not exist
 * (ENOENT); returns NXT_ERROR when the path does not fit the buffer,
 * when open() fails for any other reason, or when write() fails.
 */
nxt_int_t
nxt_clone_proc_setgroups(nxt_task_t *task, pid_t child_pid, const char *str)
{
    int      fd;
    u_char   *p, *end;
    ssize_t  n;
    u_char   path[PATH_MAX];

    end = path + PATH_MAX;
    p = nxt_sprintf(path, end, "/proc/%d/setgroups", child_pid);

    /*
     * The bounds check must come before the terminator is stored:
     * when p == end, "*p" is one byte past the end of path[].
     * The path is not printed here because it is not terminated.
     */
    if (nxt_slow_path(p == end)) {
        nxt_alert(task, "error write past the buffer: /proc/%d/setgroups",
                  child_pid);
        return NXT_ERROR;
    }

    *p = '\0';

    fd = open((char *) path, O_RDWR);

    if (fd == -1) {
        /*
         * If /proc/pid/setgroups does not exist, it is safe to set the
         * uid/gid maps.  Any error other than ENOENT aborts the
         * operation and is reported to the user.
         */
        if (nxt_errno != ENOENT) {
            nxt_alert(task, "open(%s): %E", path, nxt_errno);
            return NXT_ERROR;
        }

        return NXT_OK;
    }

    n = write(fd, str, strlen(str));

    if (nxt_slow_path(n == -1)) {
        /* Report before close() so that errno still belongs to write(). */
        nxt_alert(task, "write(%s): %E", path, nxt_errno);
        close(fd);
        return NXT_ERROR;
    }

    close(fd);

    return NXT_OK;
}
/*
 * Writes the NUL-terminated "mapinfo" text to /proc/<pid>/<mapfile>.
 *
 * Returns NXT_OK when the whole string was written with a single write(),
 * NXT_ERROR when the path does not fit the buffer, the file cannot be
 * opened, or the write is short or fails.  The descriptor is closed on
 * every path once it has been opened.
 */
nxt_int_t
nxt_clone_proc_map_write(nxt_task_t *task, const char *mapfile, pid_t pid,
    u_char *mapinfo)
{
    int        len, mapfd;
    u_char     *p, *end;
    ssize_t    n;
    nxt_int_t  ret;
    u_char     buf[256];

    end = buf + sizeof(buf);

    p = nxt_sprintf(buf, end, "/proc/%d/%s", pid, mapfile);
    if (nxt_slow_path(p == end)) {
        nxt_alert(task, "writing past the buffer");
        return NXT_ERROR;
    }

    *p = '\0';

    mapfd = open((char *) buf, O_RDWR);
    if (nxt_slow_path(mapfd == -1)) {
        nxt_alert(task, "failed to open proc map (%s) %E", buf, nxt_errno);
        return NXT_ERROR;
    }

    ret = NXT_OK;

    len = nxt_strlen(mapinfo);

    n = write(mapfd, (char *) mapinfo, len);
    if (nxt_slow_path(n != len)) {

        if (n == -1 && nxt_errno == EINVAL) {
            nxt_alert(task, "failed to write %s: Check kernel maximum "
                      "allowed lines %E", buf, nxt_errno);

        } else {
            nxt_alert(task, "failed to write proc map (%s) %E", buf,
                      nxt_errno);
        }

        ret = NXT_ERROR;
    }

    /* Closed only after logging so errno still refers to write(). */
    close(mapfd);

    return ret;
}
/*
 * Builds the text of an id map and writes it to /proc/<pid>/<mapfile>.
 *
 * When "mapobj" is a non-empty array, one "container host size" line is
 * produced per element, separated by '\n'.  When "mapobj" is NULL or
 * empty, the single default line NXT_DEFAULT_UNPRIV_MAP is produced with
 * "defval" as its host id.
 *
 * Returns the result of nxt_clone_proc_map_write(), or NXT_ERROR on
 * allocation failure or when the text does not fit the buffer.
 *
 * NOTE(review): nxt_int_t values are formatted with "%d"; confirm that
 * nxt_sprintf()'s "%d" matches the width of nxt_int_t.
 */
nxt_int_t
nxt_clone_proc_map_set(nxt_task_t *task, const char* mapfile, pid_t pid,
    nxt_int_t defval, nxt_conf_value_t *mapobj)
{
    u_char            *p, *end, *mapinfo;
    nxt_int_t         container, host, size;
    nxt_int_t         ret, len, count, i;
    nxt_conf_value_t  *obj, *value;

    static nxt_str_t  str_cont = nxt_string("container");
    static nxt_str_t  str_host = nxt_string("host");
    static nxt_str_t  str_size = nxt_string("size");

    /*
     * uid_map one-entry size:
     * alloc space for 3 numbers (32bit) plus 2 spaces and \n.
     */
    len = sizeof(u_char) * (10 + 10 + 10 + 2 + 1);

    count = 0;

    if (mapobj != NULL) {
        count = nxt_conf_array_elements_count(mapobj);
    }

    if (count != 0) {
        /* One slot per entry plus the terminating NUL. */
        len = len * count + 1;
    }

    mapinfo = nxt_malloc(len);
    if (nxt_slow_path(mapinfo == NULL)) {
        nxt_alert(task, "failed to allocate uid_map buffer");
        return NXT_ERROR;
    }

    p = mapinfo;
    end = mapinfo + len;

    if (count == 0) {
        /* No explicit map configured: fall back to the default entry. */
        p = nxt_sprintf(p, end, NXT_DEFAULT_UNPRIV_MAP, defval);

        /*
         * Check the bound first; storing the terminator at "end"
         * would write one byte past the allocation.
         */
        if (nxt_slow_path(p == end)) {
            nxt_alert(task, "write past the %s buffer", mapfile);
            nxt_free(mapinfo);
            return NXT_ERROR;
        }

        *p = '\0';

    } else {
        for (i = 0; i < count; i++) {
            obj = nxt_conf_get_array_element(mapobj, i);

            value = nxt_conf_get_object_member(obj, &str_cont, NULL);
            container = nxt_conf_get_integer(value);

            value = nxt_conf_get_object_member(obj, &str_host, NULL);
            host = nxt_conf_get_integer(value);

            value = nxt_conf_get_object_member(obj, &str_size, NULL);
            size = nxt_conf_get_integer(value);

            p = nxt_sprintf(p, end, "%d %d %d", container, host, size);
            if (nxt_slow_path(p == end)) {
                nxt_alert(task, "write past the uid_map buffer");
                nxt_free(mapinfo);
                return NXT_ERROR;
            }

            /* p < end here, so one more byte always fits. */
            if (i + 1 < count) {
                *p++ = '\n';

            } else {
                *p = '\0';
            }
        }
    }

    ret = nxt_clone_proc_map_write(task, mapfile, pid, mapinfo);

    nxt_free(mapinfo);

    return ret;
}
/*
 * Configures the id mappings of the freshly cloned process "pid".
 *
 * The steps run in this order: uid_map is written, then "allow" or "deny"
 * is written to /proc/<pid>/setgroups (depending on whether the runtime
 * has the setid capability), then gid_map is written.  The effective uid
 * and gid of the calling process serve as host ids for the default maps.
 *
 * Returns NXT_OK when all three steps succeed, NXT_ERROR otherwise.
 */
nxt_int_t
nxt_clone_proc_map(nxt_task_t *task, pid_t pid, nxt_process_clone_t *clone)
{
    nxt_int_t      euid, egid;
    const char     *setgroups;
    nxt_runtime_t  *rt;

    rt = task->thread->runtime;

    euid = geteuid();
    egid = getegid();

    if (rt->capabilities.setid) {
        setgroups = "allow";

    } else {
        setgroups = "deny";
    }

    if (nxt_slow_path(nxt_clone_proc_map_set(task, "uid_map", pid, euid,
                                             clone->uidmap)
                      != NXT_OK))
    {
        return NXT_ERROR;
    }

    if (nxt_slow_path(nxt_clone_proc_setgroups(task, pid, setgroups)
                      != NXT_OK))
    {
        nxt_alert(task, "failed to write /proc/%d/setgroups", pid);
        return NXT_ERROR;
    }

    if (nxt_slow_path(nxt_clone_proc_map_set(task, "gid_map", pid, egid,
                                             clone->gidmap)
                      != NXT_OK))
    {
        return NXT_ERROR;
    }

    return NXT_OK;
}
#endif

17
src/nxt_clone.h Normal file
View File

@@ -0,0 +1,17 @@
/*
* Copyright (C) Igor Sysoev
* Copyright (C) NGINX, Inc.
*/
#ifndef _NXT_CLONE_INCLUDED_
#define _NXT_CLONE_INCLUDED_
pid_t nxt_clone(nxt_int_t flags);
#if (NXT_HAVE_CLONE_NEWUSER)
nxt_int_t nxt_clone_proc_map(nxt_task_t *task, pid_t pid,
nxt_process_clone_t *clone);
#endif
#endif /* _NXT_CLONE_INCLUDED_ */

View File

@@ -39,9 +39,6 @@ typedef nxt_int_t (*nxt_conf_vldt_member_t)(nxt_conf_validation_t *vldt,
nxt_conf_value_t *value);
typedef nxt_int_t (*nxt_conf_vldt_element_t)(nxt_conf_validation_t *vldt,
nxt_conf_value_t *value);
typedef nxt_int_t (*nxt_conf_vldt_system_t)(nxt_conf_validation_t *vldt,
char *name);
static nxt_int_t nxt_conf_vldt_type(nxt_conf_validation_t *vldt,
nxt_str_t *name, nxt_conf_value_t *value, nxt_conf_vldt_type_t type);
@@ -86,10 +83,6 @@ static nxt_int_t nxt_conf_vldt_object_iterator(nxt_conf_validation_t *vldt,
nxt_conf_value_t *value, void *data);
static nxt_int_t nxt_conf_vldt_array_iterator(nxt_conf_validation_t *vldt,
nxt_conf_value_t *value, void *data);
static nxt_int_t nxt_conf_vldt_system(nxt_conf_validation_t *vldt,
nxt_conf_value_t *value, void *data);
static nxt_int_t nxt_conf_vldt_user(nxt_conf_validation_t *vldt, char *name);
static nxt_int_t nxt_conf_vldt_group(nxt_conf_validation_t *vldt, char *name);
static nxt_int_t nxt_conf_vldt_environment(nxt_conf_validation_t *vldt,
nxt_str_t *name, nxt_conf_value_t *value);
static nxt_int_t nxt_conf_vldt_argument(nxt_conf_validation_t *vldt,
@@ -101,6 +94,21 @@ static nxt_int_t nxt_conf_vldt_java_classpath(nxt_conf_validation_t *vldt,
static nxt_int_t nxt_conf_vldt_java_option(nxt_conf_validation_t *vldt,
nxt_conf_value_t *value);
static nxt_int_t
nxt_conf_vldt_isolation(nxt_conf_validation_t *vldt, nxt_conf_value_t *value,
void *data);
static nxt_int_t
nxt_conf_vldt_clone_namespaces(nxt_conf_validation_t *vldt,
nxt_conf_value_t *value, void *data);
#if (NXT_HAVE_CLONE_NEWUSER)
static nxt_int_t nxt_conf_vldt_clone_procmap(nxt_conf_validation_t *vldt,
const char* mapfile, nxt_conf_value_t *value);
static nxt_int_t nxt_conf_vldt_clone_uidmap(nxt_conf_validation_t *vldt,
nxt_conf_value_t *value);
static nxt_int_t nxt_conf_vldt_clone_gidmap(nxt_conf_validation_t *vldt,
nxt_conf_value_t *value);
#endif
static nxt_conf_vldt_object_t nxt_conf_vldt_websocket_members[] = {
{ nxt_string("read_timeout"),
@@ -340,6 +348,100 @@ static nxt_conf_vldt_object_t nxt_conf_vldt_app_processes_members[] = {
};
/*
 * Members accepted inside the "namespaces" object of an application's
 * "isolation" settings.  Every member is a boolean, and each entry is
 * compiled in only when the matching NXT_HAVE_CLONE_* macro is set.
 */
static nxt_conf_vldt_object_t nxt_conf_vldt_app_namespaces_members[] = {
#if (NXT_HAVE_CLONE_NEWUSER)
{ nxt_string("credential"),
NXT_CONF_VLDT_BOOLEAN,
NULL,
NULL },
#endif
#if (NXT_HAVE_CLONE_NEWPID)
{ nxt_string("pid"),
NXT_CONF_VLDT_BOOLEAN,
NULL,
NULL },
#endif
#if (NXT_HAVE_CLONE_NEWNET)
{ nxt_string("network"),
NXT_CONF_VLDT_BOOLEAN,
NULL,
NULL },
#endif
#if (NXT_HAVE_CLONE_NEWNS)
{ nxt_string("mount"),
NXT_CONF_VLDT_BOOLEAN,
NULL,
NULL },
#endif
#if (NXT_HAVE_CLONE_NEWUTS)
{ nxt_string("uname"),
NXT_CONF_VLDT_BOOLEAN,
NULL,
NULL },
#endif
#if (NXT_HAVE_CLONE_NEWCGROUP)
{ nxt_string("cgroup"),
NXT_CONF_VLDT_BOOLEAN,
NULL,
NULL },
#endif
/* End-of-table sentinel. */
NXT_CONF_VLDT_END
};
#if (NXT_HAVE_CLONE_NEWUSER)
static nxt_conf_vldt_object_t nxt_conf_vldt_app_procmap_members[] = {
{ nxt_string("container"),
NXT_CONF_VLDT_INTEGER,
NULL,
NULL },
{ nxt_string("host"),
NXT_CONF_VLDT_INTEGER,
NULL,
NULL },
{ nxt_string("size"),
NXT_CONF_VLDT_INTEGER,
NULL,
NULL },
};
#endif
/*
 * Members accepted inside an application's "isolation" object:
 * "namespaces" is an object checked against the namespaces table, and,
 * when NXT_HAVE_CLONE_NEWUSER is set, "uidmap" and "gidmap" are arrays
 * whose elements are checked one by one by the respective validator.
 */
static nxt_conf_vldt_object_t nxt_conf_vldt_app_isolation_members[] = {
{ nxt_string("namespaces"),
NXT_CONF_VLDT_OBJECT,
&nxt_conf_vldt_clone_namespaces,
(void *) &nxt_conf_vldt_app_namespaces_members },
#if (NXT_HAVE_CLONE_NEWUSER)
{ nxt_string("uidmap"),
NXT_CONF_VLDT_ARRAY,
&nxt_conf_vldt_array_iterator,
(void *) &nxt_conf_vldt_clone_uidmap },
{ nxt_string("gidmap"),
NXT_CONF_VLDT_ARRAY,
&nxt_conf_vldt_array_iterator,
(void *) &nxt_conf_vldt_clone_gidmap },
#endif
/* End-of-table sentinel. */
NXT_CONF_VLDT_END
};
static nxt_conf_vldt_object_t nxt_conf_vldt_common_members[] = {
{ nxt_string("type"),
NXT_CONF_VLDT_STRING,
@@ -358,13 +460,13 @@ static nxt_conf_vldt_object_t nxt_conf_vldt_common_members[] = {
{ nxt_string("user"),
NXT_CONF_VLDT_STRING,
nxt_conf_vldt_system,
(void *) &nxt_conf_vldt_user },
NULL,
NULL },
{ nxt_string("group"),
NXT_CONF_VLDT_STRING,
nxt_conf_vldt_system,
(void *) &nxt_conf_vldt_group },
NULL,
NULL },
{ nxt_string("working_directory"),
NXT_CONF_VLDT_STRING,
@@ -376,6 +478,11 @@ static nxt_conf_vldt_object_t nxt_conf_vldt_common_members[] = {
&nxt_conf_vldt_object_iterator,
(void *) &nxt_conf_vldt_environment },
{ nxt_string("isolation"),
NXT_CONF_VLDT_OBJECT,
&nxt_conf_vldt_isolation,
(void *) &nxt_conf_vldt_app_isolation_members },
NXT_CONF_VLDT_END
};
@@ -1251,71 +1358,6 @@ nxt_conf_vldt_array_iterator(nxt_conf_validation_t *vldt,
}
/*
 * Copies the string held by "value" into a local 32-byte buffer and
 * passes it to the validator callback supplied through "data".
 * At most sizeof(buffer) bytes, terminator included, are handed to
 * nxt_cpystrn(), so longer names are cut short.
 *
 * Returns whatever the callback returns.
 */
static nxt_int_t
nxt_conf_vldt_system(nxt_conf_validation_t *vldt, nxt_conf_value_t *value,
    void *data)
{
    char       buf[32];
    size_t     n;
    nxt_str_t  str;

    nxt_conf_get_string(value, &str);

    n = str.length + 1;
    n = nxt_min(n, sizeof(buf));

    nxt_cpystrn((u_char *) buf, str.start, n);

    /* The cast is required by Sun C. */
    return ((nxt_conf_vldt_system_t) data)(vldt, buf);
}
/*
 * Checks that "user" is known to getpwnam().
 *
 * Returns NXT_OK when the user exists, a validation error when
 * getpwnam() found nothing without setting errno, and NXT_ERROR when
 * the lookup itself failed.
 */
static nxt_int_t
nxt_conf_vldt_user(nxt_conf_validation_t *vldt, char *user)
{
    /* Cleared so "no such user" can be told apart from a lookup error. */
    nxt_errno = 0;

    if (getpwnam(user) != NULL) {
        return NXT_OK;
    }

    if (nxt_errno != 0) {
        return NXT_ERROR;
    }

    return nxt_conf_vldt_error(vldt, "User \"%s\" is not found.", user);
}
/*
 * Checks that "group" is known to getgrnam().
 *
 * Returns NXT_OK when the group exists, a validation error when
 * getgrnam() found nothing without setting errno, and NXT_ERROR when
 * the lookup itself failed.
 */
static nxt_int_t
nxt_conf_vldt_group(nxt_conf_validation_t *vldt, char *group)
{
    /* Cleared so "no such group" can be told apart from a lookup error. */
    nxt_errno = 0;

    if (getgrnam(group) != NULL) {
        return NXT_OK;
    }

    if (nxt_errno != 0) {
        return NXT_ERROR;
    }

    return nxt_conf_vldt_error(vldt, "Group \"%s\" is not found.", group);
}
static nxt_int_t
nxt_conf_vldt_environment(nxt_conf_validation_t *vldt, nxt_str_t *name,
nxt_conf_value_t *value)
@@ -1353,6 +1395,133 @@ nxt_conf_vldt_environment(nxt_conf_validation_t *vldt, nxt_str_t *name,
}
/*
 * Validates the "namespaces" object by checking it against the member
 * table passed in "data".  Returns the result of nxt_conf_vldt_object().
 */
static nxt_int_t
nxt_conf_vldt_clone_namespaces(nxt_conf_validation_t *vldt,
    nxt_conf_value_t *value, void *data)
{
    nxt_int_t  ret;

    ret = nxt_conf_vldt_object(vldt, value, data);

    return ret;
}
/*
 * Validates the "isolation" object by checking it against the member
 * table passed in "data".  Returns the result of nxt_conf_vldt_object().
 */
static nxt_int_t
nxt_conf_vldt_isolation(nxt_conf_validation_t *vldt, nxt_conf_value_t *value,
    void *data)
{
    nxt_int_t  ret;

    ret = nxt_conf_vldt_object(vldt, value, data);

    return ret;
}
#if (NXT_HAVE_CLONE_NEWUSER)
/*
 * Scratch structure the validator maps one uidmap/gidmap element into;
 * each field is preset to -1 by the validator to detect missing members.
 * NOTE(review): the fields are nxt_int_t but are mapped with
 * NXT_CONF_MAP_INT32 -- confirm the two widths agree.
 */
typedef struct {
nxt_int_t container;
nxt_int_t host;
nxt_int_t size;
} nxt_conf_vldt_clone_procmap_conf_t;
/*
 * Maps the "container", "host" and "size" members of an id-map element
 * onto the same-named fields of nxt_conf_vldt_clone_procmap_conf_t.
 */
static nxt_conf_map_t nxt_conf_vldt_clone_procmap_conf_map[] = {
{
nxt_string("container"),
NXT_CONF_MAP_INT32,
offsetof(nxt_conf_vldt_clone_procmap_conf_t, container),
},
{
nxt_string("host"),
NXT_CONF_MAP_INT32,
offsetof(nxt_conf_vldt_clone_procmap_conf_t, host),
},
{
nxt_string("size"),
NXT_CONF_MAP_INT32,
offsetof(nxt_conf_vldt_clone_procmap_conf_t, size),
},
};
/*
 * Verifies that one id-map element carries all of "container", "host"
 * and "size".  "mapfile" is used only to name the map in error messages.
 *
 * Each field starts at -1; a field still equal to -1 after the object
 * has been mapped is reported as missing.  Fields are checked in the
 * order container, host, size, and the first missing one is reported.
 *
 * Returns NXT_OK, the mapping error, or the validation error.
 */
static nxt_int_t
nxt_conf_vldt_clone_procmap(nxt_conf_validation_t *vldt, const char *mapfile,
    nxt_conf_value_t *value)
{
    nxt_int_t                           rc;
    nxt_conf_vldt_clone_procmap_conf_t  entry;

    entry.container = -1;
    entry.host = -1;
    entry.size = -1;

    rc = nxt_conf_map_object(vldt->pool, value,
                             nxt_conf_vldt_clone_procmap_conf_map,
                             nxt_nitems(nxt_conf_vldt_clone_procmap_conf_map),
                             &entry);
    if (rc != NXT_OK) {
        return rc;
    }

    if (entry.container == -1) {
        return nxt_conf_vldt_error(vldt, "The %s requires the "
                                   "\"container\" field set.", mapfile);

    } else if (entry.host == -1) {
        return nxt_conf_vldt_error(vldt, "The %s requires the "
                                   "\"host\" field set.", mapfile);

    } else if (entry.size == -1) {
        return nxt_conf_vldt_error(vldt, "The %s requires the "
                                   "\"size\" field set.", mapfile);
    }

    return NXT_OK;
}
/*
 * Validates one element of the "uidmap" array: it must be an object,
 * may contain only the procmap members, and must define all of them.
 *
 * Returns NXT_OK or the first validation error encountered.
 */
static nxt_int_t
nxt_conf_vldt_clone_uidmap(nxt_conf_validation_t *vldt, nxt_conf_value_t *value)
{
    nxt_int_t  rc;

    if (nxt_conf_type(value) == NXT_CONF_OBJECT) {
        rc = nxt_conf_vldt_object(vldt, value,
                                  (void *) nxt_conf_vldt_app_procmap_members);

        if (rc == NXT_OK) {
            rc = nxt_conf_vldt_clone_procmap(vldt, "uid_map", value);
        }

        return rc;
    }

    return nxt_conf_vldt_error(vldt, "The \"uidmap\" array "
                               "must contain only object values.");
}
/*
 * Validates one element of the "gidmap" array: it must be an object,
 * may contain only the procmap members, and must define all of them.
 *
 * Returns NXT_OK or the first validation error encountered.
 */
static nxt_int_t
nxt_conf_vldt_clone_gidmap(nxt_conf_validation_t *vldt, nxt_conf_value_t *value)
{
    nxt_int_t  rc;

    if (nxt_conf_type(value) == NXT_CONF_OBJECT) {
        rc = nxt_conf_vldt_object(vldt, value,
                                  (void *) nxt_conf_vldt_app_procmap_members);

        if (rc == NXT_OK) {
            rc = nxt_conf_vldt_clone_procmap(vldt, "gid_map", value);
        }

        return rc;
    }

    return nxt_conf_vldt_error(vldt, "The \"gidmap\" array "
                               "must contain only object values.");
}
#endif
static nxt_int_t
nxt_conf_vldt_argument(nxt_conf_validation_t *vldt, nxt_conf_value_t *value)
{

View File

@@ -57,6 +57,7 @@ typedef uint16_t nxt_port_id_t;
#include <nxt_fiber.h>
#include <nxt_thread.h>
#include <nxt_process_type.h>
#include <nxt_capability.h>
#include <nxt_process.h>
#include <nxt_utf8.h>
#include <nxt_file_name.h>

View File

@@ -14,6 +14,10 @@
#include <nxt_cert.h>
#endif
#ifdef NXT_LINUX
#include <linux/sched.h>
#endif
typedef struct {
nxt_socket_t socket;
@@ -68,6 +72,10 @@ static void nxt_main_port_conf_store_handler(nxt_task_t *task,
static void nxt_main_port_access_log_handler(nxt_task_t *task,
nxt_port_recv_msg_t *msg);
static nxt_int_t nxt_init_set_isolation(nxt_task_t *task,
nxt_process_init_t *init, nxt_conf_value_t *isolation);
static nxt_int_t nxt_init_set_ns(nxt_task_t *task,
nxt_process_init_t *init, nxt_conf_value_t *ns);
const nxt_sig_event_t nxt_main_process_signals[] = {
nxt_event_signal(SIGHUP, nxt_main_process_signal_handler),
@@ -134,6 +142,12 @@ static nxt_conf_map_t nxt_common_app_conf[] = {
NXT_CONF_MAP_PTR,
offsetof(nxt_common_app_conf_t, environment),
},
{
nxt_string("isolation"),
NXT_CONF_MAP_PTR,
offsetof(nxt_common_app_conf_t, isolation),
}
};
@@ -271,12 +285,11 @@ nxt_port_main_start_worker_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg)
nxt_int_t ret;
nxt_buf_t *b;
nxt_port_t *port;
nxt_runtime_t *rt;
nxt_app_type_t idx;
nxt_conf_value_t *conf;
nxt_common_app_conf_t app_conf;
static nxt_str_t nobody = nxt_string("nobody");
ret = NXT_ERROR;
mp = nxt_mp_create(1024, 128, 256, 32);
@@ -311,7 +324,10 @@ nxt_port_main_start_worker_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg)
goto failed;
}
app_conf.user = nobody;
rt = task->thread->runtime;
app_conf.user.start = (u_char*)rt->user_cred.user;
app_conf.user.length = nxt_strlen(rt->user_cred.user);
ret = nxt_conf_map_object(mp, conf, nxt_common_app_conf,
nxt_nitems(nxt_common_app_conf), &app_conf);
@@ -458,6 +474,8 @@ nxt_main_start_controller_process(nxt_task_t *task, nxt_runtime_t *rt)
return NXT_ERROR;
}
nxt_memzero(init, sizeof(nxt_process_init_t));
init->start = nxt_controller_start;
init->name = "controller";
init->user_cred = &rt->user_cred;
@@ -552,6 +570,8 @@ nxt_main_start_discovery_process(nxt_task_t *task, nxt_runtime_t *rt)
return NXT_ERROR;
}
nxt_memzero(init, sizeof(nxt_process_init_t));
init->start = nxt_discovery_start;
init->name = "discovery";
init->user_cred = &rt->user_cred;
@@ -576,6 +596,8 @@ nxt_main_start_router_process(nxt_task_t *task, nxt_runtime_t *rt)
return NXT_ERROR;
}
nxt_memzero(init, sizeof(nxt_process_init_t));
init->start = nxt_router_start;
init->name = "router";
init->user_cred = &rt->user_cred;
@@ -589,7 +611,6 @@ nxt_main_start_router_process(nxt_task_t *task, nxt_runtime_t *rt)
return nxt_main_create_worker_process(task, rt, init);
}
static nxt_int_t
nxt_main_start_worker_process(nxt_task_t *task, nxt_runtime_t *rt,
nxt_common_app_conf_t *app_conf, uint32_t stream)
@@ -597,41 +618,72 @@ nxt_main_start_worker_process(nxt_task_t *task, nxt_runtime_t *rt,
char *user, *group;
u_char *title, *last, *end;
size_t size;
nxt_int_t ret;
nxt_process_init_t *init;
size = sizeof(nxt_process_init_t)
+ sizeof(nxt_user_cred_t)
+ app_conf->user.length + 1
+ app_conf->group.length + 1
+ app_conf->name.length + sizeof("\"\" application");
+ app_conf->name.length
+ sizeof("\"\" application");
if (rt->capabilities.setid) {
size += sizeof(nxt_user_cred_t)
+ app_conf->user.length + 1
+ app_conf->group.length + 1;
}
init = nxt_malloc(size);
if (nxt_slow_path(init == NULL)) {
return NXT_ERROR;
}
init->user_cred = nxt_pointer_to(init, sizeof(nxt_process_init_t));
user = nxt_pointer_to(init->user_cred, sizeof(nxt_user_cred_t));
nxt_memzero(init, sizeof(nxt_process_init_t));
nxt_memcpy(user, app_conf->user.start, app_conf->user.length);
last = nxt_pointer_to(user, app_conf->user.length);
*last++ = '\0';
if (rt->capabilities.setid) {
init->user_cred = nxt_pointer_to(init, sizeof(nxt_process_init_t));
user = nxt_pointer_to(init->user_cred, sizeof(nxt_user_cred_t));
init->user_cred->user = user;
if (app_conf->group.start != NULL) {
group = (char *) last;
nxt_memcpy(group, app_conf->group.start, app_conf->group.length);
last = nxt_pointer_to(group, app_conf->group.length);
nxt_memcpy(user, app_conf->user.start, app_conf->user.length);
last = nxt_pointer_to(user, app_conf->user.length);
*last++ = '\0';
} else {
group = NULL;
}
init->user_cred->user = user;
if (nxt_user_cred_get(task, init->user_cred, group) != NXT_OK) {
return NXT_ERROR;
if (app_conf->group.start != NULL) {
group = (char *) last;
nxt_memcpy(group, app_conf->group.start, app_conf->group.length);
last = nxt_pointer_to(group, app_conf->group.length);
*last++ = '\0';
} else {
group = NULL;
}
ret = nxt_user_cred_get(task, init->user_cred, group);
if (ret != NXT_OK) {
return NXT_ERROR;
}
} else {
if (!nxt_str_eq(&app_conf->user, (u_char *) rt->user_cred.user,
nxt_strlen(rt->user_cred.user)))
{
nxt_alert(task, "cannot set user \"%V\" for app \"%V\": "
"missing capabilities", &app_conf->user, &app_conf->name);
return NXT_ERROR;
}
if (app_conf->group.length > 0
&& !nxt_str_eq(&app_conf->group, (u_char *) rt->group,
nxt_strlen(rt->group)))
{
nxt_alert(task, "cannot set group \"%V\" for app \"%V\": "
"missing capabilities", &app_conf->group,
&app_conf->name);
return NXT_ERROR;
}
last = nxt_pointer_to(init, sizeof(nxt_process_init_t));
}
title = last;
@@ -648,6 +700,11 @@ nxt_main_start_worker_process(nxt_task_t *task, nxt_runtime_t *rt,
init->stream = stream;
init->restart = NULL;
ret = nxt_init_set_isolation(task, init, app_conf->isolation);
if (nxt_slow_path(ret != NXT_OK)) {
return NXT_ERROR;
}
return nxt_main_create_worker_process(task, rt, init);
}
@@ -1246,7 +1303,7 @@ nxt_main_port_modules_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg)
nxt_conf_value_t *conf, *root, *value;
nxt_app_lang_module_t *lang;
static nxt_str_t root_path = nxt_string("/");
static nxt_str_t root_path = nxt_string("/");
rt = task->thread->runtime;
@@ -1438,3 +1495,105 @@ nxt_main_port_access_log_handler(nxt_task_t *task, nxt_port_recv_msg_t *msg)
msg->port_msg.stream, 0, NULL);
}
}
/*
 * Transfers the application's "isolation" settings into init.
 *
 * The "namespaces" member, if present, is turned into clone flags by
 * nxt_init_set_ns(); the "uidmap" and "gidmap" members, if present, are
 * stored by pointer in init->isolation.clone.  A NULL "isolation" leaves
 * init untouched.
 *
 * Returns NXT_OK, or the error returned by nxt_init_set_ns().
 */
static nxt_int_t
nxt_init_set_isolation(nxt_task_t *task, nxt_process_init_t *init,
    nxt_conf_value_t *isolation)
{
    nxt_int_t         rc;
    nxt_conf_value_t  *member;

    static nxt_str_t  nsname = nxt_string("namespaces");
    static nxt_str_t  uidname = nxt_string("uidmap");
    static nxt_str_t  gidname = nxt_string("gidmap");

    if (isolation == NULL) {
        return NXT_OK;
    }

    member = nxt_conf_get_object_member(isolation, &nsname, NULL);

    if (member != NULL) {
        rc = nxt_init_set_ns(task, init, member);

        if (rc != NXT_OK) {
            return rc;
        }
    }

    member = nxt_conf_get_object_member(isolation, &uidname, NULL);

    if (member != NULL) {
        init->isolation.clone.uidmap = member;
    }

    member = nxt_conf_get_object_member(isolation, &gidname, NULL);

    if (member != NULL) {
        init->isolation.clone.gidmap = member;
    }

    return NXT_OK;
}
/*
 * Converts the members of the "namespaces" object into clone flags.
 *
 * Every member name is matched against the namespaces compiled in; an
 * unmatched name is an error.  A member whose value reads as zero adds
 * nothing; any other value ORs the matching CLONE_NEW* flag into
 * init->isolation.clone.flags.
 *
 * NOTE(review): the members are validated as booleans but are read with
 * nxt_conf_get_integer(); confirm this accessor is valid for booleans.
 *
 * Returns NXT_OK, or NXT_ERROR on an unknown member name.
 */
static nxt_int_t
nxt_init_set_ns(nxt_task_t *task, nxt_process_init_t *init,
    nxt_conf_value_t *namespaces)
{
    uint32_t          next;
    nxt_str_t         member;
    nxt_int_t         ns_flag;
    nxt_conf_value_t  *enabled;

    next = 0;

    for ( ;; ) {
        enabled = nxt_conf_next_object_member(namespaces, &member, &next);

        if (enabled == NULL) {
            break;
        }

        ns_flag = 0;

#if (NXT_HAVE_CLONE_NEWUSER)
        if (nxt_str_eq(&member, "credential", 10)) {
            ns_flag = CLONE_NEWUSER;
        }
#endif

#if (NXT_HAVE_CLONE_NEWPID)
        if (nxt_str_eq(&member, "pid", 3)) {
            ns_flag = CLONE_NEWPID;
        }
#endif

#if (NXT_HAVE_CLONE_NEWNET)
        if (nxt_str_eq(&member, "network", 7)) {
            ns_flag = CLONE_NEWNET;
        }
#endif

#if (NXT_HAVE_CLONE_NEWUTS)
        if (nxt_str_eq(&member, "uname", 5)) {
            ns_flag = CLONE_NEWUTS;
        }
#endif

#if (NXT_HAVE_CLONE_NEWNS)
        if (nxt_str_eq(&member, "mount", 5)) {
            ns_flag = CLONE_NEWNS;
        }
#endif

#if (NXT_HAVE_CLONE_NEWCGROUP)
        if (nxt_str_eq(&member, "cgroup", 6)) {
            ns_flag = CLONE_NEWCGROUP;
        }
#endif

        if (ns_flag == 0) {
            nxt_alert(task, "unknown namespace flag: \"%V\"", &member);
            return NXT_ERROR;
        }

        /* The process shares everything by default. */
        if (nxt_conf_get_integer(enabled) != 0) {
            init->isolation.clone.flags |= ns_flag;
        }
    }

    return NXT_OK;
}

View File

@@ -7,10 +7,16 @@
#include <nxt_main.h>
#include <nxt_main_process.h>
#if (NXT_HAVE_CLONE)
#include <nxt_clone.h>
#endif
#include <signal.h>
static void nxt_process_start(nxt_task_t *task, nxt_process_t *process);
static nxt_int_t nxt_user_groups_get(nxt_task_t *task, nxt_user_cred_t *uc);
static nxt_int_t nxt_process_worker_setup(nxt_task_t *task,
nxt_process_t *process, int parentfd);
/* A cached process pid. */
nxt_pid_t nxt_pid;
@@ -34,84 +40,217 @@ nxt_bool_t nxt_proc_remove_notify_matrix[NXT_PROCESS_MAX][NXT_PROCESS_MAX] = {
{ 0, 0, 0, 1, 0 },
};
/*
 * Child-side setup of a newly created process.
 *
 * Reads two values from "parentfd", in this order: the pid the parent
 * sees for this process, and a status word.  The received pid replaces
 * nxt_pid and process->pid; a status other than NXT_OK is returned
 * as-is.  After that the inherited runtime state is pruned, the process
 * is registered and started, and process->ready is set.
 *
 * Returns NXT_OK on success, NXT_ERROR on a pipe failure, or the status
 * sent by the parent.
 */
static nxt_int_t
nxt_process_worker_setup(nxt_task_t *task, nxt_process_t *process, int parentfd) {
pid_t rpid, pid;
ssize_t n;
nxt_int_t parent_status;
nxt_process_t *p;
nxt_runtime_t *rt;
nxt_process_init_t *init;
nxt_process_type_t ptype;
/* pid as seen by this process itself. */
pid = getpid();
rpid = 0;
rt = task->thread->runtime;
init = process->init;
/* Setup the worker process. */
/* First message on the pipe: the pid as seen by the parent. */
n = read(parentfd, &rpid, sizeof(rpid));
if (nxt_slow_path(n == -1 || n != sizeof(rpid))) {
nxt_alert(task, "failed to read real pid");
return NXT_ERROR;
}
if (nxt_slow_path(rpid == 0)) {
nxt_alert(task, "failed to get real pid from parent");
return NXT_ERROR;
}
nxt_pid = rpid;
/* Clean inherited cached thread tid. */
task->thread->tid = 0;
process->pid = nxt_pid;
/* The two pids differ only when getpid() disagrees with the parent. */
if (nxt_pid != pid) {
nxt_debug(task, "app \"%s\" real pid %d", init->name, nxt_pid);
nxt_debug(task, "app \"%s\" isolated pid: %d", init->name, pid);
}
/* Second message on the pipe: the parent's setup status. */
n = read(parentfd, &parent_status, sizeof(parent_status));
if (nxt_slow_path(n == -1 || n != sizeof(parent_status))) {
nxt_alert(task, "failed to read parent status");
return NXT_ERROR;
}
if (nxt_slow_path(close(parentfd) == -1)) {
nxt_alert(task, "failed to close reader pipe fd");
return NXT_ERROR;
}
if (nxt_slow_path(parent_status != NXT_OK)) {
return parent_status;
}
ptype = init->type;
nxt_port_reset_next_id();
nxt_event_engine_thread_adopt(task->thread->engine);
/* Remove not ready processes. */
nxt_runtime_process_each(rt, p) {
/* Processes this process type does not connect to are dropped. */
if (nxt_proc_conn_matrix[ptype][nxt_process_type(p)] == 0) {
nxt_debug(task, "remove not required process %PI", p->pid);
nxt_process_close_ports(task, p);
continue;
}
if (!p->ready) {
nxt_debug(task, "remove not ready process %PI", p->pid);
nxt_process_close_ports(task, p);
continue;
}
nxt_port_mmaps_destroy(&p->incoming, 0);
nxt_port_mmaps_destroy(&p->outgoing, 0);
} nxt_runtime_process_loop;
nxt_runtime_process_add(task, process);
nxt_process_start(task, process);
process->ready = 1;
return NXT_OK;
}
nxt_pid_t
nxt_process_create(nxt_task_t *task, nxt_process_t *process)
{
int pipefd[2];
nxt_int_t ret;
nxt_pid_t pid;
nxt_process_t *p;
nxt_runtime_t *rt;
nxt_process_type_t ptype;
nxt_process_init_t *init;
rt = task->thread->runtime;
pid = fork();
switch (pid) {
case -1:
nxt_alert(task, "fork() failed while creating \"%s\" %E",
process->init->name, nxt_errno);
break;
case 0:
/* A child. */
nxt_pid = getpid();
/* Clean inherited cached thread tid. */
task->thread->tid = 0;
process->pid = nxt_pid;
ptype = process->init->type;
nxt_port_reset_next_id();
nxt_event_engine_thread_adopt(task->thread->engine);
/* Remove not ready processes */
nxt_runtime_process_each(rt, p) {
if (nxt_proc_conn_matrix[ptype][nxt_process_type(p)] == 0) {
nxt_debug(task, "remove not required process %PI", p->pid);
nxt_process_close_ports(task, p);
continue;
}
if (!p->ready) {
nxt_debug(task, "remove not ready process %PI", p->pid);
nxt_process_close_ports(task, p);
continue;
}
nxt_port_mmaps_destroy(&p->incoming, 0);
nxt_port_mmaps_destroy(&p->outgoing, 0);
} nxt_runtime_process_loop;
nxt_runtime_process_add(task, process);
nxt_process_start(task, process);
process->ready = 1;
break;
default:
/* A parent. */
nxt_debug(task, "fork(\"%s\"): %PI", process->init->name, pid);
process->pid = pid;
nxt_runtime_process_add(task, process);
break;
if (nxt_slow_path(pipe(pipefd) == -1)) {
nxt_alert(task, "failed to create process pipe for passing rpid");
return -1;
}
init = process->init;
#if (NXT_HAVE_CLONE)
pid = nxt_clone(SIGCHLD|init->isolation.clone.flags);
#else
pid = fork();
#endif
if (nxt_slow_path(pid < 0)) {
#if (NXT_HAVE_CLONE)
nxt_alert(task, "clone() failed while creating \"%s\" %E",
init->name, nxt_errno);
#else
nxt_alert(task, "fork() failed while creating \"%s\" %E",
init->name, nxt_errno);
#endif
return pid;
}
if (pid == 0) {
/* Child. */
if (nxt_slow_path(close(pipefd[1]) == -1)) {
nxt_alert(task, "failed to close writer pipe fd");
return NXT_ERROR;
}
ret = nxt_process_worker_setup(task, process, pipefd[0]);
if (nxt_slow_path(ret != NXT_OK)) {
exit(1);
}
/*
* Explicitly return 0 to notice the caller function this is the child.
* The caller must return to the event engine work queue loop.
*/
return 0;
}
/* Parent. */
if (nxt_slow_path(close(pipefd[0]) != 0)) {
nxt_alert(task, "failed to close pipe: %E", nxt_errno);
}
/*
* At this point, the child process is blocked reading the
* pipe fd to get its real pid (rpid).
*
* If anything goes wrong now, we need to terminate the child
* process by sending a NXT_ERROR in the pipe.
*/
#if (NXT_HAVE_CLONE)
nxt_debug(task, "clone(\"%s\"): %PI", init->name, pid);
#else
nxt_debug(task, "fork(\"%s\"): %PI", init->name, pid);
#endif
if (nxt_slow_path(write(pipefd[1], &pid, sizeof(pid)) == -1)) {
nxt_alert(task, "failed to write real pid");
goto fail_cleanup;
}
#if (NXT_HAVE_CLONE_NEWUSER)
if ((init->isolation.clone.flags & CLONE_NEWUSER) == CLONE_NEWUSER) {
ret = nxt_clone_proc_map(task, pid, &init->isolation.clone);
if (nxt_slow_path(ret != NXT_OK)) {
goto fail_cleanup;
}
}
#endif
ret = NXT_OK;
if (nxt_slow_path(write(pipefd[1], &ret, sizeof(ret)) == -1)) {
nxt_alert(task, "failed to write status");
goto fail_cleanup;
}
process->pid = pid;
nxt_runtime_process_add(task, process);
return pid;
fail_cleanup:
ret = NXT_ERROR;
if (nxt_slow_path(write(pipefd[1], &ret, sizeof(ret)) == -1)) {
nxt_alert(task, "failed to write status");
}
if (nxt_slow_path(close(pipefd[1]) != 0)) {
nxt_alert(task, "failed to close pipe: %E", nxt_errno);
}
waitpid(pid, NULL, 0);
return -1;
}
@@ -133,22 +272,17 @@ nxt_process_start(nxt_task_t *task, nxt_process_t *process)
nxt_process_title(task, "unit: %s", init->name);
thread = task->thread;
rt = thread->runtime;
nxt_random_init(&thread->random);
if (init->user_cred != NULL) {
/*
* Changing user credentials requires either root privileges
* or CAP_SETUID and CAP_SETGID capabilities on Linux.
*/
if (rt->capabilities.setid && init->user_cred != NULL) {
ret = nxt_user_cred_set(task, init->user_cred);
if (ret != NXT_OK) {
goto fail;
}
}
rt = thread->runtime;
rt->type = init->type;
engine = thread->engine;
@@ -592,15 +726,8 @@ nxt_user_cred_set(nxt_task_t *task, nxt_user_cred_t *uc)
uc->user, (uint64_t) uc->uid, (uint64_t) uc->base_gid);
if (setgid(uc->base_gid) != 0) {
if (nxt_errno == NXT_EPERM) {
nxt_log(task, NXT_LOG_NOTICE, "setgid(%d) failed %E, ignored",
uc->base_gid, nxt_errno);
return NXT_OK;
} else {
nxt_alert(task, "setgid(%d) failed %E", uc->base_gid, nxt_errno);
return NXT_ERROR;
}
nxt_alert(task, "setgid(%d) failed %E", uc->base_gid, nxt_errno);
return NXT_ERROR;
}
if (uc->gids != NULL) {

View File

@@ -7,6 +7,8 @@
#ifndef _NXT_PROCESS_H_INCLUDED_
#define _NXT_PROCESS_H_INCLUDED_
#include <nxt_conf.h>
typedef pid_t nxt_pid_t;
typedef uid_t nxt_uid_t;
@@ -21,26 +23,35 @@ typedef struct {
nxt_gid_t *gids;
} nxt_user_cred_t;
/*
 * Clone-based isolation settings of a process.
 */
typedef struct {
/* Extra flags OR-ed with SIGCHLD when the process is cloned. */
nxt_int_t flags;
/* "uidmap" configuration array, or unset when none was configured. */
nxt_conf_value_t *uidmap;
/* "gidmap" configuration array, or unset when none was configured. */
nxt_conf_value_t *gidmap;
} nxt_process_clone_t;
typedef struct nxt_process_init_s nxt_process_init_t;
typedef nxt_int_t (*nxt_process_start_t)(nxt_task_t *task, void *data);
typedef nxt_int_t (*nxt_process_restart_t)(nxt_task_t *task, nxt_runtime_t *rt,
nxt_process_init_t *init);
struct nxt_process_init_s {
nxt_process_start_t start;
const char *name;
nxt_user_cred_t *user_cred;
nxt_process_start_t start;
const char *name;
nxt_user_cred_t *user_cred;
nxt_port_handlers_t *port_handlers;
const nxt_sig_event_t *signals;
nxt_port_handlers_t *port_handlers;
const nxt_sig_event_t *signals;
nxt_process_type_t type;
nxt_process_type_t type;
void *data;
uint32_t stream;
void *data;
uint32_t stream;
nxt_process_restart_t restart;
nxt_process_restart_t restart;
union {
nxt_process_clone_t clone;
} isolation;
};

View File

@@ -692,14 +692,26 @@ nxt_runtime_conf_init(nxt_task_t *task, nxt_runtime_t *rt)
rt->state = NXT_STATE;
rt->control = NXT_CONTROL_SOCK;
nxt_memzero(&rt->capabilities, sizeof(nxt_capabilities_t));
if (nxt_runtime_conf_read_cmd(task, rt) != NXT_OK) {
return NXT_ERROR;
}
if (nxt_user_cred_get(task, &rt->user_cred, rt->group) != NXT_OK) {
if (nxt_capability_set(task, &rt->capabilities) != NXT_OK) {
return NXT_ERROR;
}
if (rt->capabilities.setid) {
if (nxt_user_cred_get(task, &rt->user_cred, rt->group) != NXT_OK) {
return NXT_ERROR;
}
} else {
nxt_log(task, NXT_LOG_WARN, "Unit is running unprivileged, then it "
"cannot use arbitrary user and group.");
}
/* An engine's parameters. */
interface = nxt_service_get(rt->services, "engine", rt->engine);

View File

@@ -59,6 +59,7 @@ struct nxt_runtime_s {
uint32_t engine_connections;
uint32_t auxiliary_threads;
nxt_user_cred_t user_cred;
nxt_capabilities_t capabilities;
const char *group;
const char *pid;
const char *log;

View File

@@ -333,6 +333,7 @@ nxt_unit_init(nxt_unit_init_t *init)
}
}
lib->pid = read_port.id.pid;
ctx = &lib->main_ctx.ctx;
rc = lib->callbacks.add_port(ctx, &ready_port);
@@ -398,7 +399,6 @@ nxt_unit_create(nxt_unit_init_t *init)
lib->processes.slot = NULL;
lib->ports.slot = NULL;
lib->pid = getpid();
lib->log_fd = STDERR_FILENO;
lib->online = 1;