434 lines
10 KiB
C
434 lines
10 KiB
C
|
|
/*
|
|
* Copyright (C) Igor Sysoev
|
|
* Copyright (C) NGINX, Inc.
|
|
*/
|
|
|
|
#include <nxt_main.h>
|
|
|
|
|
|
static void nxt_event_conn_average_rate_update(nxt_event_write_rate_t *rate,
|
|
size_t sent, nxt_msec_t now);
|
|
NXT_LIB_UNIT_TEST_STATIC double
|
|
nxt_event_conn_exponential_approximation(double n);
|
|
static void nxt_event_conn_write_timer_handler(nxt_task_t *task, void *obj,
|
|
void *data);
|
|
|
|
|
|
void
|
|
nxt_event_conn_write(nxt_task_t *task, nxt_event_conn_t *c)
|
|
{
|
|
if (task->thread->engine->batch != 0) {
|
|
nxt_event_conn_write_enqueue(task->thread, task, c);
|
|
|
|
} else {
|
|
c->io->write(task, c, c->socket.data);
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
nxt_event_conn_io_write(nxt_task_t *task, void *obj, void *data)
|
|
{
|
|
size_t sent, limit;
|
|
ssize_t ret;
|
|
nxt_buf_t *b;
|
|
nxt_event_conn_t *c;
|
|
nxt_event_engine_t *engine;
|
|
|
|
c = obj;
|
|
|
|
nxt_debug(task, "event conn write fd:%d", c->socket.fd);
|
|
|
|
if (!c->socket.write_ready || c->delayed || c->write == NULL) {
|
|
return;
|
|
}
|
|
|
|
engine = task->thread->engine;
|
|
|
|
c->socket.write_handler = nxt_event_conn_io_write;
|
|
c->socket.error_handler = c->write_state->error_handler;
|
|
|
|
ret = NXT_DECLINED;
|
|
sent = 0;
|
|
b = c->write;
|
|
|
|
limit = nxt_event_conn_write_limit(c);
|
|
|
|
while (limit != 0) {
|
|
|
|
ret = c->io->write_chunk(c, b, limit);
|
|
|
|
if (ret < 0) {
|
|
/* ret == NXT_AGAIN || ret == NXT_ERROR. */
|
|
break;
|
|
}
|
|
|
|
sent += ret;
|
|
limit -= ret;
|
|
|
|
if (c->write_state->process_buffers) {
|
|
b = nxt_sendbuf_completion(task, c->write_work_queue, b, ret);
|
|
c->write = b;
|
|
|
|
} else {
|
|
b = nxt_sendbuf_update(b, ret);
|
|
}
|
|
|
|
if (b == NULL) {
|
|
nxt_event_fd_block_write(engine, &c->socket);
|
|
break;
|
|
}
|
|
|
|
if (!c->socket.write_ready) {
|
|
ret = NXT_AGAIN;
|
|
break;
|
|
}
|
|
}
|
|
|
|
nxt_debug(task, "event conn: %i sent:%z", ret, sent);
|
|
|
|
if (sent != 0) {
|
|
if (c->write_state->autoreset_timer) {
|
|
nxt_event_timer_disable(&c->write_timer);
|
|
}
|
|
}
|
|
|
|
if (ret != NXT_ERROR
|
|
&& !nxt_event_conn_write_delayed(engine, c, sent))
|
|
{
|
|
if (limit == 0) {
|
|
/*
|
|
* Postpone writing until next event poll to allow to
|
|
* process other recevied events and to get new events.
|
|
*/
|
|
c->write_timer.handler = nxt_event_conn_write_timer_handler;
|
|
nxt_event_timer_add(engine, &c->write_timer, 0);
|
|
|
|
} else if (ret == NXT_AGAIN) {
|
|
/*
|
|
* SSL libraries can require to toggle either write or read
|
|
* event if renegotiation occurs during SSL write operation.
|
|
* This case is handled on the event_io->send() level. Timer
|
|
* can be set here because it should be set only for write
|
|
* direction.
|
|
*/
|
|
nxt_event_conn_timer(engine, c, c->write_state, &c->write_timer);
|
|
}
|
|
}
|
|
|
|
if (ret == 0 || sent != 0) {
|
|
/* "ret == 0" means a sync buffer was processed. */
|
|
c->sent += sent;
|
|
nxt_event_conn_io_handle(task->thread, c->write_work_queue,
|
|
c->write_state->ready_handler, task, c, data);
|
|
/*
|
|
* Fall through if first operations were
|
|
* successful but the last one failed.
|
|
*/
|
|
}
|
|
|
|
if (nxt_slow_path(ret == NXT_ERROR)) {
|
|
nxt_event_fd_block_write(engine, &c->socket);
|
|
|
|
nxt_event_conn_io_handle(task->thread, c->write_work_queue,
|
|
c->write_state->error_handler, task, c, data);
|
|
}
|
|
}
|
|
|
|
|
|
size_t
|
|
nxt_event_conn_write_limit(nxt_event_conn_t *c)
|
|
{
|
|
ssize_t limit, correction;
|
|
nxt_event_write_rate_t *rate;
|
|
|
|
rate = c->rate;
|
|
|
|
if (rate == NULL) {
|
|
return c->max_chunk;
|
|
}
|
|
|
|
limit = rate->limit;
|
|
correction = limit - (size_t) rate->average;
|
|
|
|
nxt_log_debug(c->socket.log, "event conn correction:%z average:%0.3f",
|
|
correction, rate->average);
|
|
|
|
limit += correction;
|
|
|
|
if (limit <= 0) {
|
|
return 0;
|
|
}
|
|
|
|
if (rate->limit_after != 0) {
|
|
limit += rate->limit_after;
|
|
limit = nxt_min((size_t) limit, rate->max_limit);
|
|
}
|
|
|
|
return nxt_min((size_t) limit, c->max_chunk);
|
|
}
|
|
|
|
|
|
nxt_bool_t
|
|
nxt_event_conn_write_delayed(nxt_event_engine_t *engine, nxt_event_conn_t *c,
|
|
size_t sent)
|
|
{
|
|
nxt_msec_t timer;
|
|
nxt_event_write_rate_t *rate;
|
|
|
|
rate = c->rate;
|
|
|
|
if (rate != NULL) {
|
|
nxt_event_conn_average_rate_update(rate, sent, engine->timers.now);
|
|
|
|
if (rate->limit_after == 0) {
|
|
timer = sent * 1000 / rate->limit;
|
|
|
|
} else if (rate->limit_after >= sent) {
|
|
timer = sent * 1000 / rate->max_limit;
|
|
rate->limit_after -= sent;
|
|
|
|
} else {
|
|
sent -= rate->limit_after;
|
|
timer = rate->limit_after * 1000 / rate->max_limit
|
|
+ sent * 1000 / rate->limit;
|
|
rate->limit_after = 0;
|
|
}
|
|
|
|
nxt_log_debug(c->socket.log, "event conn timer: %M", timer);
|
|
|
|
if (timer != 0) {
|
|
c->delayed = 1;
|
|
|
|
nxt_event_fd_block_write(engine, &c->socket);
|
|
|
|
c->write_timer.handler = nxt_event_conn_write_timer_handler;
|
|
nxt_event_timer_add(engine, &c->write_timer, timer);
|
|
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* Exponentially weighted moving average rate for a given interval. */
|
|
|
|
static void
|
|
nxt_event_conn_average_rate_update(nxt_event_write_rate_t *rate, size_t sent,
|
|
nxt_msec_t now)
|
|
{
|
|
double weight, delta;
|
|
nxt_msec_t elapsed;
|
|
const nxt_uint_t interval = 10; /* 10s */
|
|
|
|
elapsed = now - rate->last;
|
|
|
|
if (elapsed == 0) {
|
|
return;
|
|
}
|
|
|
|
rate->last = now;
|
|
delta = (double) elapsed / 1000;
|
|
|
|
weight = nxt_event_conn_exponential_approximation(-delta / interval);
|
|
|
|
rate->average = (1 - weight) * sent / delta + weight * rate->average;
|
|
|
|
nxt_thread_log_debug("event conn delta:%0.3f, weight:%0.3f, average:%0.3f",
|
|
delta, weight, rate->average);
|
|
}
|
|
|
|
|
|
/*
|
|
* exp() takes tens or hundreds nanoseconds on modern CPU.
|
|
* This is a faster exp() approximation based on IEEE-754 format
|
|
* layout and described in "A Fast, Compact Approximation of
|
|
* the Exponential Function" * by N. N. Schraudolph, 1999.
|
|
*/
|
|
|
|
NXT_LIB_UNIT_TEST_STATIC double
|
|
nxt_event_conn_exponential_approximation(double x)
|
|
{
|
|
union {
|
|
double d;
|
|
int64_t n;
|
|
} exp;
|
|
|
|
if (x < -100) {
|
|
/*
|
|
* The approximation is correct in -700 to 700 range.
|
|
* The "x" argument is always negative.
|
|
*/
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* x * 2^52 / ln(2) + (1023 * 2^52 - 261140389990637.73
|
|
*
|
|
* 52 is the number of mantissa bits;
|
|
* 1023 is the exponent bias;
|
|
* 261140389990637.73 is the adjustment parameter to
|
|
* improve the approximation. The parameter is equal to
|
|
*
|
|
* 2^52 * ln[ 3 / (8 * ln(2)) + 0.5 ] / ln(2)
|
|
*
|
|
* Only significant digits of the double float format
|
|
* are used to present the double float constants.
|
|
*/
|
|
exp.n = x * 4503599627370496.0 / 0.69314718055994530
|
|
+ (4607182418800017408.0 - 261140389990637.73);
|
|
|
|
return exp.d;
|
|
}
|
|
|
|
|
|
static void
|
|
nxt_event_conn_write_timer_handler(nxt_task_t *task, void *obj, void *data)
|
|
{
|
|
nxt_event_conn_t *c;
|
|
nxt_event_timer_t *ev;
|
|
|
|
ev = obj;
|
|
|
|
nxt_debug(task, "event conn conn timer");
|
|
|
|
c = nxt_event_write_timer_conn(ev);
|
|
c->delayed = 0;
|
|
|
|
c->io->write(task, c, c->socket.data);
|
|
}
|
|
|
|
|
|
ssize_t
|
|
nxt_event_conn_io_write_chunk(nxt_event_conn_t *c, nxt_buf_t *b, size_t limit)
|
|
{
|
|
ssize_t ret;
|
|
|
|
ret = c->io->sendbuf(c, b, limit);
|
|
|
|
if ((ret == NXT_AGAIN || !c->socket.write_ready)
|
|
&& nxt_event_fd_is_disabled(c->socket.write))
|
|
{
|
|
nxt_event_fd_enable_write(c->socket.task->thread->engine, &c->socket);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
ssize_t
|
|
nxt_event_conn_io_sendbuf(nxt_event_conn_t *c, nxt_buf_t *b, size_t limit)
|
|
{
|
|
nxt_uint_t niob;
|
|
struct iovec iob[NXT_IOBUF_MAX];
|
|
nxt_sendbuf_coalesce_t sb;
|
|
|
|
sb.buf = b;
|
|
sb.iobuf = iob;
|
|
sb.nmax = NXT_IOBUF_MAX;
|
|
sb.sync = 0;
|
|
sb.size = 0;
|
|
sb.limit = limit;
|
|
|
|
niob = nxt_sendbuf_mem_coalesce(c->socket.task, &sb);
|
|
|
|
if (niob == 0 && sb.sync) {
|
|
return 0;
|
|
}
|
|
|
|
return nxt_event_conn_io_writev(c, iob, niob);
|
|
}
|
|
|
|
|
|
ssize_t
|
|
nxt_event_conn_io_writev(nxt_event_conn_t *c, nxt_iobuf_t *iob, nxt_uint_t niob)
|
|
{
|
|
ssize_t n;
|
|
nxt_err_t err;
|
|
|
|
if (niob == 1) {
|
|
/* Disposal of surplus kernel iovec copy-in operation. */
|
|
return nxt_event_conn_io_send(c, iob->iov_base, iob->iov_len);
|
|
}
|
|
|
|
for ( ;; ) {
|
|
n = writev(c->socket.fd, iob, niob);
|
|
|
|
err = (n == -1) ? nxt_socket_errno : 0;
|
|
|
|
nxt_log_debug(c->socket.log, "writev(%d, %ui): %d",
|
|
c->socket.fd, niob, n);
|
|
|
|
if (n > 0) {
|
|
return n;
|
|
}
|
|
|
|
/* n == -1 */
|
|
|
|
switch (err) {
|
|
|
|
case NXT_EAGAIN:
|
|
nxt_log_debug(c->socket.log, "writev() %E", err);
|
|
c->socket.write_ready = 0;
|
|
return NXT_AGAIN;
|
|
|
|
case NXT_EINTR:
|
|
nxt_log_debug(c->socket.log, "writev() %E", err);
|
|
continue;
|
|
|
|
default:
|
|
c->socket.error = err;
|
|
nxt_log_error(nxt_socket_error_level(err, c->socket.log_error),
|
|
c->socket.log, "writev(%d, %ui) failed %E",
|
|
c->socket.fd, niob, err);
|
|
return NXT_ERROR;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
ssize_t
|
|
nxt_event_conn_io_send(nxt_event_conn_t *c, void *buf, size_t size)
|
|
{
|
|
ssize_t n;
|
|
nxt_err_t err;
|
|
|
|
for ( ;; ) {
|
|
n = send(c->socket.fd, buf, size, 0);
|
|
|
|
err = (n == -1) ? nxt_socket_errno : 0;
|
|
|
|
nxt_debug(c->socket.task, "send(%d, %p, %uz): %z",
|
|
c->socket.fd, buf, size, n);
|
|
|
|
if (n > 0) {
|
|
return n;
|
|
}
|
|
|
|
/* n == -1 */
|
|
|
|
switch (err) {
|
|
|
|
case NXT_EAGAIN:
|
|
nxt_debug(c->socket.task, "send() %E", err);
|
|
c->socket.write_ready = 0;
|
|
return NXT_AGAIN;
|
|
|
|
case NXT_EINTR:
|
|
nxt_debug(c->socket.task, "send() %E", err);
|
|
continue;
|
|
|
|
default:
|
|
c->socket.error = err;
|
|
nxt_log(c->socket.task,
|
|
nxt_socket_error_level(err, c->socket.log_error),
|
|
"send(%d, %p, %uz) failed %E",
|
|
c->socket.fd, buf, size, err);
|
|
return NXT_ERROR;
|
|
}
|
|
}
|
|
}
|