- Add dnstap-sample-rate that logs only 1/N messages, for high volume

server environments. Thanks Dan Luther.
This commit is contained in:
W.C.A. Wijngaards 2024-07-19 10:04:40 +02:00
parent 8fca3e7c5b
commit c3dd6a2dbd
10 changed files with 87 additions and 2 deletions

View File

@ -439,7 +439,8 @@ unbound-control-setup: smallapp/unbound-control-setup.sh
dnstap.lo dnstap.o: $(srcdir)/dnstap/dnstap.c config.h dnstap/dnstap_config.h \
dnstap/dnstap.pb-c.c dnstap/dnstap.pb-c.h $(srcdir)/dnstap/dnstap.h \
$(srcdir)/util/config_file.h $(srcdir)/util/log.h \
$(srcdir)/util/netevent.h $(srcdir)/util/net_help.h
$(srcdir)/util/netevent.h $(srcdir)/util/net_help.h \
$(srcdir)/util/locks.h
dnstap/dnstap.pb-c.c dnstap/dnstap.pb-c.h: $(srcdir)/dnstap/dnstap.proto
@-if test ! -d dnstap; then $(INSTALL) -d dnstap; fi

View File

@ -86,6 +86,31 @@ dt_pack(const Dnstap__Dnstap *d, void **buf, size_t *sz)
return 1;
}
/**
 * See if the message must be dropped because of the dnstap sample rate.
 * Every call counts as one message. With a sample rate of N > 1, one
 * message in every N is let through and the others are reported as
 * limited. With a sample rate of 0 or 1 no message is limited.
 * The counter is protected by the sample_lock of the environment.
 * @param env: dnstap environment with the sample rate and counter.
 * @return 1 if the message is rate limited and the caller should not
 *	send it, 0 if the message should be sent.
 */
static int
dt_sample_rate_limited(struct dt_env* env)
{
	int submit = 1;
	lock_basic_lock(&env->sample_lock);
	/* Sampling is every [n] packets. Where n==1, every packet is sent */
	if(env->sample_rate > 1) {
		/* Count this message first, so that exactly one in every
		 * sample_rate messages is submitted. The >= comparison also
		 * recovers if the rate was lowered below the current count. */
		if(++env->sample_rate_count >= env->sample_rate) {
			/* the Nth message: submit it and restart the count */
			env->sample_rate_count = 0;
		} else {
			submit = 0;
		}
	}
	lock_basic_unlock(&env->sample_lock);
	return !submit;
}
static void
dt_send(const struct dt_env *env, void *buf, size_t len_buf)
{
@ -146,6 +171,7 @@ dt_create(struct config_file* cfg)
env = (struct dt_env *) calloc(1, sizeof(struct dt_env));
if (!env)
return NULL;
lock_basic_init(&env->sample_lock);
env->dtio = dt_io_thread_create();
if(!env->dtio) {
@ -241,6 +267,12 @@ dt_apply_cfg(struct dt_env *env, struct config_file *cfg)
{
verbose(VERB_OPS, "dnstap Message/FORWARDER_RESPONSE enabled");
}
lock_basic_lock(&env->sample_lock);
if((env->sample_rate = (unsigned int)cfg->dnstap_sample_rate))
{
verbose(VERB_OPS, "dnstap SAMPLE_RATE enabled and set to \"%d\"", (int)env->sample_rate);
}
lock_basic_unlock(&env->sample_lock);
}
int
@ -273,6 +305,7 @@ dt_delete(struct dt_env *env)
if (!env)
return;
dt_io_thread_delete(env->dtio);
lock_basic_destroy(&env->sample_lock);
free(env->identity);
free(env->version);
free(env);
@ -409,6 +442,9 @@ dt_msg_send_client_query(struct dt_env *env,
struct dt_msg dm;
struct timeval qtime;
if(dt_sample_rate_limited(env))
return;
if(tstamp)
memcpy(&qtime, tstamp, sizeof(qtime));
else gettimeofday(&qtime, NULL);
@ -447,6 +483,9 @@ dt_msg_send_client_response(struct dt_env *env,
struct dt_msg dm;
struct timeval rtime;
if(dt_sample_rate_limited(env))
return;
gettimeofday(&rtime, NULL);
/* type */
@ -484,6 +523,9 @@ dt_msg_send_outside_query(struct dt_env *env,
struct timeval qtime;
uint16_t qflags;
if(dt_sample_rate_limited(env))
return;
gettimeofday(&qtime, NULL);
qflags = sldns_buffer_read_u16_at(qmsg, 2);
@ -537,6 +579,9 @@ dt_msg_send_outside_response(struct dt_env *env,
struct dt_msg dm;
uint16_t qflags;
if(dt_sample_rate_limited(env))
return;
(void)qbuf_len; log_assert(qbuf_len >= sizeof(qflags));
memcpy(&qflags, qbuf, sizeof(qflags));
qflags = ntohs(qflags);

View File

@ -39,6 +39,7 @@
#ifdef USE_DNSTAP
#include "util/locks.h"
struct config_file;
struct sldns_buffer;
struct dt_msg_queue;
@ -75,6 +76,13 @@ struct dt_env {
unsigned log_forwarder_query_messages : 1;
/** whether to log Message/FORWARDER_RESPONSE */
unsigned log_forwarder_response_messages : 1;
/** lock on sample count */
lock_basic_type sample_lock;
/** rate limit value from config, samples 1/N messages */
unsigned int sample_rate;
/** rate limit counter */
unsigned int sample_rate_count;
};
/**

View File

@ -1,3 +1,7 @@
19 July 2024: Wouter
- Add dnstap-sample-rate that logs only 1/N messages, for high volume
server environments. Thanks Dan Luther.
16 July 2024: Wouter
- For #1103: Fix to drop mesh state reference for the http2 stream
associated with the reply, not the currently active stream. And

View File

@ -1329,6 +1329,8 @@ remote-control:
# dnstap-identity: ""
# # if "" it uses the package version.
# dnstap-version: ""
# # log only 1/N messages, if 0 it is disabled. default 0.
# dnstap-sample-rate: 0
# dnstap-log-resolver-query-messages: no
# dnstap-log-resolver-response-messages: no
# dnstap-log-client-query-messages: no

View File

@ -2852,6 +2852,13 @@ Default is "".
The version to send with messages, if "" the package version is used.
Default is "".
.TP
.B dnstap-sample-rate: \fI<number>
The sample rate for logging messages; only 1/N of the messages are logged.
With 0, sampling is disabled and every message is logged. Default is 0.
This is useful in a high volume environment, where the log functionality
would otherwise not be reliable. For example, with 10 only a tenth of the
time is spent on logging, and with 100 only a hundredth of the time is
spent on logging.
.TP
.B dnstap-log-resolver-query-messages: \fI<yes or no>
Enable to log resolver query messages. Default is no.
These are messages from Unbound to upstream servers.

View File

@ -770,6 +770,7 @@ int config_set_option(struct config_file* cfg, const char* opt,
else S_YNO("dnstap-send-version:", dnstap_send_version)
else S_STR("dnstap-identity:", dnstap_identity)
else S_STR("dnstap-version:", dnstap_version)
else S_NUMBER_OR_ZERO("dnstap-sample-rate:", dnstap_sample_rate)
else S_YNO("dnstap-log-resolver-query-messages:",
dnstap_log_resolver_query_messages)
else S_YNO("dnstap-log-resolver-response-messages:",
@ -1249,6 +1250,7 @@ config_get_option(struct config_file* cfg, const char* opt,
else O_YNO(opt, "dnstap-send-version", dnstap_send_version)
else O_STR(opt, "dnstap-identity", dnstap_identity)
else O_STR(opt, "dnstap-version", dnstap_version)
else O_UNS(opt, "dnstap-sample-rate", dnstap_sample_rate)
else O_YNO(opt, "dnstap-log-resolver-query-messages",
dnstap_log_resolver_query_messages)
else O_YNO(opt, "dnstap-log-resolver-response-messages",

View File

@ -592,6 +592,8 @@ struct config_file {
char* dnstap_identity;
/** dnstap "version", package version is used if "". */
char* dnstap_version;
/** dnstap sample rate */
int dnstap_sample_rate;
/** true to log dnstap RESOLVER_QUERY message events */
int dnstap_log_resolver_query_messages;

View File

@ -513,6 +513,7 @@ dnstap-log-forwarder-query-messages{COLON} {
YDVAR(1, VAR_DNSTAP_LOG_FORWARDER_QUERY_MESSAGES) }
dnstap-log-forwarder-response-messages{COLON} {
YDVAR(1, VAR_DNSTAP_LOG_FORWARDER_RESPONSE_MESSAGES) }
dnstap-sample-rate{COLON}	{ YDVAR(1, VAR_DNSTAP_SAMPLE_RATE) }
disable-dnssec-lame-check{COLON} { YDVAR(1, VAR_DISABLE_DNSSEC_LAME_CHECK) }
ip-ratelimit{COLON} { YDVAR(1, VAR_IP_RATELIMIT) }
ip-ratelimit-cookie{COLON} { YDVAR(1, VAR_IP_RATELIMIT_COOKIE) }

View File

@ -137,6 +137,7 @@ extern struct config_parser_state* cfg_parser;
%token VAR_DNSTAP_LOG_CLIENT_RESPONSE_MESSAGES
%token VAR_DNSTAP_LOG_FORWARDER_QUERY_MESSAGES
%token VAR_DNSTAP_LOG_FORWARDER_RESPONSE_MESSAGES
%token VAR_DNSTAP_SAMPLE_RATE
%token VAR_RESPONSE_IP_TAG VAR_RESPONSE_IP VAR_RESPONSE_IP_DATA
%token VAR_HARDEN_ALGO_DOWNGRADE VAR_IP_TRANSPARENT
%token VAR_IP_DSCP
@ -3453,7 +3454,8 @@ content_dt: dt_dnstap_enable | dt_dnstap_socket_path | dt_dnstap_bidirectional |
dt_dnstap_log_client_query_messages |
dt_dnstap_log_client_response_messages |
dt_dnstap_log_forwarder_query_messages |
dt_dnstap_log_forwarder_response_messages
dt_dnstap_log_forwarder_response_messages |
dt_dnstap_sample_rate
;
dt_dnstap_enable: VAR_DNSTAP_ENABLE STRING_ARG
{
@ -3617,6 +3619,17 @@ dt_dnstap_log_forwarder_response_messages: VAR_DNSTAP_LOG_FORWARDER_RESPONSE_MES
free($2);
}
;
dt_dnstap_sample_rate: VAR_DNSTAP_SAMPLE_RATE STRING_ARG
	{
		/* numeric value of the argument, converted once */
		int rate = atoi($2);
		OUTYY(("P(dt_dnstap_sample_rate:%s)\n", $2));
		if(rate < 0) {
			/* negative sample rates are rejected */
			yyerror("dnstap sample rate too small");
		} else if(rate == 0 && strcmp($2, "0") != 0) {
			/* atoi gave 0 but the text is not literally "0" */
			yyerror("number expected");
		} else {
			cfg_parser->cfg->dnstap_sample_rate = rate;
		}
		/* the string argument is released here after use */
		free($2);
	}
	;
pythonstart: VAR_PYTHON
{
OUTYY(("\nP(python:)\n"));