EDNS fallback on timeout, and rtt backoff that copes with multiple queries timing out.

git-svn-id: file:///svn/unbound/trunk@1272 be551aaa-1e26-0410-a405-d3ace91eadb9
This commit is contained in:
Wouter Wijngaards 2008-09-29 14:50:35 +00:00
parent a2b261f8b2
commit d4fadf55a8
11 changed files with 119 additions and 31 deletions

View File

@ -1,3 +1,8 @@
29 September 2008: Wouter
- EDNS lameness detection: if EDNS packets are dropped, this is
detected, eventually.
- when multiple queries time out at once, the rtt backoff does not back off too much.
26 September 2008: Wouter
- tests for remote-control.
- small memory leak in exception during remote control fixed.

View File

@ -208,13 +208,15 @@ new_host_entry(struct infra_cache* infra, struct sockaddr_storage* addr,
data->ttl = tm + infra->host_ttl;
data->lameness = NULL;
data->edns_version = 0;
data->edns_lame_known = 0;
rtt_init(&data->rtt);
return &key->entry;
}
int
infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
socklen_t addrlen, uint32_t timenow, int* edns_vs, int* to)
socklen_t addrlen, uint32_t timenow, int* edns_vs,
uint8_t* edns_lame_known, int* to)
{
struct lruhash_entry* e = infra_lookup_host_nottl(infra, addr,
addrlen, 0);
@ -231,6 +233,7 @@ infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
rtt_init(&data->rtt);
/* do not touch lameness, it may be valid still */
data->edns_version = 0;
data->edns_lame_known = 0;
}
}
if(!e) {
@ -240,6 +243,7 @@ infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
data = (struct infra_host_data*)e->data;
*to = rtt_timeout(&data->rtt);
*edns_vs = data->edns_version;
*edns_lame_known = data->edns_lame_known;
slabhash_insert(infra->hosts, e->hash, e, data, NULL);
return 1;
}
@ -247,6 +251,7 @@ infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
data = (struct infra_host_data*)e->data;
*to = rtt_timeout(&data->rtt);
*edns_vs = data->edns_version;
*edns_lame_known = data->edns_lame_known;
lock_rw_unlock(&e->lock);
return 1;
}
@ -438,7 +443,7 @@ infra_update_tcp_works(struct infra_cache* infra,
int
infra_rtt_update(struct infra_cache* infra,
struct sockaddr_storage* addr, socklen_t addrlen,
int roundtrip, uint32_t timenow)
int roundtrip, int orig_rtt, uint32_t timenow)
{
struct lruhash_entry* e = infra_lookup_host_nottl(infra, addr,
addrlen, 1);
@ -454,7 +459,7 @@ infra_rtt_update(struct infra_cache* infra,
data = (struct infra_host_data*)e->data;
data->ttl = timenow + infra->host_ttl;
if(roundtrip == -1)
rtt_lost(&data->rtt);
rtt_lost(&data->rtt, orig_rtt);
else rtt_update(&data->rtt, roundtrip);
if(data->rtt.rto > 0)
rto = data->rtt.rto;
@ -483,6 +488,7 @@ infra_edns_update(struct infra_cache* infra,
data = (struct infra_host_data*)e->data;
data->ttl = timenow + infra->host_ttl;
data->edns_version = edns_version;
data->edns_lame_known = 1;
if(needtoinsert)
slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);

View File

@ -70,6 +70,10 @@ struct infra_host_data {
struct lruhash* lameness;
/** edns version that the host supports, -1 means no EDNS */
int edns_version;
/** if the EDNS lameness is already known or not.
* EDNS lame is when EDNS queries or replies are dropped,
* and cause a timeout */
uint8_t edns_lame_known;
};
/**
@ -166,11 +170,14 @@ struct infra_host_data* infra_lookup_host(struct infra_cache* infra,
* @param addrlen: length of addr.
* @param timenow: what time it is now.
* @param edns_vs: edns version it supports, is returned.
* @param edns_lame_known: if EDNS lame (EDNS is dropped in transit) has
* already been probed, is returned.
* @param to: timeout to use, is returned.
* @return: 0 on error.
*/
int infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
socklen_t addrlen, uint32_t timenow, int* edns_vs, int* to);
socklen_t addrlen, uint32_t timenow, int* edns_vs,
uint8_t* edns_lame_known, int* to);
/**
* Check for lameness of this server for a particular zone.
@ -213,12 +220,14 @@ int infra_set_lame(struct infra_cache* infra,
* @param addrlen: length of addr.
* @param roundtrip: estimate of roundtrip time in milliseconds or -1 for
* timeout.
* @param orig_rtt: original rtt for the query that timed out (roundtrip==-1).
* ignored if roundtrip != -1.
* @param timenow: what time it is now.
* @return: 0 on error. new rto otherwise.
*/
int infra_rtt_update(struct infra_cache* infra,
struct sockaddr_storage* addr, socklen_t addrlen,
int roundtrip, uint32_t timenow);
int roundtrip, int orig_rtt, uint32_t timenow);
/**
* Update information for the host, store that a TCP transaction works.

View File

@ -1084,7 +1084,8 @@ serviced_delete(struct serviced_query* sq)
if(sq->pending) {
/* clear up the pending query */
if(sq->status == serviced_query_UDP_EDNS ||
sq->status == serviced_query_UDP) {
sq->status == serviced_query_UDP ||
sq->status == serviced_query_PROBE_EDNS) {
struct pending* p = (struct pending*)sq->pending;
if(p->pc)
portcomm_loweruse(sq->outnet, p->pc);
@ -1184,18 +1185,30 @@ static int
serviced_udp_send(struct serviced_query* sq, ldns_buffer* buff)
{
int rtt, vs;
uint8_t edns_lame_known;
uint32_t now = *sq->outnet->now_secs;
if(!infra_host(sq->outnet->infra, &sq->addr, sq->addrlen, now, &vs,
&rtt))
&edns_lame_known, &rtt))
return 0;
if(sq->status == serviced_initial) {
if(vs != -1)
if(edns_lame_known == 0 && rtt > 5000) {
/* perform EDNS lame probe - check if server is
* EDNS lame (EDNS queries to it are dropped) */
verbose(VERB_ALGO, "serviced query: send probe to see "
" if use of EDNS causes timeouts");
rtt /= 10;
sq->status = serviced_query_PROBE_EDNS;
} else if(vs != -1) {
sq->status = serviced_query_UDP_EDNS;
else sq->status = serviced_query_UDP;
} else {
sq->status = serviced_query_UDP;
}
}
serviced_encode(sq, buff, sq->status == serviced_query_UDP_EDNS);
sq->last_sent_time = *sq->outnet->now_tv;
sq->last_rtt = rtt;
sq->edns_lame_known = (int)edns_lame_known;
verbose(VERB_ALGO, "serviced query UDP timeout=%d msec", rtt);
sq->pending = pending_udp_query(sq->outnet, buff, &sq->addr,
sq->addrlen, rtt, serviced_udp_callback, sq);
@ -1392,9 +1405,17 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error,
sq->pending = NULL; /* removed after callback */
if(error == NETEVENT_TIMEOUT) {
int rto = 0;
if(sq->status == serviced_query_PROBE_EDNS) {
/* non-EDNS probe failed; not an EDNS lame server */
if(!infra_edns_update(outnet->infra, &sq->addr,
sq->addrlen, 0, (uint32_t)now.tv_sec)) {
log_err("Out of memory caching edns works");
}
sq->status = serviced_query_UDP_EDNS;
}
sq->retry++;
if(!(rto=infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen,
-1, (uint32_t)now.tv_sec)))
-1, sq->last_rtt, (uint32_t)now.tv_sec)))
log_err("out of memory in UDP exponential backoff");
if(sq->retry < OUTBOUND_UDP_RETRY) {
log_name_addr(VERB_ALGO, "retry query", sq->qbuf+10,
@ -1439,6 +1460,25 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error,
return 0;
}
/* yay! an answer */
if(sq->status == serviced_query_PROBE_EDNS) {
/* probe without EDNS succeeds, so we conclude that this
* host likely has EDNS packets dropped */
log_addr(VERB_OPS, "timeouts, concluded that connection to "
"host drops EDNS packets", &sq->addr, sq->addrlen);
if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen,
-1, (uint32_t)now.tv_sec)) {
log_err("Out of memory caching no edns for host");
}
sq->status = serviced_query_UDP;
} else if(sq->status == serviced_query_UDP_EDNS &&
!sq->edns_lame_known) {
/* now we know that edns queries receive answers; store that */
if(!infra_edns_update(outnet->infra, &sq->addr, sq->addrlen,
0, (uint32_t)now.tv_sec)) {
log_err("Out of memory caching edns works");
}
sq->edns_lame_known = 1;
}
if(now.tv_sec > sq->last_sent_time.tv_sec ||
(now.tv_sec == sq->last_sent_time.tv_sec &&
now.tv_usec > sq->last_sent_time.tv_usec)) {
@ -1448,7 +1488,7 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error,
verbose(VERB_ALGO, "measured roundtrip at %d msec", roundtime);
log_assert(roundtime >= 0);
if(!infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen,
roundtime, (uint32_t)now.tv_sec))
roundtime, sq->last_rtt, (uint32_t)now.tv_sec))
log_err("out of memory noting rtt.");
}
serviced_callbacks(sq, error, c, rep);
@ -1631,7 +1671,8 @@ serviced_get_mem(struct serviced_query* sq)
for(sb = sq->cblist; sb; sb = sb->next)
s += sizeof(*sb);
if(sq->status == serviced_query_UDP_EDNS ||
sq->status == serviced_query_UDP) {
sq->status == serviced_query_UDP ||
sq->status == serviced_query_PROBE_EDNS) {
s += sizeof(struct pending);
s += comm_timer_get_mem(NULL);
} else {

View File

@ -294,7 +294,9 @@ struct serviced_query {
/** TCP with EDNS sent */
serviced_query_TCP_EDNS,
/** TCP without EDNS sent */
serviced_query_TCP
serviced_query_TCP,
/** probe to test EDNS lameness (EDNS is dropped) */
serviced_query_PROBE_EDNS
}
/** variable with current status */
status;
@ -304,6 +306,10 @@ struct serviced_query {
int retry;
/** time last UDP was sent */
struct timeval last_sent_time;
/** rtt of last (UDP) message */
int last_rtt;
/** do we know edns probe status already, for UDP_EDNS queries */
int edns_lame_known;
/** outside network this is part of */
struct outside_network* outnet;
/** list of interested parties that need callback on results. */

View File

@ -112,6 +112,8 @@ static void matchline(char* line, struct entry* e)
e->match_ttl = true;
} else if(str_keyword(&parse, "DO")) {
e->match_do = true;
} else if(str_keyword(&parse, "noedns")) {
e->match_noedns = true;
} else if(str_keyword(&parse, "UDP")) {
e->match_transport = transport_udp;
} else if(str_keyword(&parse, "TCP")) {
@ -233,6 +235,7 @@ static struct entry* new_entry()
e->match_all = false;
e->match_ttl = false;
e->match_do = false;
e->match_noedns = false;
e->match_serial = false;
e->ixfr_soa_serial = 0;
e->match_transport = transport_any;
@ -697,6 +700,10 @@ find_match(struct entry* entries, ldns_pkt* query_pkt,
verbose(3, "no DO bit set\n");
continue;
}
if(p->match_noedns && ldns_pkt_edns(query_pkt)) {
verbose(3, "bad; EDNS OPT present\n");
continue;
}
if(p->match_transport != transport_any && p->match_transport != transport) {
verbose(3, "bad transport\n");
continue;

View File

@ -47,6 +47,7 @@
; 'all' has to match header byte for byte and all rrs in packet.
; 'ttl' used with all, rrs in packet must also have matching TTLs.
; 'DO' will match only queries with DO bit set.
; 'noedns' matches queries without EDNS OPT records.
MATCH [opcode] [qtype] [qname] [serial=<value>] [all] [ttl]
MATCH [UDP|TCP] DO
MATCH ...
@ -168,6 +169,8 @@ struct entry {
bool match_ttl;
/** match DO bit */
bool match_do;
/** match absence of EDNS OPT record in query */
bool match_noedns;
/** match query serial with this value. */
uint32_t ixfr_soa_serial;
/** match on UDP/TCP */

View File

@ -289,15 +289,15 @@ rtt_test()
rtt_init(&r);
/* initial value sensible */
unit_assert( rtt_timeout(&r) == init );
rtt_lost(&r);
rtt_lost(&r, init);
unit_assert( rtt_timeout(&r) == init*2 );
rtt_lost(&r);
rtt_lost(&r, init*2);
unit_assert( rtt_timeout(&r) == init*4 );
rtt_update(&r, 4000);
unit_assert( rtt_timeout(&r) >= 2000 );
rtt_lost(&r);
rtt_lost(&r, rtt_timeout(&r) );
for(i=0; i<100; i++) {
rtt_lost(&r);
rtt_lost(&r, rtt_timeout(&r) );
unit_assert( rtt_timeout(&r) > RTT_MIN_TIMEOUT-1);
unit_assert( rtt_timeout(&r) < RTT_MAX_TIMEOUT+1);
}
@ -315,6 +315,7 @@ infra_test()
struct infra_cache* slab;
struct config_file* cfg = config_create();
uint32_t now = 0;
uint8_t edns_lame;
int vs, to;
struct infra_host_key* k;
struct infra_host_data* d;
@ -323,25 +324,25 @@ infra_test()
slab = infra_create(cfg);
unit_assert( infra_host(slab, (struct sockaddr_storage*)&one,
(socklen_t)sizeof(int), now, &vs, &to) );
unit_assert( vs == 0 && to == init );
(socklen_t)sizeof(int), now, &vs, &edns_lame, &to) );
unit_assert( vs == 0 && to == init && edns_lame == 0 );
unit_assert( infra_rtt_update(slab, (struct sockaddr_storage*)&one,
(socklen_t)sizeof(int), -1, now) );
(socklen_t)sizeof(int), -1, init, now) );
unit_assert( infra_host(slab, (struct sockaddr_storage*)&one,
(socklen_t)sizeof(int), now, &vs, &to) );
unit_assert( vs == 0 && to == init*2 );
(socklen_t)sizeof(int), now, &vs, &edns_lame, &to) );
unit_assert( vs == 0 && to == init*2 && edns_lame == 0 );
unit_assert( infra_edns_update(slab, (struct sockaddr_storage*)&one,
(socklen_t)sizeof(int), -1, now) );
unit_assert( infra_host(slab, (struct sockaddr_storage*)&one,
(socklen_t)sizeof(int), now, &vs, &to) );
unit_assert( vs == -1 && to == init*2 );
(socklen_t)sizeof(int), now, &vs, &edns_lame, &to) );
unit_assert( vs == -1 && to == init*2 && edns_lame == 1);
now += cfg->host_ttl + 10;
unit_assert( infra_host(slab, (struct sockaddr_storage*)&one,
(socklen_t)sizeof(int), now, &vs, &to) );
unit_assert( vs == 0 && to == init );
(socklen_t)sizeof(int), now, &vs, &edns_lame, &to) );
unit_assert( vs == 0 && to == init && edns_lame == 0 );
unit_assert( infra_set_lame(slab, (struct sockaddr_storage*)&one,
(socklen_t)sizeof(int), zone, zonelen, now, 0,

BIN
testdata/edns_lame.tpkg vendored Normal file

Binary file not shown.

View File

@ -95,10 +95,17 @@ rtt_update(struct rtt_info* rtt, int ms)
}
void
rtt_lost(struct rtt_info* rtt)
rtt_lost(struct rtt_info* rtt, int orig)
{
/* exponential backoff */
rtt->rto *= 2;
if(rtt->rto > RTT_MAX_TIMEOUT)
rtt->rto = RTT_MAX_TIMEOUT;
/* the original rto is doubled, not the current one to make sure
* that the values in the cache are not increased by lots of
* queries simultaneously as they time out at the same time */
orig *= 2;
if(rtt->rto <= orig) {
rtt->rto = orig;
if(rtt->rto > RTT_MAX_TIMEOUT)
rtt->rto = RTT_MAX_TIMEOUT;
}
}

View File

@ -91,7 +91,10 @@ void rtt_update(struct rtt_info* rtt, int ms);
/**
* Update the statistics with a new timeout expired observation.
* @param rtt: round trip statistics structure.
* @param orig: original rtt time given for the query that timed out.
* Used to bound the backoff: the timeout is raised to at most twice
* this value, so simultaneous timeouts do not each double it again.
*/
void rtt_lost(struct rtt_info* rtt);
void rtt_lost(struct rtt_info* rtt, int orig);
#endif /* UTIL_RTT_H */