- Fix timeouts to keep track of query type, A, AAAA and other, if

another has caused timeout blacklist, different type can still probe.


git-svn-id: file:///svn/unbound/trunk@2613 be551aaa-1e26-0410-a405-d3ace91eadb9
This commit is contained in:
Wouter Wijngaards 2012-02-10 12:17:25 +00:00
parent 358fd2ad07
commit 09b9ea04a3
9 changed files with 350 additions and 18 deletions

View File

@ -1,5 +1,7 @@
10 February 2012: Wouter
- Slightly smaller critical region in one case in infra cache.
- Fix timeouts to keep track of query type, A, AAAA and other, if
another has caused timeout blacklist, different type can still probe.
9 February 2012: Wouter
- Fix AHX_BROKEN_MEMCMP for autoheader mess up of #undef in config.h.

View File

@ -52,6 +52,11 @@
/** Timeout when only a single probe query per IP is allowed. */
#define PROBE_MAXRTO 12000 /* in msec */
/** number of timeouts for a query type after which the domain can be blocked;
* even if another type has completely maxed out the rtt, a different type
* can still send this number of packets (until those all time out too) */
#define TIMEOUT_COUNT_MAX 3
size_t
infra_sizefunc(void* k, void* ATTR_UNUSED(d))
{
@ -196,6 +201,9 @@ data_entry_init(struct infra_cache* infra, struct lruhash_entry* e,
data->rec_lame = 0;
data->lame_type_A = 0;
data->lame_other = 0;
data->timeout_A = 0;
data->timeout_AAAA = 0;
data->timeout_other = 0;
}
/**
@ -250,6 +258,9 @@ infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
if(e && ((struct infra_data*)e->data)->ttl < timenow) {
/* it expired, try to reuse existing entry */
int old = ((struct infra_data*)e->data)->rtt.rto;
uint8_t tA = ((struct infra_data*)e->data)->timeout_A;
uint8_t tAAAA = ((struct infra_data*)e->data)->timeout_AAAA;
uint8_t tother = ((struct infra_data*)e->data)->timeout_other;
lock_rw_unlock(&e->lock);
e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
if(e) {
@ -259,9 +270,13 @@ infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
data_entry_init(infra, e, timenow);
wr = 1;
/* TOP_TIMEOUT remains on reuse */
if(old >= USEFUL_SERVER_TOP_TIMEOUT)
if(old >= USEFUL_SERVER_TOP_TIMEOUT) {
((struct infra_data*)e->data)->rtt.rto
= USEFUL_SERVER_TOP_TIMEOUT;
((struct infra_data*)e->data)->timeout_A = tA;
((struct infra_data*)e->data)->timeout_AAAA = tAAAA;
((struct infra_data*)e->data)->timeout_other = tother;
}
}
}
if(!e) {
@ -358,8 +373,8 @@ infra_update_tcp_works(struct infra_cache* infra,
int
infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr,
socklen_t addrlen, uint8_t* nm, size_t nmlen, int roundtrip,
int orig_rtt, uint32_t timenow)
socklen_t addrlen, uint8_t* nm, size_t nmlen, int qtype,
int roundtrip, int orig_rtt, uint32_t timenow)
{
struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
nm, nmlen, 1);
@ -377,9 +392,24 @@ infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr,
data = (struct infra_data*)e->data;
if(roundtrip == -1) {
rtt_lost(&data->rtt, orig_rtt);
if(qtype == LDNS_RR_TYPE_A) {
if(data->timeout_A < TIMEOUT_COUNT_MAX)
data->timeout_A++;
} else if(qtype == LDNS_RR_TYPE_AAAA) {
if(data->timeout_AAAA < TIMEOUT_COUNT_MAX)
data->timeout_AAAA++;
} else {
if(data->timeout_other < TIMEOUT_COUNT_MAX)
data->timeout_other++;
}
} else {
rtt_update(&data->rtt, roundtrip);
data->probedelay = 0;
if(qtype == LDNS_RR_TYPE_A)
data->timeout_A = 0;
else if(qtype == LDNS_RR_TYPE_AAAA)
data->timeout_AAAA = 0;
else data->timeout_other = 0;
}
if(data->rtt.rto > 0)
rto = data->rtt.rto;
@ -456,9 +486,23 @@ infra_get_lame_rtt(struct infra_cache* infra,
host = (struct infra_data*)e->data;
*rtt = rtt_unclamped(&host->rtt);
if(host->rtt.rto >= PROBE_MAXRTO && timenow < host->probedelay
&& rtt_notimeout(&host->rtt)*4 <= host->rtt.rto)
&& rtt_notimeout(&host->rtt)*4 <= host->rtt.rto) {
/* single probe for this domain, and we are not probing */
*rtt = USEFUL_SERVER_TOP_TIMEOUT;
/* unless the query type allows a probe to happen */
if(qtype == LDNS_RR_TYPE_A) {
if(host->timeout_A >= TIMEOUT_COUNT_MAX)
*rtt = USEFUL_SERVER_TOP_TIMEOUT;
else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
} else if(qtype == LDNS_RR_TYPE_AAAA) {
if(host->timeout_AAAA >= TIMEOUT_COUNT_MAX)
*rtt = USEFUL_SERVER_TOP_TIMEOUT;
else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
} else {
if(host->timeout_other >= TIMEOUT_COUNT_MAX)
*rtt = USEFUL_SERVER_TOP_TIMEOUT;
else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
}
}
if(timenow > host->ttl) {
/* expired entry */
/* see if this can be a re-probe of an unresponsive server */

View File

@ -91,6 +91,13 @@ struct infra_data {
uint8_t lame_type_A;
/** the host is lame (not authoritative) for other query types */
uint8_t lame_other;
/** timeouts counter for type A */
uint8_t timeout_A;
/** timeouts counter for type AAAA */
uint8_t timeout_AAAA;
/** timeouts counter for others */
uint8_t timeout_other;
};
/**
@ -195,6 +202,7 @@ int infra_set_lame(struct infra_cache* infra,
* @param addrlen: length of addr.
* @param name: zone name
* @param namelen: zone name length
* @param qtype: query type.
* @param roundtrip: estimate of roundtrip time in milliseconds or -1 for
* timeout.
* @param orig_rtt: original rtt for the query that timed out (roundtrip==-1).
@ -203,7 +211,7 @@ int infra_set_lame(struct infra_cache* infra,
* @return: 0 on error. new rto otherwise.
*/
int infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr,
socklen_t addrlen, uint8_t* name, size_t namelen,
socklen_t addrlen, uint8_t* name, size_t namelen, int qtype,
int roundtrip, int orig_rtt, uint32_t timenow);
/**

View File

@ -1166,7 +1166,7 @@ static struct serviced_query*
serviced_create(struct outside_network* outnet, ldns_buffer* buff, int dnssec,
int want_dnssec, int tcp_upstream, int ssl_upstream,
struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* zone,
size_t zonelen)
size_t zonelen, int qtype)
{
struct serviced_query* sq = (struct serviced_query*)malloc(sizeof(*sq));
#ifdef UNBOUND_DEBUG
@ -1188,6 +1188,7 @@ serviced_create(struct outside_network* outnet, ldns_buffer* buff, int dnssec,
return NULL;
}
sq->zonelen = zonelen;
sq->qtype = qtype;
sq->dnssec = dnssec;
sq->want_dnssec = want_dnssec;
sq->tcp_upstream = tcp_upstream;
@ -1566,8 +1567,8 @@ serviced_tcp_callback(struct comm_point* c, void* arg, int error,
* huge due to system-hibernated and we woke up */
if(roundtime < TCP_AUTH_QUERY_TIMEOUT*1000) {
if(!infra_rtt_update(sq->outnet->infra, &sq->addr,
sq->addrlen, sq->zone, sq->zonelen, roundtime,
sq->last_rtt, (uint32_t)now.tv_sec))
sq->addrlen, sq->zone, sq->zonelen, sq->qtype,
roundtime, sq->last_rtt, (uint32_t)now.tv_sec))
log_err("out of memory noting rtt.");
}
}
@ -1658,7 +1659,7 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error,
}
sq->retry++;
if(!(rto=infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen,
sq->zone, sq->zonelen, -1, sq->last_rtt,
sq->zone, sq->zonelen, sq->qtype, -1, sq->last_rtt,
(uint32_t)now.tv_sec)))
log_err("out of memory in UDP exponential backoff");
if(sq->retry < OUTBOUND_UDP_RETRY) {
@ -1752,8 +1753,8 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error,
* above this value gives trouble with server selection */
if(roundtime < 60000) {
if(!infra_rtt_update(outnet->infra, &sq->addr, sq->addrlen,
sq->zone, sq->zonelen, roundtime, sq->last_rtt,
(uint32_t)now.tv_sec))
sq->zone, sq->zonelen, sq->qtype, roundtime,
sq->last_rtt, (uint32_t)now.tv_sec))
log_err("out of memory noting rtt.");
}
}
@ -1814,7 +1815,7 @@ outnet_serviced_query(struct outside_network* outnet,
/* make new serviced query entry */
sq = serviced_create(outnet, buff, dnssec, want_dnssec,
tcp_upstream, ssl_upstream, addr, addrlen, zone,
zonelen);
zonelen, qtype);
if(!sq) {
free(cb);
return NULL;

View File

@ -312,6 +312,8 @@ struct serviced_query {
uint8_t* zone;
/** length of zone name */
size_t zonelen;
/** qtype */
int qtype;
/** current status */
enum serviced_query_status {
/** initial status */

View File

@ -147,6 +147,7 @@ delete_fake_pending(struct fake_pending* pend)
{
if(!pend)
return;
free(pend->zone);
ldns_buffer_free(pend->buffer);
ldns_pkt_free(pend->pkt);
free(pend);
@ -554,7 +555,7 @@ do_infra_rtt(struct replay_runtime* runtime)
if(!dp) fatal_exit("cannot parse %s", now->variable);
rto = infra_rtt_update(runtime->infra, &now->addr,
now->addrlen, ldns_rdf_data(dp), ldns_rdf_size(dp),
atoi(now->string), -1, runtime->now_secs);
LDNS_RR_TYPE_A, atoi(now->string), -1, runtime->now_secs);
log_addr(0, "INFRA_RTT for", &now->addr, now->addrlen);
log_info("INFRA_RTT(%s roundtrip %d): rto of %d", now->variable,
atoi(now->string), rto);
@ -562,6 +563,24 @@ do_infra_rtt(struct replay_runtime* runtime)
ldns_rdf_deep_free(dp);
}
/** perform exponential backoff on the timout */
static void
expon_timeout_backoff(struct replay_runtime* runtime)
{
struct fake_pending* p = runtime->pending_list;
int rtt, vs;
uint8_t edns_lame_known;
int last_rtt, rto;
if(!p) return; /* no pending packet to backoff */
if(!infra_host(runtime->infra, &p->addr, p->addrlen, p->zone,
p->zonelen, runtime->now_secs, &vs, &edns_lame_known, &rtt))
return;
last_rtt = rtt;
rto = infra_rtt_update(runtime->infra, &p->addr, p->addrlen, p->zone,
p->zonelen, p->qtype, -1, last_rtt, runtime->now_secs);
log_info("infra_rtt_update returned rto %d", rto);
}
/**
* Advance to the next moment.
*/
@ -608,6 +627,7 @@ do_moment_and_advance(struct replay_runtime* runtime)
case repevt_timeout:
mom = runtime->now;
advance_moment(runtime);
expon_timeout_backoff(runtime);
fake_pending_callback(runtime, mom, NETEVENT_TIMEOUT);
break;
case repevt_back_reply:
@ -929,6 +949,7 @@ pending_udp_query(struct outside_network* outnet, ldns_buffer* packet,
pend->timeout = timeout/1000;
pend->transport = transport_udp;
pend->pkt = NULL;
pend->zone = NULL;
pend->serviced = 0;
pend->runtime = runtime;
status = ldns_buffer2pkt_wire(&pend->pkt, packet);
@ -982,6 +1003,7 @@ pending_tcp_query(struct outside_network* outnet, ldns_buffer* packet,
pend->timeout = timeout;
pend->transport = transport_tcp;
pend->pkt = NULL;
pend->zone = NULL;
pend->runtime = runtime;
pend->serviced = 0;
status = ldns_buffer2pkt_wire(&pend->pkt, packet);
@ -1017,9 +1039,8 @@ struct serviced_query* outnet_serviced_query(struct outside_network* outnet,
uint16_t flags, int dnssec, int ATTR_UNUSED(want_dnssec),
int ATTR_UNUSED(tcp_upstream), int ATTR_UNUSED(ssl_upstream),
struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* zone,
size_t ATTR_UNUSED(zonelen), comm_point_callback_t* callback,
void* callback_arg, ldns_buffer* ATTR_UNUSED(buff),
int (*arg_compare)(void*,void*))
size_t zonelen, comm_point_callback_t* callback, void* callback_arg,
ldns_buffer* ATTR_UNUSED(buff), int (*arg_compare)(void*,void*))
{
struct replay_runtime* runtime = (struct replay_runtime*)outnet->base;
struct fake_pending* pend = (struct fake_pending*)calloc(1,
@ -1062,6 +1083,10 @@ struct serviced_query* outnet_serviced_query(struct outside_network* outnet,
}
memcpy(&pend->addr, addr, addrlen);
pend->addrlen = addrlen;
pend->zone = memdup(zone, zonelen);
pend->zonelen = zonelen;
pend->qtype = qtype;
log_assert(pend->zone);
pend->callback = callback;
pend->cb_arg = callback_arg;
pend->timeout = UDP_AUTH_QUERY_TIMEOUT;

View File

@ -323,6 +323,12 @@ struct fake_pending {
struct sockaddr_storage addr;
/** len of addr */
socklen_t addrlen;
/** zone name, uncompressed wire format (as used when sent) */
uint8_t* zone;
/** length of zone name */
size_t zonelen;
/** qtype */
int qtype;
/** The callback function to call when answer arrives (or timeout) */
comm_point_callback_t* callback;
/** callback user argument */

View File

@ -445,7 +445,7 @@ infra_test(void)
&vs, &edns_lame, &to) );
unit_assert( vs == 0 && to == init && edns_lame == 0 );
unit_assert( infra_rtt_update(slab, &one, onelen, zone, zonelen, -1, init, now) );
unit_assert( infra_rtt_update(slab, &one, onelen, zone, zonelen, LDNS_RR_TYPE_A, -1, init, now) );
unit_assert( infra_host(slab, &one, onelen, zone, zonelen,
now, &vs, &edns_lame, &to) );
unit_assert( vs == 0 && to == init*2 && edns_lame == 0 );

244
testdata/iter_timeout_ra_aaaa.rpl vendored Normal file
View File

@ -0,0 +1,244 @@
; config options
server:
target-fetch-policy: "0 0 0 0 0"
stub-zone:
name: "."
stub-addr: 193.0.14.129 # K.ROOT-SERVERS.NET.
CONFIG_END
SCENARIO_BEGIN Test iterator with timeouts on reclame AAAA dropping server
; K.ROOT-SERVERS.NET.
RANGE_BEGIN 0 100
ADDRESS 193.0.14.129
ENTRY_BEGIN
MATCH opcode qtype qname
ADJUST copy_id
REPLY QR NOERROR
SECTION QUESTION
. IN NS
SECTION ANSWER
. IN NS K.ROOT-SERVERS.NET.
SECTION ADDITIONAL
K.ROOT-SERVERS.NET. IN A 193.0.14.129
ENTRY_END
ENTRY_BEGIN
MATCH opcode subdomain
ADJUST copy_id copy_query
REPLY QR NOERROR
SECTION QUESTION
com. IN A
SECTION AUTHORITY
com. IN NS a.gtld-servers.net.
SECTION ADDITIONAL
a.gtld-servers.net. IN A 192.5.6.30
ENTRY_END
ENTRY_BEGIN
MATCH opcode qtype qname
ADJUST copy_id
REPLY QR AA NOERROR
SECTION QUESTION
ns.example.net. IN A
SECTION ANSWER
ns.example.net. IN A 1.2.3.4
ENTRY_END
ENTRY_BEGIN
MATCH opcode qtype qname
ADJUST copy_id
REPLY QR AA NOERROR
SECTION QUESTION
ns.example.net. IN AAAA
SECTION ANSWER
SECTION AUTHORITY
. IN SOA a. b. 1 2 3 4 5
ENTRY_END
ENTRY_BEGIN
MATCH opcode qtype qname
ADJUST copy_id
REPLY QR NOERROR
SECTION QUESTION
a.gtld-servers.net. IN AAAA
SECTION ANSWER
SECTION AUTHORITY
. IN SOA a. b. 1 2 3 4 5
ENTRY_END
RANGE_END
; a.gtld-servers.net.
RANGE_BEGIN 0 100
ADDRESS 192.5.6.30
ENTRY_BEGIN
MATCH opcode qtype qname
ADJUST copy_id
REPLY QR NOERROR
SECTION QUESTION
com. IN NS
SECTION ANSWER
com. IN NS a.gtld-servers.net.
SECTION ADDITIONAL
a.gtld-servers.net. IN A 192.5.6.30
ENTRY_END
ENTRY_BEGIN
MATCH opcode subdomain
ADJUST copy_id copy_query
REPLY QR NOERROR
SECTION QUESTION
example.com. IN A
SECTION AUTHORITY
example.com. 280 IN NS ns.example.net.
SECTION ADDITIONAL
ns.example.net. IN A 1.2.3.4
ENTRY_END
RANGE_END
; ns.example.net.
; This server is REC_LAME
RANGE_BEGIN 0 100
ADDRESS 1.2.3.4
ENTRY_BEGIN
MATCH opcode qtype qname
ADJUST copy_id
REPLY QR RA NOERROR
SECTION QUESTION
example.com. IN NS
SECTION ANSWER
example.com. 280 IN NS ns.example.net.
ENTRY_END
ENTRY_BEGIN
MATCH opcode qtype qname
ADJUST copy_id
REPLY QR RA NOERROR
SECTION QUESTION
www.example.com. IN A
SECTION ANSWER
www.example.com. 10 IN A 10.20.30.40
SECTION AUTHORITY
example.com. 280 IN NS ns.example.net.
ENTRY_END
RANGE_END
STEP 1 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
www.example.com. IN A
ENTRY_END
; recursion happens here.
STEP 10 CHECK_ANSWER
ENTRY_BEGIN
MATCH all
REPLY QR RD RA NOERROR
SECTION QUESTION
www.example.com. IN A
SECTION ANSWER
www.example.com. IN A 10.20.30.40
SECTION AUTHORITY
example.com. IN NS ns.example.net.
ENTRY_END
; query for (dropped) AAAA record.
STEP 20 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
www.example.com. IN AAAA
ENTRY_END
; the AAAA query times out.
STEP 21 TIMEOUT
STEP 22 TIMEOUT
STEP 23 TIMEOUT
STEP 24 TIMEOUT
STEP 25 TIMEOUT
; we get servfail, but the AAAA query arrives again (after the servfail times
; out of the cache)
STEP 30 CHECK_ANSWER
ENTRY_BEGIN
MATCH all
REPLY QR RD RA SERVFAIL
SECTION QUESTION
www.example.com. IN AAAA
ENTRY_END
STEP 31 TIME_PASSES ELAPSE 6
STEP 40 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
www.example.com. IN AAAA
ENTRY_END
; timeouts for AAAA keep happening.
STEP 41 TIMEOUT
STEP 42 CHECK_ANSWER
ENTRY_BEGIN
MATCH all
REPLY QR RD RA SERVFAIL
SECTION QUESTION
www.example.com. IN AAAA
ENTRY_END
STEP 43 TIME_PASSES ELAPSE 12
STEP 50 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
www.example.com. IN AAAA
ENTRY_END
; fallback queries
STEP 51 TRAFFIC
; and it fails, no parentside entries and so on.
STEP 52 CHECK_ANSWER
ENTRY_BEGIN
MATCH all
REPLY QR RD RA SERVFAIL
SECTION QUESTION
www.example.com. IN AAAA
ENTRY_END
STEP 53 TIME_PASSES ELAPSE 12
STEP 60 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
www.example.com. IN AAAA
ENTRY_END
STEP 61 TIMEOUT
STEP 62 TRAFFIC
STEP 63 CHECK_ANSWER
ENTRY_BEGIN
MATCH all
REPLY QR RD RA SERVFAIL
SECTION QUESTION
www.example.com. IN AAAA
ENTRY_END
STEP 70 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
www.example.com. IN A
ENTRY_END
STEP 72 TRAFFIC
STEP 73 CHECK_ANSWER
ENTRY_BEGIN
MATCH all
REPLY QR RD RA NOERROR
SECTION QUESTION
www.example.com. IN A
SECTION ANSWER
www.example.com. IN A 10.20.30.40
SECTION AUTHORITY
example.com. IN NS ns.example.net.
ENTRY_END
SCENARIO_END