From 5674bba5c54c1aee3a4ac5b3aba6b3ebded91bbc Mon Sep 17 00:00:00 2001 From: Guus Sliepen Date: Thu, 5 Mar 2009 13:34:13 +0100 Subject: [PATCH] Allow weight to be assigned to Subnets. Tinc allows multiple nodes to own the same Subnet, but did not have a sensible way to decide which one to send packets to. Tinc also did not check the reachability of nodes when deciding where to route packets to, so it would not automatically fail over to a reachable node. Tinc now assigns a weight to each Subnet. The default weight is 10, with lower weights having higher priority. The Subnets are now internally sorted in the same way as the kernel's routing table, and the Subnets are searched linearly, skipping those of unreachable nodes. A small cache of recently used addresses is used to speed up the lookup functions. --- doc/tinc.conf.5.in | 9 ++- doc/tinc.texi | 8 +- src/graph.c | 1 + src/subnet.c | 195 ++++++++++++++++++++++++++++++--------------- src/subnet.h | 2 + 5 files changed, 148 insertions(+), 67 deletions(-) diff --git a/doc/tinc.conf.5.in b/doc/tinc.conf.5.in index 0ad3da3d..7486bc88 100644 --- a/doc/tinc.conf.5.in +++ b/doc/tinc.conf.5.in @@ -394,7 +394,7 @@ Either the PEM format is used, or exactly one of the above two options must be s in each host configuration file, if you want to be able to establish a connection with that host. -.It Va Subnet Li = Ar address Ns Op Li / Ns Ar prefixlength +.It Va Subnet Li = Ar address Ns Op Li / Ns Ar prefixlength Ns Op Li # Ns Ar weight The subnet which this tinc daemon will serve. .Nm tinc tries to look up which other daemon it should send a packet to by searching the appropriate subnet. @@ -416,6 +416,13 @@ Read a networking HOWTO/FAQ/guide if you don't understand this. IPv6 subnets are notated like fec0:0:0:1:0:0:0:0/64. MAC addresses are notated like 0:1a:2b:3c:4d:5e. +.Pp +A Subnet can be given a weight to indicate its priority over identical Subnets +owned by different nodes. The default weight is 10. 
Lower values indicate +higher priority. Packets will be sent to the node with the highest priority, +unless that node is not reachable, in which case the node with the next highest +priority will be tried, and so on. + .It Va TCPOnly Li = yes | no Pq no If this variable is set to yes, then the packets are tunnelled over the TCP connection instead of a UDP connection. diff --git a/doc/tinc.texi b/doc/tinc.texi index ce5b0b4c..5957997c 100644 --- a/doc/tinc.texi +++ b/doc/tinc.texi @@ -1015,7 +1015,7 @@ in each host configuration file, if you want to be able to establish a connection with that host. @cindex Subnet -@item Subnet = <@var{address}[/@var{prefixlength}]> +@item Subnet = <@var{address}[/@var{prefixlength}[#@var{weight}]]> The subnet which this tinc daemon will serve. Tinc tries to look up which other daemon it should send a packet to by searching the appropiate subnet. If the packet matches a subnet, @@ -1039,6 +1039,12 @@ example: netmask 255.255.255.0 would become /24, 255.255.252.0 becomes /22. This conforms to standard CIDR notation as described in @uref{ftp://ftp.isi.edu/in-notes/rfc1519.txt, RFC1519} +A Subnet can be given a weight to indicate its priority over identical Subnets +owned by different nodes. The default weight is 10. Lower values indicate +higher priority. Packets will be sent to the node with the highest priority, +unless that node is not reachable, in which case the node with the next highest +priority will be tried, and so on. 
+ @cindex TCPonly @item TCPonly = (no) If this variable is set to yes, then the packets are tunnelled over a diff --git a/src/graph.c b/src/graph.c index 5dfbb72e..014177ce 100644 --- a/src/graph.c +++ b/src/graph.c @@ -313,6 +313,7 @@ void sssp_bfs(void) void graph(void) { + subnet_cache_flush(); sssp_bfs(); mst_kruskal(); graph_changed = true; diff --git a/src/subnet.c b/src/subnet.c index 0342ca29..64ebcabe 100644 --- a/src/subnet.c +++ b/src/subnet.c @@ -37,6 +37,23 @@ avl_tree_t *subnet_tree; +/* Subnet lookup cache */ + +static ipv4_t cache_ipv4_address[2]; +static subnet_t *cache_ipv4_subnet[2]; +static bool cache_ipv4_valid[2]; +static int cache_ipv4_slot; + +static ipv6_t cache_ipv6_address[2]; +static subnet_t *cache_ipv6_subnet[2]; +static bool cache_ipv6_valid[2]; +static int cache_ipv6_slot; + +void subnet_cache_flush() { + cache_ipv4_valid[0] = cache_ipv4_valid[1] = false; + cache_ipv6_valid[0] = cache_ipv6_valid[1] = false; +} + /* Subnet comparison */ static int subnet_compare_mac(const subnet_t *a, const subnet_t *b) @@ -45,6 +62,11 @@ static int subnet_compare_mac(const subnet_t *a, const subnet_t *b) result = memcmp(&a->net.mac.address, &b->net.mac.address, sizeof(mac_t)); + if(result) + return result; + + result = a->weight - b->weight; + if(result || !a->owner || !b->owner) return result; @@ -55,12 +77,17 @@ static int subnet_compare_ipv4(const subnet_t *a, const subnet_t *b) { int result; - result = memcmp(&a->net.ipv4.address, &b->net.ipv4.address, sizeof(ipv4_t)); + result = b->net.ipv4.prefixlength - a->net.ipv4.prefixlength; if(result) return result; - result = a->net.ipv4.prefixlength - b->net.ipv4.prefixlength; + result = memcmp(&a->net.ipv4.address, &b->net.ipv4.address, sizeof(ipv4_t)); + + if(result) + return result; + + result = a->weight - b->weight; if(result || !a->owner || !b->owner) return result; @@ -72,12 +99,17 @@ static int subnet_compare_ipv6(const subnet_t *a, const subnet_t *b) { int result; - result = 
memcmp(&a->net.ipv6.address, &b->net.ipv6.address, sizeof(ipv6_t)); + result = b->net.ipv6.prefixlength - a->net.ipv6.prefixlength; if(result) return result; + + result = memcmp(&a->net.ipv6.address, &b->net.ipv6.address, sizeof(ipv6_t)); - result = a->net.ipv6.prefixlength - b->net.ipv6.prefixlength; + if(result) + return result; + + result = a->weight - b->weight; if(result || !a->owner || !b->owner) return result; @@ -118,6 +150,8 @@ void init_subnets(void) cp(); subnet_tree = avl_alloc_tree((avl_compare_t) subnet_compare, (avl_action_t) free_subnet); + + subnet_cache_flush(); } void exit_subnets(void) @@ -167,6 +201,8 @@ void subnet_add(node_t *n, subnet_t *subnet) avl_insert(subnet_tree, subnet); avl_insert(n->subnet_tree, subnet); + + subnet_cache_flush(); } void subnet_del(node_t *n, subnet_t *subnet) @@ -175,6 +211,8 @@ void subnet_del(node_t *n, subnet_t *subnet) avl_delete(n->subnet_tree, subnet); avl_delete(subnet_tree, subnet); + + subnet_cache_flush(); } /* Ascii representation of subnets */ @@ -183,16 +221,18 @@ bool str2net(subnet_t *subnet, const char *subnetstr) { int i, l; uint16_t x[8]; + int weight = 10; cp(); - if(sscanf(subnetstr, "%hu.%hu.%hu.%hu/%d", - &x[0], &x[1], &x[2], &x[3], &l) == 5) { + if(sscanf(subnetstr, "%hu.%hu.%hu.%hu/%d#%d", + &x[0], &x[1], &x[2], &x[3], &l, &weight) >= 5) { if(l < 0 || l > 32) return false; subnet->type = SUBNET_IPV4; subnet->net.ipv4.prefixlength = l; + subnet->weight = weight; for(i = 0; i < 4; i++) { if(x[i] > 255) @@ -203,14 +243,15 @@ bool str2net(subnet_t *subnet, const char *subnetstr) return true; } - if(sscanf(subnetstr, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx/%d", + if(sscanf(subnetstr, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx/%d#%d", &x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &x[6], &x[7], - &l) == 9) { + &l, &weight) >= 9) { if(l < 0 || l > 128) return false; subnet->type = SUBNET_IPV6; subnet->net.ipv6.prefixlength = l; + subnet->weight = weight; for(i = 0; i < 8; i++) subnet->net.ipv6.address.x[i] = htons(x[i]); 
@@ -218,9 +259,10 @@ bool str2net(subnet_t *subnet, const char *subnetstr) return true; } - if(sscanf(subnetstr, "%hu.%hu.%hu.%hu", &x[0], &x[1], &x[2], &x[3]) == 4) { + if(sscanf(subnetstr, "%hu.%hu.%hu.%hu#%d", &x[0], &x[1], &x[2], &x[3], &weight) >= 4) { subnet->type = SUBNET_IPV4; subnet->net.ipv4.prefixlength = 32; + subnet->weight = weight; for(i = 0; i < 4; i++) { if(x[i] > 255) @@ -231,10 +273,11 @@ bool str2net(subnet_t *subnet, const char *subnetstr) return true; } - if(sscanf(subnetstr, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx", - &x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &x[6], &x[7]) == 8) { + if(sscanf(subnetstr, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx#%d", + &x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &x[6], &x[7], &weight) >= 8) { subnet->type = SUBNET_IPV6; subnet->net.ipv6.prefixlength = 128; + subnet->weight = weight; for(i = 0; i < 8; i++) subnet->net.ipv6.address.x[i] = htons(x[i]); @@ -242,9 +285,10 @@ bool str2net(subnet_t *subnet, const char *subnetstr) return true; } - if(sscanf(subnetstr, "%hx:%hx:%hx:%hx:%hx:%hx", - &x[0], &x[1], &x[2], &x[3], &x[4], &x[5]) == 6) { + if(sscanf(subnetstr, "%hx:%hx:%hx:%hx:%hx:%hx#%d", + &x[0], &x[1], &x[2], &x[3], &x[4], &x[5], &weight) >= 6) { subnet->type = SUBNET_MAC; + subnet->weight = weight; for(i = 0; i < 6; i++) subnet->net.mac.address.x[i] = x[i]; @@ -266,24 +310,28 @@ bool net2str(char *netstr, int len, const subnet_t *subnet) switch (subnet->type) { case SUBNET_MAC: - snprintf(netstr, len, "%hx:%hx:%hx:%hx:%hx:%hx", + snprintf(netstr, len, "%hx:%hx:%hx:%hx:%hx:%hx#%d", subnet->net.mac.address.x[0], subnet->net.mac.address.x[1], subnet->net.mac.address.x[2], subnet->net.mac.address.x[3], - subnet->net.mac.address.x[4], subnet->net.mac.address.x[5]); + subnet->net.mac.address.x[4], + subnet->net.mac.address.x[5], + subnet->weight); break; case SUBNET_IPV4: - snprintf(netstr, len, "%hu.%hu.%hu.%hu/%d", + snprintf(netstr, len, "%hu.%hu.%hu.%hu/%d#%d", subnet->net.ipv4.address.x[0], subnet->net.ipv4.address.x[1], 
subnet->net.ipv4.address.x[2], - subnet->net.ipv4.address.x[3], subnet->net.ipv4.prefixlength); + subnet->net.ipv4.address.x[3], + subnet->net.ipv4.prefixlength, + subnet->weight); break; case SUBNET_IPV6: - snprintf(netstr, len, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx/%d", + snprintf(netstr, len, "%hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx/%d#%d", ntohs(subnet->net.ipv6.address.x[0]), ntohs(subnet->net.ipv6.address.x[1]), ntohs(subnet->net.ipv6.address.x[2]), @@ -292,7 +340,8 @@ bool net2str(char *netstr, int len, const subnet_t *subnet) ntohs(subnet->net.ipv6.address.x[5]), ntohs(subnet->net.ipv6.address.x[6]), ntohs(subnet->net.ipv6.address.x[7]), - subnet->net.ipv6.prefixlength); + subnet->net.ipv6.prefixlength, + subnet->weight); break; default: @@ -332,80 +381,96 @@ subnet_t *lookup_subnet_mac(const mac_t *address) subnet_t *lookup_subnet_ipv4(const ipv4_t *address) { - subnet_t *p, subnet = {0}; + subnet_t *p, *r = NULL, subnet = {0}; + avl_node_t *n; + int i; cp(); + // Check if this address is cached + + for(i = 0; i < 2; i++) { + if(!cache_ipv4_valid[i]) + continue; + if(!memcmp(address, &cache_ipv4_address[i], sizeof *address)) + return cache_ipv4_subnet[i]; + } + + // Search all subnets for a matching one + subnet.type = SUBNET_IPV4; subnet.net.ipv4.address = *address; subnet.net.ipv4.prefixlength = 32; subnet.owner = NULL; - do { - /* Go find subnet */ - - p = avl_search_closest_smaller(subnet_tree, &subnet); - - /* Check if the found subnet REALLY matches */ - - if(p) { - if(p->type != SUBNET_IPV4) { - p = NULL; - break; - } + for(n = subnet_tree->head; n; n = n->next) { + p = n->data; + + if(!p || p->type != subnet.type) + continue; - if(!maskcmp(address, &p->net.ipv4.address, p->net.ipv4.prefixlength)) + if(!maskcmp(address, &p->net.ipv4.address, p->net.ipv4.prefixlength)) { + r = p; + if(p->owner->status.reachable) break; - else { - /* Otherwise, see if there is a bigger enclosing subnet */ - - subnet.net.ipv4.prefixlength = p->net.ipv4.prefixlength - 1; - 
if(subnet.net.ipv4.prefixlength < 0 || subnet.net.ipv4.prefixlength > 32) - return NULL; - maskcpy(&subnet.net.ipv4.address, &p->net.ipv4.address, subnet.net.ipv4.prefixlength, sizeof(ipv4_t)); - } } - } while(p); + } - return p; + // Cache the result + + cache_ipv4_slot = !cache_ipv4_slot; + memcpy(&cache_ipv4_address[cache_ipv4_slot], address, sizeof *address); + cache_ipv4_subnet[cache_ipv4_slot] = r; + cache_ipv4_valid[cache_ipv4_slot] = true; + + return r; } subnet_t *lookup_subnet_ipv6(const ipv6_t *address) { - subnet_t *p, subnet = {0}; + subnet_t *p, *r = NULL, subnet = {0}; + avl_node_t *n; + int i; cp(); + // Check if this address is cached + + for(i = 0; i < 2; i++) { + if(!cache_ipv6_valid[i]) + continue; + if(!memcmp(address, &cache_ipv6_address[i], sizeof *address)) + return cache_ipv6_subnet[i]; + } + + // Search all subnets for a matching one + subnet.type = SUBNET_IPV6; subnet.net.ipv6.address = *address; subnet.net.ipv6.prefixlength = 128; subnet.owner = NULL; - do { - /* Go find subnet */ - - p = avl_search_closest_smaller(subnet_tree, &subnet); - - /* Check if the found subnet REALLY matches */ - - if(p) { - if(p->type != SUBNET_IPV6) - return NULL; + for(n = subnet_tree->head; n; n = n->next) { + p = n->data; + + if(!p || p->type != subnet.type) + continue; - if(!maskcmp(address, &p->net.ipv6.address, p->net.ipv6.prefixlength)) + if(!maskcmp(address, &p->net.ipv6.address, p->net.ipv6.prefixlength)) { + r = p; + if(p->owner->status.reachable) break; - else { - /* Otherwise, see if there is a bigger enclosing subnet */ - - subnet.net.ipv6.prefixlength = p->net.ipv6.prefixlength - 1; - if(subnet.net.ipv6.prefixlength < 0 || subnet.net.ipv6.prefixlength > 128) - return NULL; - maskcpy(&subnet.net.ipv6.address, &p->net.ipv6.address, subnet.net.ipv6.prefixlength, sizeof(ipv6_t)); - } } - } while(p); + } - return p; + // Cache the result + + cache_ipv6_slot = !cache_ipv6_slot; + memcpy(&cache_ipv6_address[cache_ipv6_slot], address, sizeof *address); 
+ cache_ipv6_subnet[cache_ipv6_slot] = r; + cache_ipv6_valid[cache_ipv6_slot] = true; + + return r; } void subnet_update(node_t *owner, subnet_t *subnet, bool up) { diff --git a/src/subnet.h b/src/subnet.h index c50ac6c8..6d7c7599 100644 --- a/src/subnet.h +++ b/src/subnet.h @@ -53,6 +53,7 @@ typedef struct subnet_t { subnet_type_t type; /* subnet type (IPv4? IPv6? MAC? something even weirder?) */ time_t expires; /* expiry time */ + int weight; /* weight (lower value is higher priority) */ /* And now for the actual subnet: */ @@ -82,5 +83,6 @@ extern subnet_t *lookup_subnet_mac(const mac_t *); extern subnet_t *lookup_subnet_ipv4(const ipv4_t *); extern subnet_t *lookup_subnet_ipv6(const ipv6_t *); extern void dump_subnets(void); +extern void subnet_cache_flush(void); #endif /* __TINC_SUBNET_H__ */ -- 2.20.1