Detect increases in PMTU.
[tinc] / src / net_packet.c
index cf5fb93..c209248 100644 (file)
@@ -1,7 +1,9 @@
 /*
     net_packet.c -- Handles in- and outgoing VPN packets
     Copyright (C) 1998-2005 Ivo Timmermans,
-                  2000-2010 Guus Sliepen <guus@tinc-vpn.org>
+                  2000-2012 Guus Sliepen <guus@tinc-vpn.org>
+                  2010      Timothy Redaelli <timothy@redaelli.eu>
+                  2010      Brandon Black <blblack@gmail.com>
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -41,7 +43,6 @@
 #include "ethernet.h"
 #include "event.h"
 #include "graph.h"
-#include "list.h"
 #include "logger.h"
 #include "net.h"
 #include "netutl.h"
@@ -59,12 +60,26 @@ static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999
 
 static void send_udppacket(node_t *, vpn_packet_t *);
 
+unsigned replaywin = 16;
+bool localdiscovery = false;
+
 #define MAX_SEQNO 1073741824
 
-// mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
-// mtuprobes ==    31: sleep pinginterval seconds
-// mtuprobes ==    32: send 1 burst, sleep pingtimeout second
-// mtuprobes ==    33: no response from other side, restart PMTU discovery process
+/* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
+   mtuprobes ==    31: sleep pinginterval seconds
+   mtuprobes ==    32: send 1 burst, sleep pingtimeout seconds
+   mtuprobes ==    33: no response from other side, restart PMTU discovery process
+
+   Probes are sent in batches of at least three, with random sizes between the
+   lower and upper boundaries for the MTU thus far discovered.
+
+   After the initial discovery, a fourth packet is added to each batch with a
+   size larger than the currently known PMTU, to test if the PMTU has increased.
+
+   In case local discovery is enabled, another packet is added to each batch,
+   which will be broadcast to the local network.
+
+*/
 
 void send_mtu_probe(node_t *n) {
        vpn_packet_t packet;
@@ -81,16 +96,21 @@ void send_mtu_probe(node_t *n) {
        }
 
        if(n->mtuprobes > 32) {
+               if(!n->minmtu) {
+                       n->mtuprobes = 31;
+                       timeout = pinginterval;
+                       goto end;
+               }
+
                ifdebug(TRAFFIC) logger(LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
                n->mtuprobes = 1;
                n->minmtu = 0;
                n->maxmtu = MTU;
        }
 
-       if(n->mtuprobes >= 10 && !n->minmtu) {
+       if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
                ifdebug(TRAFFIC) logger(LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
-               n->mtuprobes = 0;
-               return;
+               n->mtuprobes = 31;
        }
 
        if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
@@ -110,11 +130,16 @@ void send_mtu_probe(node_t *n) {
                timeout = pingtimeout;
        }
 
-       for(i = 0; i < 3; i++) {
-               if(n->maxmtu <= n->minmtu)
+       for(i = 0; i < 4 + localdiscovery; i++) {
+               if(i == 0) {
+                       if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
+                               continue;
+                       len = n->maxmtu + 8;
+               } else if(n->maxmtu <= n->minmtu) {
                        len = n->maxmtu;
-               else
+               } else {
                        len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
+               }
 
                if(len < 64)
                        len = 64;
@@ -122,7 +147,10 @@ void send_mtu_probe(node_t *n) {
                memset(packet.data, 0, 14);
                RAND_pseudo_bytes(packet.data + 14, len - 14);
                packet.len = len;
-               packet.priority = 0;
+               if(i >= 4 && n->mtuprobes <= 10)
+                       packet.priority = -1;
+               else
+                       packet.priority = 0;
 
                ifdebug(TRAFFIC) logger(LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
 
@@ -144,12 +172,24 @@ void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
                packet->data[0] = 1;
                send_udppacket(n, packet);
        } else {
+               if(n->mtuprobes > 30) {
+                       if (len == n->maxmtu + 8) {
+                               ifdebug(TRAFFIC) logger(LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
+                               n->maxmtu = MTU;
+                               n->mtuprobes = 10;
+                               return;
+                       }
+
+                       if(n->minmtu)
+                               n->mtuprobes = 30;
+                       else
+                               n->mtuprobes = 1;
+               }
+
                if(len > n->maxmtu)
                        len = n->maxmtu;
                if(n->minmtu < len)
                        n->minmtu = len;
-               if(n->mtuprobes > 30)
-                       n->mtuprobes = 30;
        }
 }
 
@@ -292,25 +332,32 @@ static void receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
        inpkt->len -= sizeof(inpkt->seqno);
        inpkt->seqno = ntohl(inpkt->seqno);
 
-       if(inpkt->seqno != n->received_seqno + 1) {
-               if(inpkt->seqno >= n->received_seqno + sizeof(n->late) * 8) {
-                       logger(LOG_WARNING, "Lost %d packets from %s (%s)",
-                                          inpkt->seqno - n->received_seqno - 1, n->name, n->hostname);
-                       
-                       memset(n->late, 0, sizeof(n->late));
-               } else if (inpkt->seqno <= n->received_seqno) {
-                       if((n->received_seqno >= sizeof(n->late) * 8 && inpkt->seqno <= n->received_seqno - sizeof(n->late) * 8) || !(n->late[(inpkt->seqno / 8) % sizeof(n->late)] & (1 << inpkt->seqno % 8))) {
-                               logger(LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
-                                          n->name, n->hostname, inpkt->seqno, n->received_seqno);
-                               return;
+       if(replaywin) {
+               if(inpkt->seqno != n->received_seqno + 1) {
+                       if(inpkt->seqno >= n->received_seqno + replaywin * 8) {
+                               if(n->farfuture++ < replaywin >> 2) {
+                                       logger(LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
+                                               n->name, n->hostname, inpkt->seqno - n->received_seqno - 1, n->farfuture);
+                                       return;
+                               }
+                               logger(LOG_WARNING, "Lost %d packets from %s (%s)",
+                                               inpkt->seqno - n->received_seqno - 1, n->name, n->hostname);
+                               memset(n->late, 0, replaywin);
+                       } else if (inpkt->seqno <= n->received_seqno) {
+                               if((n->received_seqno >= replaywin * 8 && inpkt->seqno <= n->received_seqno - replaywin * 8) || !(n->late[(inpkt->seqno / 8) % replaywin] & (1 << inpkt->seqno % 8))) {
+                                       logger(LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
+                                               n->name, n->hostname, inpkt->seqno, n->received_seqno);
+                                       return;
+                               }
+                       } else {
+                               for(i = n->received_seqno + 1; i < inpkt->seqno; i++)
+                                       n->late[(i / 8) % replaywin] |= 1 << i % 8;
                        }
-               } else {
-                       for(i = n->received_seqno + 1; i < inpkt->seqno; i++)
-                               n->late[(i / 8) % sizeof(n->late)] |= 1 << i % 8;
                }
+
+               n->farfuture = 0;
+               n->late[(inpkt->seqno / 8) % replaywin] &= ~(1 << inpkt->seqno % 8);
        }
-       
-       n->late[(inpkt->seqno / 8) % sizeof(n->late)] &= ~(1 << inpkt->seqno % 8);
 
        if(inpkt->seqno > n->received_seqno)
                n->received_seqno = inpkt->seqno;
@@ -344,7 +391,7 @@ static void receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
                receive_packet(n, inpkt);
 }
 
-void receive_tcppacket(connection_t *c, char *buffer, int len) {
+void receive_tcppacket(connection_t *c, const char *buffer, int len) {
        vpn_packet_t outpkt;
 
        outpkt.len = len;
@@ -369,7 +416,6 @@ static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
        static int priority = 0;
 #endif
        int origpriority;
-       int sock;
 
        if(!n->status.reachable) {
                ifdebug(TRAFFIC) logger(LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
@@ -383,7 +429,7 @@ static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
                                   "No valid key known yet for %s (%s), forwarding via TCP",
                                   n->name, n->hostname);
 
-               if(n->last_req_key + 10 < now) {
+               if(n->last_req_key + 10 <= now) {
                        send_req_key(n);
                        n->last_req_key = now;
                }
@@ -456,33 +502,68 @@ static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
 
        /* Determine which socket we have to use */
 
-       for(sock = 0; sock < listen_sockets; sock++)
-               if(n->address.sa.sa_family == listen_socket[sock].sa.sa.sa_family)
-                       break;
-
-       if(sock >= listen_sockets)
-               sock = 0;                               /* If none is available, just use the first and hope for the best. */
+       if(n->address.sa.sa_family != listen_socket[n->sock].sa.sa.sa_family) {
+               for(int sock = 0; sock < listen_sockets; sock++) {
+                       if(n->address.sa.sa_family == listen_socket[sock].sa.sa.sa_family) {
+                               n->sock = sock;
+                               break;
+                       }
+               }
+       }
 
        /* Send the packet */
 
+       struct sockaddr *sa;
+       socklen_t sl;
+       int sock;
+       sockaddr_t broadcast;
+
+       /* Overloaded use of priority field: -1 means local broadcast */
+
+       if(origpriority == -1 && n->prevedge) {
+               sock = rand() % listen_sockets;
+               memset(&broadcast, 0, sizeof broadcast);
+               if(listen_socket[sock].sa.sa.sa_family == AF_INET6) {
+                       broadcast.in6.sin6_family = AF_INET6;
+                       broadcast.in6.sin6_addr.s6_addr[0x0] = 0xff;
+                       broadcast.in6.sin6_addr.s6_addr[0x1] = 0x02;
+                       broadcast.in6.sin6_addr.s6_addr[0xf] = 0x01;
+                       broadcast.in6.sin6_port = n->prevedge->address.in.sin_port;
+                       broadcast.in6.sin6_scope_id = listen_socket[sock].sa.in6.sin6_scope_id;
+               } else {
+                       broadcast.in.sin_family = AF_INET;
+                       broadcast.in.sin_addr.s_addr = -1;
+                       broadcast.in.sin_port = n->prevedge->address.in.sin_port;
+               }
+               sa = &broadcast.sa;
+               sl = SALEN(broadcast.sa);
+       } else {
+               if(origpriority == -1)
+                       origpriority = 0;
+
+               sa = &(n->address.sa);
+               sl = SALEN(n->address.sa);
+               sock = n->sock;
+       }
+
 #if defined(SOL_IP) && defined(IP_TOS)
        if(priorityinheritance && origpriority != priority
-          && listen_socket[sock].sa.sa.sa_family == AF_INET) {
+          && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
                priority = origpriority;
                ifdebug(TRAFFIC) logger(LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
-               if(setsockopt(listen_socket[sock].udp, SOL_IP, IP_TOS, &priority, sizeof(priority)))    /* SO_PRIORITY doesn't seem to work */
+               if(setsockopt(listen_socket[n->sock].udp, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
                        logger(LOG_ERR, "System call `%s' failed: %s", "setsockopt", strerror(errno));
        }
 #endif
 
-       if(sendto(listen_socket[sock].udp, (char *) &inpkt->seqno, inpkt->len, 0, &(n->address.sa), SALEN(n->address.sa)) < 0 && !sockwouldblock(sockerrno)) {
+       if(sendto(listen_socket[sock].udp, (char *) &inpkt->seqno, inpkt->len, 0, sa, sl) < 0 && !sockwouldblock(sockerrno)) {
                if(sockmsgsize(sockerrno)) {
                        if(n->maxmtu >= origlen)
                                n->maxmtu = origlen - 1;
                        if(n->mtu >= origlen)
                                n->mtu = origlen - 1;
                } else
-                       logger(LOG_ERR, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
+                       ifdebug(TRAFFIC) logger(LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
        }
 
 end:
@@ -498,7 +579,7 @@ void send_packet(const node_t *n, vpn_packet_t *packet) {
        if(n == myself) {
                if(overwrite_mac)
                         memcpy(packet->data, mymac.x, ETH_ALEN);
-               write_packet(packet);
+               devops.write(packet);
                return;
        }
 
@@ -529,24 +610,50 @@ void send_packet(const node_t *n, vpn_packet_t *packet) {
 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
        avl_node_t *node;
        connection_t *c;
+       node_t *n;
+
+       // Give ourselves a copy of the packet, unless we originated it.
+       if(from != myself)
+               send_packet(myself, packet);
+
+       // In TunnelServer mode, or when broadcasting is disabled, do not forward broadcast packets.
+        // In TunnelServer mode the MST might not be valid, and forwarding could create loops.
+       if(tunnelserver || broadcast_mode == BMODE_NONE)
+               return;
 
        ifdebug(TRAFFIC) logger(LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
                           packet->len, from->name, from->hostname);
 
-       if(from != myself) {
-               send_packet(myself, packet);
+       switch(broadcast_mode) {
+               // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
+               // This guarantees all nodes receive the broadcast packet, and
+               // usually distributes the sending of broadcast packets over all nodes.
+               case BMODE_MST:
+                       for(node = connection_tree->head; node; node = node->next) {
+                               c = node->data;
 
-               // In TunnelServer mode, do not forward broadcast packets.
-                // The MST might not be valid and create loops.
-               if(tunnelserver)
-                       return;
-       }
+                               if(c->status.active && c->status.mst && c != from->nexthop->connection)
+                                       send_packet(c->node, packet);
+                       }
+                       break;
+
+               // In direct mode, we send copies to each node we know of.
+               // However, this only reaches nodes that can be reached in a single hop.
+               // We don't have enough information to forward broadcast packets in this case.
+               case BMODE_DIRECT:
+                       if(from != myself)
+                               break;
 
-       for(node = connection_tree->head; node; node = node->next) {
-               c = node->data;
+                       for(node = node_udp_tree->head; node; node = node->next) {
+                               n = node->data;
 
-               if(c->status.active && c->status.mst && c != from->nexthop->connection)
-                       send_packet(c->node, packet);
+                               if(n->status.reachable && ((n->via == myself && n->nexthop == n) || n->via == n))
+                                       send_packet(n, packet);
+                       }
+                       break;
+
+               default:
+                       break;
        }
 }
 
@@ -554,20 +661,21 @@ static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
        avl_node_t *node;
        edge_t *e;
        node_t *n = NULL;
+       bool hard = false;
        static time_t last_hard_try = 0;
 
        for(node = edge_weight_tree->head; node; node = node->next) {
                e = node->data;
 
+               if(e->to == myself)
+                       continue;
+
                if(sockaddrcmp_noport(from, &e->address)) {
                        if(last_hard_try == now)
                                continue;
-                       last_hard_try = now;
+                       hard = true;
                }
 
-               if(!n)
-                       n = e->to;
-
                if(!try_mac(e->to, pkt))
                        continue;
 
@@ -575,6 +683,10 @@ static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
                break;
        }
 
+       if(hard)
+               last_hard_try = now;
+
+       last_hard_try = now;
        return n;
 }
 
@@ -585,7 +697,7 @@ void handle_incoming_vpn_data(int sock) {
        socklen_t fromlen = sizeof(from);
        node_t *n;
 
-       pkt.len = recvfrom(sock, (char *) &pkt.seqno, MAXSIZE, 0, &from.sa, &fromlen);
+       pkt.len = recvfrom(listen_socket[sock].udp, (char *) &pkt.seqno, MAXSIZE, 0, &from.sa, &fromlen);
 
        if(pkt.len < 0) {
                if(!sockwouldblock(sockerrno))
@@ -611,5 +723,7 @@ void handle_incoming_vpn_data(int sock) {
                        return;
        }
 
+       n->sock = sock;
+
        receive_udppacket(n, &pkt);
 }