Fix combination of Mode = router and DeviceType = tap on Linux.
[tinc] / src / route.c
index b586157..e196f44 100644 (file)
@@ -1,7 +1,7 @@
 /*
     route.c -- routing
-    Copyright (C) 2000-2003 Ivo Timmermans <ivo@o2w.nl>,
-                  2000-2003 Guus Sliepen <guus@sliepen.eu.org>
+    Copyright (C) 2000-2005 Ivo Timmermans,
+                  2000-2013 Guus Sliepen <guus@tinc-vpn.org>
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU General Public License for more details.
 
-    You should have received a copy of the GNU General Public License
-    along with this program; if not, write to the Free Software
-    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-    $Id: route.c,v 1.1.2.69 2003/12/08 12:00:40 guus Exp $
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
 
 #include "system.h"
 
-#ifdef HAVE_NET_ETHERNET_H
-#include <net/ethernet.h>
-#endif
-#ifdef HAVE_NET_IF_ARP_H
-#include <net/if_arp.h>
-#endif
-#ifdef HAVE_NETINET_IP_ICMP_H
-#include <netinet/ip_icmp.h>
-#endif
-#ifdef HAVE_NETINET_ICMP6_H
-#include <netinet/icmp6.h>
-#endif
-#ifdef HAVE_NETINET_IF_ETHER_H
-#include <netinet/if_ether.h>
-#endif
-
 #include "avl_tree.h"
 #include "connection.h"
-#include "device.h"
 #include "ethernet.h"
 #include "ipv4.h"
 #include "ipv6.h"
 #include "utils.h"
 
 rmode_t routing_mode = RMODE_ROUTER;
+fmode_t forwarding_mode = FMODE_INTERNAL;
+bmode_t broadcast_mode = BMODE_MST;
+bool decrement_ttl = false;
+bool directonly = false;
 bool priorityinheritance = false;
 int macexpire = 600;
 bool overwrite_mac = false;
@@ -68,10 +53,13 @@ static const size_t icmp6_size = sizeof(struct icmp6_hdr);
 static const size_t ns_size = sizeof(struct nd_neighbor_solicit);
 static const size_t opt_size = sizeof(struct nd_opt_hdr);
 
+#ifndef MAX
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
 /* RFC 1071 */
 
-static uint16_t inet_checksum(void *data, int len, uint16_t prevsum)
-{
+static uint16_t inet_checksum(void *data, int len, uint16_t prevsum) {
        uint16_t *p = data;
        uint32_t checksum = prevsum ^ 0xFFFF;
 
@@ -81,7 +69,7 @@ static uint16_t inet_checksum(void *data, int len, uint16_t prevsum)
        }
        
        if(len)
-               checksum += *(unsigned char *)p;
+               checksum += *(uint8_t *)p;
 
        while(checksum >> 16)
                checksum = (checksum & 0xFFFF) + (checksum >> 16);
@@ -94,37 +82,132 @@ static bool ratelimit(int frequency) {
        static int count = 0;
        
        if(lasttime == now) {
-               if(++count > frequency)
+               if(count >= frequency)
                        return true;
        } else {
                lasttime = now;
                count = 0;
        }
 
+       count++;
        return false;
 }
+
+/* Check that packet holds at least length bytes.
+   Returns true when it does. Otherwise returns false, after emitting a
+   warning naming source if the ifdebug(TRAFFIC) guard allows it. */
+static bool checklength(node_t *source, vpn_packet_t *packet, length_t length) {
+       if(packet->len >= length)
+               return true;
+
+       ifdebug(TRAFFIC) logger(LOG_WARNING, "Got too short packet from %s (%s)", source->name, source->hostname);
+       return false;
+}
+
+/* Clamp the TCP MSS option of a packet so that segments fit the path MTU.
+
+   Does nothing unless both source and via are given and via has
+   OPTION_CLAMP_MSS set. The MTU used is source->mtu, lowered to via->mtu
+   when via is not myself and has a smaller MTU. Only TCP (IP protocol 6)
+   carried in IPv4 or IPv6, optionally behind one 802.1Q tag, is examined.
+   If an MSS option larger than the MTU allows is found, the option value and
+   the TCP checksum are rewritten in place in packet->data. */
+static void clamp_mss(const node_t *source, const node_t *via, vpn_packet_t *packet) {
+       if(!source || !via || !(via->options & OPTION_CLAMP_MSS))
+               return;
+
+       uint16_t mtu = source->mtu;
+       if(via != myself && via->mtu < mtu)
+               mtu = via->mtu;
+
+       /* Find TCP header */
+       int start = ether_size;
+       uint16_t type = packet->data[12] << 8 | packet->data[13];
+
+       if(type == ETH_P_8021Q) {
+               start += 4;
+               type = packet->data[16] << 8 | packet->data[17];
+       }
+
+       if(type == ETH_P_IP && packet->data[start + 9] == 6)
+               start += (packet->data[start] & 0xf) * 4;
+       else if(type == ETH_P_IPV6 && packet->data[start + 6] == 6)
+               start += 40;
+       else
+               return;
+
+       /* A TCP header without options is 20 bytes; nothing to clamp then */
+       if(packet->len <= start + 20)
+               return;
+
+       /* Use data offset field to calculate length of options field */
+       int len = ((packet->data[start + 12] >> 4) - 5) * 4;
+
+       if(packet->len < start + 20 + len)
+               return;
+
+       /* Search for MSS option header */
+       for(int i = 0; i < len;) {
+               /* Option kind 0 ends the option list */
+               if(packet->data[start + 20 + i] == 0)
+                       break;
+
+               /* Option kind 1 is a single byte of padding */
+               if(packet->data[start + 20 + i] == 1) {
+                       i++;
+                       continue;
+               }
+
+               /* Every other option has a length byte; make sure both it and
+                  the option it describes lie within the options field */
+               if(i > len - 2 || i > len - packet->data[start + 21 + i])
+                       break;
+
+               /* Skip options other than MSS (kind 2); a length below 2
+                  would not advance and is treated as malformed */
+               if(packet->data[start + 20 + i] != 2) {
+                       if(packet->data[start + 21 + i] < 2)
+                               break;
+                       i += packet->data[start + 21 + i];
+                       continue;
+               }
+
+               /* The MSS option must be exactly 4 bytes long. Look at the
+                  length byte of the option at i, not at that of the first
+                  option, so MSS is also found behind other options. */
+               if(packet->data[start + 21 + i] != 4)
+                       break;
+
+               /* The headers alone do not fit in the MTU; leave the packet alone */
+               if(mtu <= start + 20)
+                       break;
+
+               /* Found it */
+               uint16_t oldmss = packet->data[start + 22 + i] << 8 | packet->data[start + 23 + i];
+               uint16_t newmss = mtu - start - 20;
+               uint32_t csum = packet->data[start + 16] << 8 | packet->data[start + 17];
+
+               if(oldmss <= newmss)
+                       break;
+
+               ifdebug(TRAFFIC) logger(LOG_INFO, "Clamping MSS of packet from %s to %s to %d", source->name, via->name, newmss);
+
+               /* Update the MSS value and the checksum. The TCP checksum is a
+                  one's complement sum, so it is updated incrementally as
+                  ~(~csum + ~oldmss + newmss), folding carries back into the
+                  low 16 bits instead of letting them wrap away.
+                  NOTE(review): this assumes the MSS value is 16-bit aligned
+                  within the TCP header (i even) -- confirm whether odd
+                  offsets need byte-swapped handling. */
+               packet->data[start + 22 + i] = newmss >> 8;
+               packet->data[start + 23 + i] = newmss & 0xff;
+               csum ^= 0xffff;
+               csum += oldmss ^ 0xffff;
+               csum += newmss;
+               while(csum >> 16)
+                       csum = (csum & 0xffff) + (csum >> 16);
+               csum ^= 0xffff;
+               packet->data[start + 16] = csum >> 8;
+               packet->data[start + 17] = csum & 0xff;
+               break;
+       }
+}
+
+/* Exchange the mac_t-sized field at offset 0 of packet->data with the one
+   at offset 6, i.e. the Ethernet destination and source addresses. */
+static void swap_mac_addresses(vpn_packet_t *packet) {
+       mac_t saved;
+       void *first = &packet->data[0];
+       void *second = &packet->data[6];
+
+       memcpy(&saved, first, sizeof saved);
+       memcpy(first, second, sizeof saved);
+       memcpy(second, &saved, sizeof saved);
+}
        
-static void learn_mac(mac_t *address)
-{
+static void learn_mac(mac_t *address) {
        subnet_t *subnet;
        avl_node_t *node;
        connection_t *c;
 
-       cp();
-
-       subnet = lookup_subnet_mac(address);
+       subnet = lookup_subnet_mac(myself, address);
 
        /* If we don't know this MAC address yet, store it */
 
-       if(!subnet || subnet->owner != myself) {
-               ifdebug(TRAFFIC) logger(LOG_INFO, _("Learned new MAC address %hx:%hx:%hx:%hx:%hx:%hx"),
+       if(!subnet) {
+               ifdebug(TRAFFIC) logger(LOG_INFO, "Learned new MAC address %hx:%hx:%hx:%hx:%hx:%hx",
                                   address->x[0], address->x[1], address->x[2], address->x[3],
                                   address->x[4], address->x[5]);
 
                subnet = new_subnet();
                subnet->type = SUBNET_MAC;
-               memcpy(&subnet->net.mac.address, address, sizeof(mac_t));
+               subnet->expires = now + macexpire;
+               subnet->net.mac.address = *address;
+               subnet->weight = 10;
                subnet_add(myself, subnet);
+               subnet_update(myself, subnet, true);
 
                /* And tell all other tinc daemons it's our MAC */
 
@@ -135,25 +218,24 @@ static void learn_mac(mac_t *address)
                }
        }
 
-       subnet->net.mac.lastseen = now;
+       if(subnet->expires)
+               subnet->expires = now + macexpire;
 }
 
-void age_mac(void)
-{
+void age_subnets(void) {
        subnet_t *s;
        connection_t *c;
        avl_node_t *node, *next, *node2;
 
-       cp();
-
        for(node = myself->subnet_tree->head; node; node = next) {
                next = node->next;
                s = node->data;
-               if(s->type == SUBNET_MAC && s->net.mac.lastseen && s->net.mac.lastseen + macexpire < now) {
-                       ifdebug(TRAFFIC) logger(LOG_INFO, _("MAC address %hx:%hx:%hx:%hx:%hx:%hx expired"),
-                                          s->net.mac.address.x[0], s->net.mac.address.x[1],
-                                          s->net.mac.address.x[2], s->net.mac.address.x[3],
-                                          s->net.mac.address.x[4], s->net.mac.address.x[5]);
+               if(s->expires && s->expires <= now) {
+                       ifdebug(TRAFFIC) {
+                               char netstr[MAXNETSTR];
+                               if(net2str(netstr, sizeof netstr, s))
+                                       logger(LOG_INFO, "Subnet %s expired", netstr);
+                       }
 
                        for(node2 = connection_tree->head; node2; node2 = node2->next) {
                                c = node2->data;
@@ -161,37 +243,17 @@ void age_mac(void)
                                        send_del_subnet(c, s);
                        }
 
+                       subnet_update(myself, s, false);
                        subnet_del(myself, s);
                }
        }
 }
 
-static node_t *route_mac(vpn_packet_t *packet)
-{
-       subnet_t *subnet;
-
-       cp();
-
-       /* Learn source address */
-
-       learn_mac((mac_t *)(&packet->data[6]));
-
-       /* Lookup destination address */
-
-       subnet = lookup_subnet_mac((mac_t *)(&packet->data[0]));
-
-       if(subnet)
-               return subnet->owner;
-       else
-               return NULL;
-}
-
 /* RFC 792 */
 
-static void route_ipv4_unreachable(vpn_packet_t *packet, uint8_t code)
-{
-       struct ip ip;
-       struct icmp icmp;
+static void route_ipv4_unreachable(node_t *source, vpn_packet_t *packet, length_t ether_size, uint8_t type, uint8_t code) {
+       struct ip ip = {0};
+       struct icmp icmp = {0};
        
        struct in_addr ip_src;
        struct in_addr ip_dst;
@@ -200,20 +262,24 @@ static void route_ipv4_unreachable(vpn_packet_t *packet, uint8_t code)
        if(ratelimit(3))
                return;
        
-       cp();
+       /* Swap Ethernet source and destination addresses */
+
+       swap_mac_addresses(packet);
 
        /* Copy headers from packet into properly aligned structs on the stack */
 
        memcpy(&ip, packet->data + ether_size, ip_size);
-       memcpy(&icmp, packet->data + ether_size + ip_size, icmp_size);
 
        /* Remember original source and destination */
-               
-       memcpy(&ip_src, &ip.ip_src, sizeof(ip_src));
-       memcpy(&ip_dst, &ip.ip_dst, sizeof(ip_dst));
+       
+       ip_src = ip.ip_src;
+       ip_dst = ip.ip_dst;
 
        oldlen = packet->len - ether_size;
-       
+
+       if(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)
+               icmp.icmp_nextmtu = htons(packet->len - ether_size);
+
        if(oldlen >= IP_MSS - ip_size - icmp_size)
                oldlen = IP_MSS - ip_size - icmp_size;
        
@@ -232,14 +298,14 @@ static void route_ipv4_unreachable(vpn_packet_t *packet, uint8_t code)
        ip.ip_ttl = 255;
        ip.ip_p = IPPROTO_ICMP;
        ip.ip_sum = 0;
-       memcpy(&ip.ip_src, &ip_dst, sizeof(ip_src));
-       memcpy(&ip.ip_dst, &ip_src, sizeof(ip_dst));
+       ip.ip_src = ip_dst;
+       ip.ip_dst = ip_src;
 
        ip.ip_sum = inet_checksum(&ip, ip_size, ~0);
        
        /* Fill in ICMP header */
        
-       icmp.icmp_type = ICMP_DEST_UNREACH;
+       icmp.icmp_type = type;
        icmp.icmp_code = code;
        icmp.icmp_cksum = 0;
        
@@ -252,42 +318,140 @@ static void route_ipv4_unreachable(vpn_packet_t *packet, uint8_t code)
        memcpy(packet->data + ether_size + ip_size, &icmp, icmp_size);
        
        packet->len = ether_size + ip_size + icmp_size + oldlen;
+
+       send_packet(source, packet);
+}
+
+/* RFC 791 */
+
+/* Split an IPv4 packet into fragments and send each one to dest.
+
+   dest       - node the fragments are sent to; its MTU bounds the fragment size
+   packet     - the oversized packet; it is only read, never modified
+   ether_size - length of the link-layer header preceding the IPv4 header
+
+   Packets carrying IP options, or whose length disagrees with the IPv4
+   total length field, are not sent at all. */
+static void fragment_ipv4_packet(node_t *dest, vpn_packet_t *packet, length_t ether_size) {
+       struct ip ip;
+       vpn_packet_t fragment;
+       int len, maxlen, todo;
+       uint8_t *offset;
+       uint16_t ip_off, origf;
        
-       write_packet(packet);
+       memcpy(&ip, packet->data + ether_size, ip_size);
+       fragment.priority = packet->priority;
+
+       /* Only plain headers without IP options are handled */
+       if(ip.ip_hl != ip_size / 4)
+               return;
+
+       todo = ntohs(ip.ip_len) - ip_size;
+
+       if(ether_size + ip_size + todo != packet->len) {
+               ifdebug(TRAFFIC) logger(LOG_WARNING, "Length of packet (%d) doesn't match length in IPv4 header (%d)", packet->len, (int)(ether_size + ip_size + todo));
+               return;
+       }
+
+       ifdebug(TRAFFIC) logger(LOG_INFO, "Fragmenting packet of %d bytes to %s (%s)", packet->len, dest->name, dest->hostname);
+
+       offset = packet->data + ether_size + ip_size;
+
+       /* Payload bytes per fragment, rounded down to a multiple of 8 because
+          fragment offsets are counted in 8-byte units. Apply the same floor of
+          590 to the MTU that the caller uses when deciding to fragment: with
+          a very small dest->mtu the result would otherwise be zero or
+          negative, and the loop below would never finish or would copy a
+          negative length. */
+       maxlen = (MAX(dest->mtu, 590) - ether_size - ip_size) & ~0x7;
+       ip_off = ntohs(ip.ip_off);
+       origf = ip_off & ~IP_OFFMASK;
+       ip_off &= IP_OFFMASK;
+
+       while(todo) {
+               len = todo > maxlen ? maxlen : todo;
+               memcpy(fragment.data + ether_size + ip_size, offset, len);
+               todo -= len;
+               offset += len;
+
+               /* Keep the original flags and set "more fragments" on all but the last piece */
+               ip.ip_len = htons(ip_size + len);
+               ip.ip_off = htons(ip_off | origf | (todo ? IP_MF : 0));
+               ip.ip_sum = 0;
+               ip.ip_sum = inet_checksum(&ip, ip_size, ~0);
+               memcpy(fragment.data, packet->data, ether_size);
+               memcpy(fragment.data + ether_size, &ip, ip_size);
+               fragment.len = ether_size + ip_size + len;
+
+               send_packet(dest, &fragment);
+
+               ip_off += len / 8;
+       }
 }
 
-static node_t *route_ipv4(vpn_packet_t *packet)
-{
+static void route_ipv4_unicast(node_t *source, vpn_packet_t *packet) { /* Route one unicast IPv4 packet that came from source:
+          find the node owning the destination address, then forward the packet,
+          fragment it, or send an ICMP error back to source. */
        subnet_t *subnet;
+       node_t *via;
+       ipv4_t dest;
 
-       cp();
+       memcpy(&dest, &packet->data[30], sizeof dest); /* copy the destination address out of the packet before the lookup */
+       subnet = lookup_subnet_ipv4(&dest);
+
+       if(!subnet) {
+               ifdebug(TRAFFIC) logger(LOG_WARNING, "Cannot route packet from %s (%s): unknown IPv4 destination address %d.%d.%d.%d",
+                               source->name, source->hostname,
+                               dest.x[0],
+                               dest.x[1],
+                               dest.x[2],
+                               dest.x[3]);
+
+               route_ipv4_unreachable(source, packet, ether_size, ICMP_DEST_UNREACH, ICMP_NET_UNKNOWN);
+               return;
+       }
+       
+       if(subnet->owner == source) { /* the destination belongs to the very node that handed us the packet */
+               ifdebug(TRAFFIC) logger(LOG_WARNING, "Packet looping back to %s (%s)!", source->name, source->hostname);
+               return;
+       }
+
+       if(!subnet->owner->status.reachable)
+               return route_ipv4_unreachable(source, packet, ether_size, ICMP_DEST_UNREACH, ICMP_NET_UNREACH);
+
+       if(forwarding_mode == FMODE_OFF && source != myself && subnet->owner != myself) /* forwarding is off: refuse packets that neither originate nor end here */
+               return route_ipv4_unreachable(source, packet, ether_size, ICMP_DEST_UNREACH, ICMP_NET_ANO);
 
        if(priorityinheritance)
                packet->priority = packet->data[15];
 
-       subnet = lookup_subnet_ipv4((ipv4_t *) &packet->data[30]);
-
-       if(!subnet) {
-               ifdebug(TRAFFIC) logger(LOG_WARNING, _("Cannot route packet: unknown IPv4 destination address %d.%d.%d.%d"),
-                                  packet->data[30], packet->data[31], packet->data[32],
-                                  packet->data[33]);
+       via = (subnet->owner->via == myself) ? subnet->owner->nexthop : subnet->owner->via; /* the owner's nexthop when its via is myself, otherwise its via */
 
-               route_ipv4_unreachable(packet, ICMP_NET_UNKNOWN);
-               return NULL;
+       if(via == source) {
+               ifdebug(TRAFFIC) logger(LOG_ERR, "Routing loop for packet from %s (%s)!", source->name, source->hostname);
+               return;
        }
        
-       if(!subnet->owner->status.reachable)
-               route_ipv4_unreachable(packet, ICMP_NET_UNREACH);
+       if(directonly && subnet->owner != via) /* directonly: refuse when the packet would go through a node other than the owner */
+               return route_ipv4_unreachable(source, packet, ether_size, ICMP_DEST_UNREACH, ICMP_NET_ANO);
+
+       if(via && packet->len > MAX(via->mtu, 590) && via != myself) { /* too large for via; the MTU is never treated as smaller than 590 */
+               ifdebug(TRAFFIC) logger(LOG_INFO, "Packet for %s (%s) length %d larger than MTU %d", subnet->owner->name, subnet->owner->hostname, packet->len, via->mtu);
+               if(packet->data[20] & 0x40) { /* presumably the IPv4 Don't Fragment flag -- TODO confirm the offset */
+                       packet->len = MAX(via->mtu, 590); /* route_ipv4_unreachable derives the next-hop MTU it reports from packet->len */
+                       route_ipv4_unreachable(source, packet, ether_size, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED);
+               } else {
+                       fragment_ipv4_packet(via, packet, ether_size);
+               }
 
-       return subnet->owner;
+               return;
+       }
+
+       clamp_mss(source, via, packet);
+       send_packet(subnet->owner, packet);
+}
+
+/* Entry point for IPv4 packets received from source.
+   Packets shorter than an Ethernet plus IPv4 header are dropped by
+   checklength(). When broadcast_mode is enabled, packets whose destination
+   (bytes 30..33 of packet->data) is in 224.0.0.0/4 or equals
+   255.255.255.255 go to broadcast_packet(); everything else is handled by
+   route_ipv4_unicast(). */
+static void route_ipv4(node_t *source, vpn_packet_t *packet) {
+       if(!checklength(source, packet, ether_size + ip_size))
+               return;
+
+       if(broadcast_mode) {
+               const uint8_t *dst = &packet->data[30];
+               bool multicast = (dst[0] & 0xf0) == 0xe0;
+               bool all_ones = dst[0] == 255 && dst[1] == 255 && dst[2] == 255 && dst[3] == 255;
+
+               if(multicast || all_ones) {
+                       broadcast_packet(source, packet);
+                       return;
+               }
+       }
+
+       route_ipv4_unicast(source, packet);
 }
 
 /* RFC 2463 */
 
-static void route_ipv6_unreachable(vpn_packet_t *packet, uint8_t code)
-{
+static void route_ipv6_unreachable(node_t *source, vpn_packet_t *packet, length_t ether_size, uint8_t type, uint8_t code) {
        struct ip6_hdr ip6;
-       struct icmp6_hdr icmp6;
+       struct icmp6_hdr icmp6 = {0};
        uint16_t checksum;      
 
        struct {
@@ -300,19 +464,23 @@ static void route_ipv6_unreachable(vpn_packet_t *packet, uint8_t code)
        if(ratelimit(3))
                return;
        
-       cp();
+       /* Swap Ethernet source and destination addresses */
+
+       swap_mac_addresses(packet);
 
        /* Copy headers from packet to structs on the stack */
 
        memcpy(&ip6, packet->data + ether_size, ip6_size);
-       memcpy(&icmp6, packet->data + ether_size + ip6_size, icmp6_size);
 
        /* Remember original source and destination */
-               
-       memcpy(&pseudo.ip6_src, &ip6.ip6_dst, sizeof(ip6.ip6_src));
-       memcpy(&pseudo.ip6_dst, &ip6.ip6_src, sizeof(ip6.ip6_dst));
+       
+       pseudo.ip6_src = ip6.ip6_dst;
+       pseudo.ip6_dst = ip6.ip6_src;
+
+       pseudo.length = packet->len - ether_size;
 
-       pseudo.length = ntohs(ip6.ip6_plen) + ip6_size;
+       if(type == ICMP6_PACKET_TOO_BIG)
+               icmp6.icmp6_mtu = htonl(pseudo.length);
        
        if(pseudo.length >= IP_MSS - ip6_size - icmp6_size)
                pseudo.length = IP_MSS - ip6_size - icmp6_size;
@@ -327,12 +495,12 @@ static void route_ipv6_unreachable(vpn_packet_t *packet, uint8_t code)
        ip6.ip6_plen = htons(icmp6_size + pseudo.length);
        ip6.ip6_nxt = IPPROTO_ICMPV6;
        ip6.ip6_hlim = 255;
-       memcpy(&ip6.ip6_src, &pseudo.ip6_src, sizeof(ip6.ip6_src));
-       memcpy(&ip6.ip6_dst, &pseudo.ip6_dst, sizeof(ip6.ip6_dst));
+       ip6.ip6_src = pseudo.ip6_src;
+       ip6.ip6_dst = pseudo.ip6_dst;
 
        /* Fill in ICMP header */
        
-       icmp6.icmp6_type = ICMP6_DST_UNREACH;
+       icmp6.icmp6_type = type;
        icmp6.icmp6_code = code;
        icmp6.icmp6_cksum = 0;
 
@@ -356,47 +524,75 @@ static void route_ipv6_unreachable(vpn_packet_t *packet, uint8_t code)
        
        packet->len = ether_size + ip6_size + ntohl(pseudo.length);
        
-       write_packet(packet);
+       send_packet(source, packet);
 }
 
-static node_t *route_ipv6(vpn_packet_t *packet)
-{
+static void route_ipv6_unicast(node_t *source, vpn_packet_t *packet) { /* Route one unicast IPv6 packet that came from source:
+          find the node owning the destination address, then forward the packet
+          or send an ICMPv6 error back to source. */
        subnet_t *subnet;
+       node_t *via;
+       ipv6_t dest;
 
-       cp();
-
-       subnet = lookup_subnet_ipv6((ipv6_t *) &packet->data[38]);
+       memcpy(&dest, &packet->data[38], sizeof dest); /* copy the destination address out of the packet before the lookup */
+       subnet = lookup_subnet_ipv6(&dest);
 
        if(!subnet) {
-               ifdebug(TRAFFIC) logger(LOG_WARNING, _("Cannot route packet: unknown IPv6 destination address %hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx"),
-                                  ntohs(*(uint16_t *) &packet->data[38]),
-                                  ntohs(*(uint16_t *) &packet->data[40]),
-                                  ntohs(*(uint16_t *) &packet->data[42]),
-                                  ntohs(*(uint16_t *) &packet->data[44]),
-                                  ntohs(*(uint16_t *) &packet->data[46]),
-                                  ntohs(*(uint16_t *) &packet->data[48]),
-                                  ntohs(*(uint16_t *) &packet->data[50]),
-                                  ntohs(*(uint16_t *) &packet->data[52]));
-               route_ipv6_unreachable(packet, ICMP6_DST_UNREACH_ADDR);
+               ifdebug(TRAFFIC) logger(LOG_WARNING, "Cannot route packet from %s (%s): unknown IPv6 destination address %hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx",
+                               source->name, source->hostname,
+                               ntohs(dest.x[0]),
+                               ntohs(dest.x[1]),
+                               ntohs(dest.x[2]),
+                               ntohs(dest.x[3]),
+                               ntohs(dest.x[4]),
+                               ntohs(dest.x[5]),
+                               ntohs(dest.x[6]),
+                               ntohs(dest.x[7]));
+
+               route_ipv6_unreachable(source, packet, ether_size, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADDR);
+               return;
+       }
 
-               return NULL;
+       if(subnet->owner == source) { /* the destination belongs to the very node that handed us the packet */
+               ifdebug(TRAFFIC) logger(LOG_WARNING, "Packet looping back to %s (%s)!", source->name, source->hostname);
+               return;
        }
 
        if(!subnet->owner->status.reachable)
-               route_ipv6_unreachable(packet, ICMP6_DST_UNREACH_NOROUTE);
+               return route_ipv6_unreachable(source, packet, ether_size, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_NOROUTE);
+
+       if(forwarding_mode == FMODE_OFF && source != myself && subnet->owner != myself) /* forwarding is off: refuse packets that neither originate nor end here */
+               return route_ipv6_unreachable(source, packet, ether_size, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADMIN);
+
+       via = (subnet->owner->via == myself) ? subnet->owner->nexthop : subnet->owner->via; /* the owner's nexthop when its via is myself, otherwise its via */
+       
+       if(via == source) {
+               ifdebug(TRAFFIC) logger(LOG_ERR, "Routing loop for packet from %s (%s)!", source->name, source->hostname);
+               return;
+       }
        
-       return subnet->owner;
+       if(directonly && subnet->owner != via) /* directonly: refuse when the packet would go through a node other than the owner */
+               return route_ipv6_unreachable(source, packet, ether_size, ICMP6_DST_UNREACH, ICMP6_DST_UNREACH_ADMIN);
+
+       if(via && packet->len > MAX(via->mtu, 1294) && via != myself) { /* too large for via; the MTU is never treated as smaller than 1294 */
+               ifdebug(TRAFFIC) logger(LOG_INFO, "Packet for %s (%s) length %d larger than MTU %d", subnet->owner->name, subnet->owner->hostname, packet->len, via->mtu);
+               packet->len = MAX(via->mtu, 1294); /* route_ipv6_unreachable derives the MTU it reports from packet->len */
+               route_ipv6_unreachable(source, packet, ether_size, ICMP6_PACKET_TOO_BIG, 0);
+               return;
+       }
+
+       clamp_mss(source, via, packet);
+       send_packet(subnet->owner, packet);
 }
 
 /* RFC 2461 */
 
-static void route_neighborsol(vpn_packet_t *packet)
-{
+static void route_neighborsol(node_t *source, vpn_packet_t *packet) {
        struct ip6_hdr ip6;
        struct nd_neighbor_solicit ns;
        struct nd_opt_hdr opt;
        subnet_t *subnet;
        uint16_t checksum;
+       bool has_opt;
 
        struct {
                struct in6_addr ip6_src;        /* source address */
@@ -405,13 +601,22 @@ static void route_neighborsol(vpn_packet_t *packet)
                uint32_t next;
        } pseudo;
 
-       cp();
+       if(!checklength(source, packet, ether_size + ip6_size + ns_size))
+               return;
+       
+       has_opt = packet->len >= ether_size + ip6_size + ns_size + opt_size + ETH_ALEN;
+       
+       if(source != myself) {
+               ifdebug(TRAFFIC) logger(LOG_WARNING, "Got neighbor solicitation request from %s (%s) while in router mode!", source->name, source->hostname);
+               return;
+       }
 
        /* Copy headers from packet to structs on the stack */
 
        memcpy(&ip6, packet->data + ether_size, ip6_size);
        memcpy(&ns, packet->data + ether_size + ip6_size, ns_size);
-       memcpy(&opt, packet->data + ether_size + ip6_size + ns_size, opt_size);
+       if(has_opt)
+               memcpy(&opt, packet->data + ether_size + ip6_size + ns_size, opt_size);
 
        /* First, snatch the source address from the neighbor solicitation packet */
 
@@ -421,27 +626,32 @@ static void route_neighborsol(vpn_packet_t *packet)
        /* Check if this is a valid neighbor solicitation request */
 
        if(ns.nd_ns_hdr.icmp6_type != ND_NEIGHBOR_SOLICIT ||
-          opt.nd_opt_type != ND_OPT_SOURCE_LINKADDR) {
-               ifdebug(TRAFFIC) logger(LOG_WARNING, _("Cannot route packet: received unknown type neighbor solicitation request"));
+          (has_opt && opt.nd_opt_type != ND_OPT_SOURCE_LINKADDR)) {
+               ifdebug(TRAFFIC) logger(LOG_WARNING, "Cannot route packet: received unknown type neighbor solicitation request");
                return;
        }
 
        /* Create pseudo header */
 
-       memcpy(&pseudo.ip6_src, &ip6.ip6_src, sizeof(ip6.ip6_src));
-       memcpy(&pseudo.ip6_dst, &ip6.ip6_dst, sizeof(ip6.ip6_dst));
-       pseudo.length = htonl(ns_size + opt_size + ETH_ALEN);
+       pseudo.ip6_src = ip6.ip6_src;
+       pseudo.ip6_dst = ip6.ip6_dst;
+       if(has_opt)
+               pseudo.length = htonl(ns_size + opt_size + ETH_ALEN);
+       else
+               pseudo.length = htonl(ns_size);
        pseudo.next = htonl(IPPROTO_ICMPV6);
 
        /* Generate checksum */
 
        checksum = inet_checksum(&pseudo, sizeof(pseudo), ~0);
        checksum = inet_checksum(&ns, ns_size, checksum);
-       checksum = inet_checksum(&opt, opt_size, checksum);
-       checksum = inet_checksum(packet->data + ether_size + ip6_size + ns_size + opt_size, ETH_ALEN, checksum);
+       if(has_opt) {
+               checksum = inet_checksum(&opt, opt_size, checksum);
+               checksum = inet_checksum(packet->data + ether_size + ip6_size + ns_size + opt_size, ETH_ALEN, checksum);
+       }
 
        if(checksum) {
-               ifdebug(TRAFFIC) logger(LOG_WARNING, _("Cannot route packet: checksum error for neighbor solicitation request"));
+               ifdebug(TRAFFIC) logger(LOG_WARNING, "Cannot route packet: checksum error for neighbor solicitation request");
                return;
        }
 
@@ -450,7 +660,7 @@ static void route_neighborsol(vpn_packet_t *packet)
        subnet = lookup_subnet_ipv6((ipv6_t *) &ns.nd_ns_target);
 
        if(!subnet) {
-               ifdebug(TRAFFIC) logger(LOG_WARNING, _("Cannot route packet: neighbor solicitation request for unknown address %hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx"),
+               ifdebug(TRAFFIC) logger(LOG_WARNING, "Cannot route packet: neighbor solicitation request for unknown address %hx:%hx:%hx:%hx:%hx:%hx:%hx:%hx",
                                   ntohs(((uint16_t *) &ns.nd_ns_target)[0]),
                                   ntohs(((uint16_t *) &ns.nd_ns_target)[1]),
                                   ntohs(((uint16_t *) &ns.nd_ns_target)[2]),
@@ -473,10 +683,11 @@ static void route_neighborsol(vpn_packet_t *packet)
        memcpy(packet->data, packet->data + ETH_ALEN, ETH_ALEN);        /* copy destination address */
        packet->data[ETH_ALEN * 2 - 1] ^= 0xFF; /* mangle source address so it looks like it's not from us */
 
-       memcpy(&ip6.ip6_dst, &ip6.ip6_src, sizeof(ip6.ip6_dst));        /* ... */
-       memcpy(&ip6.ip6_src, &ns.nd_ns_target, sizeof(ip6.ip6_src));    /* swap destination and source protocol address */
+       ip6.ip6_dst = ip6.ip6_src;                      /* swap destination and source protocol address */
+       ip6.ip6_src = ns.nd_ns_target;
 
-       memcpy(packet->data + ether_size + ip6_size + ns_size + opt_size, packet->data + ETH_ALEN, ETH_ALEN);   /* add fake source hard addr */
+       if(has_opt)
+               memcpy(packet->data + ether_size + ip6_size + ns_size + opt_size, packet->data + ETH_ALEN, ETH_ALEN);   /* add fake source hard addr */
 
        ns.nd_ns_cksum = 0;
        ns.nd_ns_type = ND_NEIGHBOR_ADVERT;
@@ -485,17 +696,22 @@ static void route_neighborsol(vpn_packet_t *packet)
 
        /* Create pseudo header */
 
-       memcpy(&pseudo.ip6_src, &ip6.ip6_src, sizeof(ip6.ip6_src));
-       memcpy(&pseudo.ip6_dst, &ip6.ip6_dst, sizeof(ip6.ip6_dst));
-       pseudo.length = htonl(ns_size + opt_size + ETH_ALEN);
+       pseudo.ip6_src = ip6.ip6_src;
+       pseudo.ip6_dst = ip6.ip6_dst;
+       if(has_opt)
+               pseudo.length = htonl(ns_size + opt_size + ETH_ALEN);
+       else
+               pseudo.length = htonl(ns_size);
        pseudo.next = htonl(IPPROTO_ICMPV6);
 
        /* Generate checksum */
 
        checksum = inet_checksum(&pseudo, sizeof(pseudo), ~0);
        checksum = inet_checksum(&ns, ns_size, checksum);
-       checksum = inet_checksum(&opt, opt_size, checksum);
-       checksum = inet_checksum(packet->data + ether_size + ip6_size + ns_size + opt_size, ETH_ALEN, checksum);
+       if(has_opt) {
+               checksum = inet_checksum(&opt, opt_size, checksum);
+               checksum = inet_checksum(packet->data + ether_size + ip6_size + ns_size + opt_size, ETH_ALEN, checksum);
+       }
 
        ns.nd_ns_hdr.icmp6_cksum = checksum;
 
@@ -503,20 +719,41 @@ static void route_neighborsol(vpn_packet_t *packet)
 
        memcpy(packet->data + ether_size, &ip6, ip6_size);
        memcpy(packet->data + ether_size + ip6_size, &ns, ns_size);
-       memcpy(packet->data + ether_size + ip6_size + ns_size, &opt, opt_size);
+       if(has_opt)
+               memcpy(packet->data + ether_size + ip6_size + ns_size, &opt, opt_size);
 
-       write_packet(packet);
+       send_packet(source, packet);
+}
+
+/* Entry point for IPv6 packets received from source.
+   Packets shorter than an Ethernet plus IPv6 header are dropped by
+   checklength(). ICMPv6 packets that are long enough and whose byte 54 is
+   ND_NEIGHBOR_SOLICIT are answered by route_neighborsol(). Otherwise, when
+   broadcast_mode is enabled and the first destination address byte
+   (packet->data[38]) is 255, the packet goes to broadcast_packet(); all
+   remaining packets are handled by route_ipv6_unicast(). */
+static void route_ipv6(node_t *source, vpn_packet_t *packet) {
+       if(!checklength(source, packet, ether_size + ip6_size))
+               return;
+
+       if(packet->data[20] == IPPROTO_ICMPV6) {
+               /* The length is only checked, and a short packet only logged, for ICMPv6 */
+               if(checklength(source, packet, ether_size + ip6_size + icmp6_size)) {
+                       if(packet->data[54] == ND_NEIGHBOR_SOLICIT) {
+                               route_neighborsol(source, packet);
+                               return;
+                       }
+               }
+       }
+
+       if(!broadcast_mode || packet->data[38] != 255) {
+               route_ipv6_unicast(source, packet);
+               return;
+       }
+
+       broadcast_packet(source, packet);
 }
 
 /* RFC 826 */
 
-static void route_arp(vpn_packet_t *packet)
-{
+static void route_arp(node_t *source, vpn_packet_t *packet) { /* Handle an ARP frame in router mode; only requests with source == myself get past the guards below. */
        struct ether_arp arp;
        subnet_t *subnet;
        struct in_addr addr;
 
-       cp();
+       if(!checklength(source, packet, ether_size + arp_size)) /* length check that the removed route_outgoing() used to do inline before calling route_arp() */
+               return;
+
+       if(source != myself) { /* ARP coming from another node is logged and dropped, never answered */
+               ifdebug(TRAFFIC) logger(LOG_WARNING, "Got ARP request from %s (%s) while in router mode!", source->name, source->hostname);
+               return;
+       }
 
        /* First, snatch the source address from the ARP packet */
 
@@ -531,7 +768,7 @@ static void route_arp(vpn_packet_t *packet)
 
        if(ntohs(arp.arp_hrd) != ARPHRD_ETHER || ntohs(arp.arp_pro) != ETH_P_IP ||
           arp.arp_hln != ETH_ALEN || arp.arp_pln != sizeof(addr) || ntohs(arp.arp_op) != ARPOP_REQUEST) {
-               ifdebug(TRAFFIC) logger(LOG_WARNING, _("Cannot route packet: received unknown type ARP request"));
+               ifdebug(TRAFFIC) logger(LOG_WARNING, "Cannot route packet: received unknown type ARP request");
                return;
        }
 
@@ -540,7 +777,7 @@ static void route_arp(vpn_packet_t *packet)
        subnet = lookup_subnet_ipv4((ipv4_t *) &arp.arp_tpa);
 
        if(!subnet) {
-               ifdebug(TRAFFIC) logger(LOG_WARNING, _("Cannot route packet: ARP request for unknown address %d.%d.%d.%d"),
+               ifdebug(TRAFFIC) logger(LOG_WARNING, "Cannot route packet: ARP request for unknown address %d.%d.%d.%d",
                                   arp.arp_tpa[0], arp.arp_tpa[1], arp.arp_tpa[2],
                                   arp.arp_tpa[3]);
                return;
@@ -566,150 +803,176 @@ static void route_arp(vpn_packet_t *packet)
 
        memcpy(packet->data + ether_size, &arp, arp_size);
 
-       write_packet(packet);
+       send_packet(source, packet); /* source is myself here (guard above), so the rewritten packet goes back via send_packet() instead of a direct write_packet() */
 }
 
-void route_outgoing(vpn_packet_t *packet)
-{
-       uint16_t type;
-       node_t *n = NULL;
+static void route_mac(node_t *source, vpn_packet_t *packet) { /* Switch-mode forwarding: look up the destination MAC, then forward, broadcast, or drop the frame. */
+       subnet_t *subnet;
+       mac_t dest;
+
+       /* Learn source address */
+
+       if(source == myself) { /* only frames whose source is myself are used for learning */
+               mac_t src;
+               memcpy(&src, &packet->data[6], sizeof src); /* bytes 6.. of the frame = Ethernet source address */
+               learn_mac(&src);
+       }
+
+       /* Lookup destination address */
 
-       cp();
+       memcpy(&dest, &packet->data[0], sizeof dest); /* bytes 0.. of the frame = Ethernet destination address */
+       subnet = lookup_subnet_mac(NULL, &dest);
 
-       if(packet->len < ether_size) {
-               ifdebug(TRAFFIC) logger(LOG_WARNING, _("Read too short packet"));
+       if(!subnet) { /* no subnet known for this MAC: broadcast instead */
+               broadcast_packet(source, packet);
                return;
        }
 
-       /* FIXME: multicast? */
+       if(subnet->owner == source) { /* the owner of the destination is the sender itself: drop rather than send it back */
+               ifdebug(TRAFFIC) logger(LOG_WARNING, "Packet looping back to %s (%s)!", source->name, source->hostname);
+               return;
+       }
 
-       switch (routing_mode) {
-               case RMODE_ROUTER:
-                       type = ntohs(*((uint16_t *)(&packet->data[12])));
-                       switch (type) {
-                               case ETH_P_IP:
-                                       if(packet->len < ether_size + ip_size) {
-                                               ifdebug(TRAFFIC) logger(LOG_WARNING, _("Read too short packet"));
-                                               return;
-                                       }
-
-                                       n = route_ipv4(packet);
-                                       break;
-
-                               case ETH_P_IPV6:
-                                       if(packet->len < ether_size + ip6_size) {
-                                               ifdebug(TRAFFIC) logger(LOG_WARNING, _("Read too short packet"));
-                                               return;
-                                       }
-
-                                       if(packet->data[20] == IPPROTO_ICMPV6 && packet->len >= ether_size + ip6_size + ns_size && packet->data[54] == ND_NEIGHBOR_SOLICIT) {
-                                               route_neighborsol(packet);
-                                               return;
-                                       }
-                                       n = route_ipv6(packet);
-                                       break;
-
-                               case ETH_P_ARP:
-                                       if(packet->len < ether_size + arp_size) {
-                                               ifdebug(TRAFFIC) logger(LOG_WARNING, _("Read too short packet"));
-                                               return;
-                                       }
-
-                                       route_arp(packet);
-                                       return;
-
-                               default:
-                                       ifdebug(TRAFFIC) logger(LOG_WARNING, _("Cannot route packet: unknown type %hx"), type);
-                                       return;
+       if(forwarding_mode == FMODE_OFF && source != myself && subnet->owner != myself) /* forwarding off: drop frames that neither come from nor are owned by myself */
+               return;
+
+       uint16_t type = packet->data[12] << 8 | packet->data[13]; /* EtherType, big-endian, bytes 12-13 */
+
+       if(priorityinheritance && type == ETH_P_IP && packet->len >= ether_size + ip_size)
+               packet->priority = packet->data[15]; /* second byte of the IPv4 header (TOS), assuming a 14-byte Ethernet header */
+
+       // Handle packets larger than PMTU
+
+       node_t *via = (subnet->owner->via == myself) ? subnet->owner->nexthop : subnet->owner->via; /* node whose MTU is checked below */
+
+       if(directonly && subnet->owner != via) /* directonly: drop unless the owner itself is that node */
+               return;
+       
+       if(via && packet->len > via->mtu && via != myself) {
+               ifdebug(TRAFFIC) logger(LOG_INFO, "Packet for %s (%s) length %d larger than MTU %d", subnet->owner->name, subnet->owner->hostname, packet->len, via->mtu);
+               length_t ethlen = 14;
+
+               if(type == ETH_P_8021Q) { /* VLAN-tagged: the real EtherType sits 4 bytes further on */
+                       type = packet->data[16] << 8 | packet->data[17];
+                       ethlen += 4;
+               }
+
+               if(type == ETH_P_IP && packet->len > 576 + ethlen) { /* 576 is presumably the minimum IPv4 datagram size every host must accept -- confirm */
+                       if(packet->data[6 + ethlen] & 0x40) { /* IPv4 header byte 6, bit 0x40 = Don't Fragment */
+                               packet->len = via->mtu;
+                               route_ipv4_unreachable(source, packet, ethlen, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED);
+                       } else {
+                               fragment_ipv4_packet(via, packet, ethlen);
                        }
-                       if(n)
-                               send_packet(n, packet);
-                       break;
+                       return;
+               } else if(type == ETH_P_IPV6 && packet->len > 1280 + ethlen) { /* 1280 is presumably the IPv6 minimum link MTU -- confirm */
+                       packet->len = via->mtu;
+                       route_ipv6_unreachable(source, packet, ethlen, ICMP6_PACKET_TOO_BIG, 0);
+                       return;
+               }
+       }
 
-               case RMODE_SWITCH:
-                       n = route_mac(packet);
-                       if(n)
-                               send_packet(n, packet);
-                       else
-                               broadcast_packet(myself, packet);
-                       break;
+       clamp_mss(source, via, packet); /* NOTE(review): via may be NULL here; assumes clamp_mss() tolerates that -- confirm */
+       send_packet(subnet->owner, packet);
+}
 
-               case RMODE_HUB:
-                       broadcast_packet(myself, packet);
-                       break;
+static bool do_decrement_ttl(node_t *source, vpn_packet_t *packet) { /* Decrement the IPv4 TTL / IPv6 hop limit in place; returns false when the packet must not be forwarded. */
+       uint16_t type = packet->data[12] << 8 | packet->data[13]; /* EtherType, big-endian */
+       length_t ethlen = ether_size;
+
+       if(type == ETH_P_8021Q) { /* VLAN-tagged: the real EtherType and the IP header sit 4 bytes further on */
+               type = packet->data[16] << 8 | packet->data[17];
+               ethlen += 4;
+       }
+
+       switch (type) {
+               case ETH_P_IP:
+                       if(!checklength(source, packet, ethlen + ip_size))
+                               return false;
+
+                       if(packet->data[ethlen + 8] <= 1) { /* TTL is IPv4 header byte 8; a TTL that would reach zero must be discarded, not forwarded */
+                               if(packet->data[ethlen + 9] != IPPROTO_ICMP || packet->data[ethlen + 20] != ICMP_TIME_EXCEEDED) /* protocol is header byte 9, ICMP type follows the 20-byte header: never answer a Time Exceeded with another one */
+                                       route_ipv4_unreachable(source, packet, ethlen, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL);
+                               return false;
+                       }
+
+                       uint16_t old = packet->data[ethlen + 8] << 8 | packet->data[ethlen + 9]; /* 16-bit word holding TTL and protocol, before the change */
+                       packet->data[ethlen + 8]--;
+                       uint16_t new = packet->data[ethlen + 8] << 8 | packet->data[ethlen + 9]; /* same word after the decrement */
+
+                       uint32_t checksum = packet->data[ethlen + 10] << 8 | packet->data[ethlen + 11]; /* header checksum, bytes 10-11 */
+                       checksum += old + (~new & 0xFFFF); /* incremental one's-complement update instead of recomputing the whole header */
+                       while(checksum >> 16) /* fold the carries back into the low 16 bits */
+                               checksum = (checksum & 0xFFFF) + (checksum >> 16);
+                       packet->data[ethlen + 10] = checksum >> 8;
+                       packet->data[ethlen + 11] = checksum & 0xff;
+
+                       return true;
+
+               case ETH_P_IPV6:
+                       if(!checklength(source, packet, ethlen + ip6_size))
+                               return false;
+
+                       if(packet->data[ethlen + 7] <= 1) { /* hop limit is IPv6 header byte 7; same discard rule as the IPv4 TTL */
+                               if(packet->data[ethlen + 6] != IPPROTO_ICMPV6 || packet->data[ethlen + 40] != ICMP6_TIME_EXCEEDED) /* next-header is byte 6, ICMPv6 type follows the 40-byte header */
+                                       route_ipv6_unreachable(source, packet, ethlen, ICMP6_TIME_EXCEEDED, ICMP6_TIME_EXCEED_TRANSIT);
+                               return false;
+                       }
+
+                       packet->data[ethlen + 7]--; /* IPv6 has no header checksum, so nothing else to patch */
+
+                       return true;
+
+               default: /* not IP: nothing to decrement, let it through */
+                       return true;
        }
 }
 
-void route_incoming(node_t *source, vpn_packet_t *packet)
-{
-       if(packet->len < ether_size) {
-               ifdebug(TRAFFIC) logger(LOG_WARNING, _("Read too short packet"));
+void route(node_t *source, vpn_packet_t *packet) { /* Single routing entry point, replacing route_outgoing()/route_incoming(): dispatches a packet from source according to forwarding_mode and routing_mode. */
+       if(forwarding_mode == FMODE_KERNEL && source != myself) { /* kernel forwarding: packets from other nodes go to myself without being inspected here */
+               send_packet(myself, packet);
                return;
        }
 
+       if(!checklength(source, packet, ether_size)) /* replaces the removed inline "too short" check on the Ethernet header */
+               return;
+
+       if(decrement_ttl && source != myself) /* the TTL is only touched for packets that did not come from myself */
+               if(!do_decrement_ttl(source, packet))
+                       return;
+
        switch (routing_mode) {
                case RMODE_ROUTER:
                        {
-                               node_t *n = NULL;
-                               uint16_t type;
+                               uint16_t type = packet->data[12] << 8 | packet->data[13]; /* EtherType assembled byte-wise instead of the old ntohs() on a cast pointer */
 
-                               type = ntohs(*((uint16_t *)(&packet->data[12])));
                                switch (type) {
-                                       case ETH_P_IP:
-                                               if(packet->len < ether_size + ip_size) {
-                                                       ifdebug(TRAFFIC) logger(LOG_WARNING, _("Read too short packet"));
-                                                       return;
-                                               }
+                                       case ETH_P_ARP:
+                                               route_arp(source, packet);
+                                               break;
 
-                                               n = route_ipv4(packet);
+                                       case ETH_P_IP:
+                                               route_ipv4(source, packet);
                                                break;
 
                                        case ETH_P_IPV6:
-                                               if(packet->len < ether_size + ip6_size) {
-                                                       ifdebug(TRAFFIC) logger(LOG_WARNING, _("Read too short packet"));
-                                                       return;
-                                               }
-
-                                               n = route_ipv6(packet);
+                                               route_ipv6(source, packet);
                                                break;
 
                                        default:
-                                               n = myself;
+                                               ifdebug(TRAFFIC) logger(LOG_WARNING, "Cannot route packet from %s (%s): unknown type %hx", source->name, source->hostname, type); /* unknown EtherTypes are now logged and dropped rather than delivered to myself */
                                                break;
                                }
-
-                               if(n) {
-                                       if(n == myself) {
-                                               if(overwrite_mac)
-                                                       memcpy(packet->data, mymac.x, ETH_ALEN);
-                                               write_packet(packet);
-                                       } else
-                                               send_packet(n, packet);
-                               }
                        }
                        break;
 
                case RMODE_SWITCH:
-                       {
-                               subnet_t *subnet;
-
-                               subnet = lookup_subnet_mac((mac_t *)(&packet->data[0]));
-
-                               if(subnet) {
-                                       if(subnet->owner == myself)
-                                               write_packet(packet);
-                                       else
-                                               send_packet(subnet->owner, packet);
-                               } else {
-                                       broadcast_packet(source, packet);
-                                       write_packet(packet);
-                               }
-                       }
+                       route_mac(source, packet); /* MAC lookup, forwarding and broadcast fallback all live in route_mac() */
                        break;
 
                case RMODE_HUB:
-                       broadcast_packet(source, packet);       /* Spread it on */
-                       write_packet(packet);
+                       broadcast_packet(source, packet); /* hub mode: every packet is broadcast; the separate write_packet() call is gone */
                        break;
        }
 }