Introduce lightweight PMTU probe replies.
[tinc] / src / net_packet.c
1 /*
2     net_packet.c -- Handles in- and outgoing VPN packets
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2013 Guus Sliepen <guus@tinc-vpn.org>
5                   2010      Timothy Redaelli <timothy@redaelli.eu>
6                   2010      Brandon Black <blblack@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #ifdef HAVE_ZLIB
26 #include <zlib.h>
27 #endif
28
29 #ifdef HAVE_LZO
30 #include LZO1X_H
31 #endif
32
33 #include "cipher.h"
34 #include "conf.h"
35 #include "connection.h"
36 #include "crypto.h"
37 #include "digest.h"
38 #include "device.h"
39 #include "ethernet.h"
40 #include "graph.h"
41 #include "logger.h"
42 #include "net.h"
43 #include "netutl.h"
44 #include "protocol.h"
45 #include "process.h"
46 #include "route.h"
47 #include "utils.h"
48 #include "xalloc.h"
49
/* Key lifetime setting. It is not referenced in the part of the file visible
   here; NOTE(review): presumably consumed by the key regeneration code. */
50 int keylifetime = 0;
51 #ifdef HAVE_LZO
/* Scratch memory handed to lzo1x_1_compress() and lzo1x_999_compress() in
   compress_packet(); sized for whichever of the two needs more. */
52 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
53 #endif
54
/* Forward declaration: the MTU probe code below calls send_udppacket()
   before its definition. */
55 static void send_udppacket(node_t *, vpn_packet_t *);
56
/* Size in bytes of the per-node n->late bitmap used by receive_udppacket();
   the window covers replaywin * 8 sequence numbers. Zero disables the
   replay check entirely. */
57 unsigned replaywin = 16;
/* When true, every MTU probe burst gets one extra packet that is flagged
   (priority -1) for local broadcast during the first ten rounds. */
58 bool localdiscovery = false;
/* Optional destination for local discovery packets; choose_broadcast_address()
   uses it when its address family matches the chosen listening socket. */
59 sockaddr_t localdiscovery_address;
60
/* 2^30: once a node's received sequence number exceeds this,
   receive_udppacket() calls regenerate_key(). */
61 #define MAX_SEQNO 1073741824
62
63 /* mtuprobes == 1..30: initial discovery, send bursts with 1 second interval
64    mtuprobes ==    31: sleep pinginterval seconds
65    mtuprobes ==    32: send 1 burst, sleep pingtimeout seconds
66    mtuprobes ==    33: no response from other side, restart PMTU discovery process
67
68    Probes are sent in batches of at least three, with random sizes between the
69    lower and upper boundaries for the MTU thus far discovered.
70
71    After the initial discovery, a fourth packet is added to each batch with a
72    size larger than the currently known PMTU, to test if the PMTU has increased.
73
74    In case local discovery is enabled, another packet is added to each batch,
75    which will be broadcast to the local network.
76
77 */
78
79 static void send_mtu_probe_handler(void *data) {
80         node_t *n = data;
81         int timeout = 1;
82
83         n->mtuprobes++;
84
85         if(!n->status.reachable || !n->status.validkey) {
86                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send MTU probe to unreachable or rekeying node %s (%s)", n->name, n->hostname);
87                 n->mtuprobes = 0;
88                 return;
89         }
90
91         if(n->mtuprobes > 32) {
92                 if(!n->minmtu) {
93                         n->mtuprobes = 31;
94                         timeout = pinginterval;
95                         goto end;
96                 }
97
98                 logger(DEBUG_TRAFFIC, LOG_INFO, "%s (%s) did not respond to UDP ping, restarting PMTU discovery", n->name, n->hostname);
99                 n->status.udp_confirmed = false;
100                 n->mtuprobes = 1;
101                 n->minmtu = 0;
102                 n->maxmtu = MTU;
103         }
104
105         if(n->mtuprobes >= 10 && n->mtuprobes < 32 && !n->minmtu) {
106                 logger(DEBUG_TRAFFIC, LOG_INFO, "No response to MTU probes from %s (%s)", n->name, n->hostname);
107                 n->mtuprobes = 31;
108         }
109
110         if(n->mtuprobes == 30 || (n->mtuprobes < 30 && n->minmtu >= n->maxmtu)) {
111                 if(n->minmtu > n->maxmtu)
112                         n->minmtu = n->maxmtu;
113                 else
114                         n->maxmtu = n->minmtu;
115                 n->mtu = n->minmtu;
116                 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
117                 n->mtuprobes = 31;
118         }
119
120         if(n->mtuprobes == 31) {
121                 timeout = pinginterval;
122                 goto end;
123         } else if(n->mtuprobes == 32) {
124                 timeout = pingtimeout;
125         }
126
127         for(int i = 0; i < 4 + localdiscovery; i++) {
128                 int len;
129
130                 if(i == 0) {
131                         if(n->mtuprobes < 30 || n->maxmtu + 8 >= MTU)
132                                 continue;
133                         len = n->maxmtu + 8;
134                 } else if(n->maxmtu <= n->minmtu) {
135                         len = n->maxmtu;
136                 } else {
137                         len = n->minmtu + 1 + rand() % (n->maxmtu - n->minmtu);
138                 }
139
140                 if(len < 64)
141                         len = 64;
142
143                 vpn_packet_t packet;
144                 memset(packet.data, 0, 14);
145                 randomize(packet.data + 14, len - 14);
146                 packet.len = len;
147                 if(i >= 4 && n->mtuprobes <= 10)
148                         packet.priority = -1;
149                 else
150                         packet.priority = 0;
151
152                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending MTU probe length %d to %s (%s)", len, n->name, n->hostname);
153
154                 send_udppacket(n, &packet);
155         }
156
157         n->probe_counter = 0;
158         gettimeofday(&n->probe_time, NULL);
159
160         /* Calculate the packet loss of incoming traffic by comparing the rate of
161            packets received to the rate with which the sequence number has increased.
162          */
163
164         if(n->received > n->prev_received)
165                 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
166         else
167                 n->packetloss = n->received_seqno <= n->prev_received_seqno;
168
169         n->prev_received_seqno = n->received_seqno;
170         n->prev_received = n->received;
171
172 end:
173         timeout_set(&n->mtutimeout, &(struct timeval){timeout, rand() % 100000});
174 }
175
176 void send_mtu_probe(node_t *n) {
177         timeout_add(&n->mtutimeout, send_mtu_probe_handler, n, &(struct timeval){1, 0});
178         send_mtu_probe_handler(n);
179 }
180
181 static void mtu_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
182         if(!packet->data[0]) {
183                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got MTU probe request %d from %s (%s)", packet->len, n->name, n->hostname);
184
185                 /* It's a probe request, send back a reply */
186
187                 /* Type 2 probe replies were introduced in protocol 17.3 */
188                 if ((n->options >> 24) == 3) {
189                         uint8_t* data = packet->data;
190                         *data++ = 2;
191                         uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
192                 } else {
193                         /* Legacy protocol: n won't understand type 2 probe replies. */
194                         packet->data[0] = 1;
195                 }
196
197                 /* Temporarily set udp_confirmed, so that the reply is sent
198                    back exactly the way it came in. */
199
200                 bool udp_confirmed = n->status.udp_confirmed;
201                 n->status.udp_confirmed = true;
202                 send_udppacket(n, packet);
203                 n->status.udp_confirmed = udp_confirmed;
204         } else {
205                 length_t probelen = len;
206                 if (packet->data[0] == 2) {
207                         if (len < 3)
208                                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) MTU probe reply from %s (%s)", n->name, n->hostname);
209                         else {
210                                 uint16_t probelen16; memcpy(&probelen16, packet->data + 1, 2); probelen = ntohs(probelen16);
211                         }
212                 }
213                 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d MTU probe reply %d from %s (%s)", packet->data[0], probelen, n->name, n->hostname);
214
215                 /* It's a valid reply: now we know bidirectional communication
216                    is possible using the address and socket that the reply
217                    packet used. */
218
219                 n->status.udp_confirmed = true;
220
221                 /* If we haven't established the PMTU yet, restart the discovery process. */
222
223                 if(n->mtuprobes > 30) {
224                         if (probelen == n->maxmtu + 8) {
225                                 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
226                                 n->maxmtu = MTU;
227                                 n->mtuprobes = 10;
228                                 return;
229                         }
230
231                         if(n->minmtu)
232                                 n->mtuprobes = 30;
233                         else
234                                 n->mtuprobes = 1;
235                 }
236
237                 /* If applicable, raise the minimum supported MTU */
238
239                 if(probelen > n->maxmtu)
240                         probelen = n->maxmtu;
241                 if(n->minmtu < probelen)
242                         n->minmtu = probelen;
243
244                 /* Calculate RTT and bandwidth.
245                    The RTT is the time between the MTU probe burst was sent and the first
246                    reply is received. The bandwidth is measured using the time between the
247                    arrival of the first and third probe reply.
248                  */
249
250                 struct timeval now, diff;
251                 gettimeofday(&now, NULL);
252                 timersub(&now, &n->probe_time, &diff);
253                 n->probe_counter++;
254
255                 if(n->probe_counter == 1) {
256                         n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
257                         n->probe_time = now;
258                 } else if(n->probe_counter == 3) {
259                         n->bandwidth = 2.0 * probelen / (diff.tv_sec + diff.tv_usec * 1e-6);
260                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, burst bandwidth %.3f Mbit/s, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->bandwidth * 8e-6, n->packetloss * 1e2);
261                 }
262         }
263 }
264
265 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
266         if(level == 0) {
267                 memcpy(dest, source, len);
268                 return len;
269         } else if(level == 10) {
270 #ifdef HAVE_LZO
271                 lzo_uint lzolen = MAXSIZE;
272                 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
273                 return lzolen;
274 #else
275                 return -1;
276 #endif
277         } else if(level < 10) {
278 #ifdef HAVE_ZLIB
279                 unsigned long destlen = MAXSIZE;
280                 if(compress2(dest, &destlen, source, len, level) == Z_OK)
281                         return destlen;
282                 else
283 #endif
284                         return -1;
285         } else {
286 #ifdef HAVE_LZO
287                 lzo_uint lzolen = MAXSIZE;
288                 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
289                 return lzolen;
290 #else
291                 return -1;
292 #endif
293         }
294
295         return -1;
296 }
297
298 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
299         if(level == 0) {
300                 memcpy(dest, source, len);
301                 return len;
302         } else if(level > 9) {
303 #ifdef HAVE_LZO
304                 lzo_uint lzolen = MAXSIZE;
305                 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
306                         return lzolen;
307                 else
308 #endif
309                         return -1;
310         }
311 #ifdef HAVE_ZLIB
312         else {
313                 unsigned long destlen = MAXSIZE;
314                 if(uncompress(dest, &destlen, source, len) == Z_OK)
315                         return destlen;
316                 else
317                         return -1;
318         }
319 #endif
320
321         return -1;
322 }
323
324 /* VPN packet I/O */
325
326 static void receive_packet(node_t *n, vpn_packet_t *packet) {
327         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
328                            packet->len, n->name, n->hostname);
329
330         n->in_packets++;
331         n->in_bytes += packet->len;
332
333         route(n, packet);
334 }
335
336 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
337         if(n->status.sptps)
338                 return sptps_verify_datagram(&n->sptps, (char *)&inpkt->seqno, inpkt->len);
339
340         if(!digest_active(n->indigest) || inpkt->len < sizeof inpkt->seqno + digest_length(n->indigest))
341                 return false;
342
343         return digest_verify(n->indigest, &inpkt->seqno, inpkt->len - digest_length(n->indigest), (const char *)&inpkt->seqno + inpkt->len - digest_length(n->indigest));
344 }
345
346 static void receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
347         vpn_packet_t pkt1, pkt2;
348         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
349         int nextpkt = 0;
350         vpn_packet_t *outpkt = pkt[0];
351         size_t outlen;
352
353         if(n->status.sptps) {
354                 if(!n->sptps.state) {
355                         if(!n->status.waitingforkey) {
356                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
357                                 send_req_key(n);
358                         } else {
359                                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
360                         }
361                         return;
362                 }
363                 sptps_receive_data(&n->sptps, (char *)&inpkt->seqno, inpkt->len);
364                 return;
365         }
366
367         if(!cipher_active(n->incipher)) {
368                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
369                 return;
370         }
371
372         /* Check packet length */
373
374         if(inpkt->len < sizeof inpkt->seqno + digest_length(n->indigest)) {
375                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
376                                         n->name, n->hostname);
377                 return;
378         }
379
380         /* Check the message authentication code */
381
382         if(digest_active(n->indigest)) {
383                 inpkt->len -= digest_length(n->indigest);
384                 if(!digest_verify(n->indigest, &inpkt->seqno, inpkt->len, (const char *)&inpkt->seqno + inpkt->len)) {
385                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
386                         return;
387                 }
388         }
389         /* Decrypt the packet */
390
391         if(cipher_active(n->incipher)) {
392                 outpkt = pkt[nextpkt++];
393                 outlen = MAXSIZE;
394
395                 if(!cipher_decrypt(n->incipher, &inpkt->seqno, inpkt->len, &outpkt->seqno, &outlen, true)) {
396                         logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
397                         return;
398                 }
399
400                 outpkt->len = outlen;
401                 inpkt = outpkt;
402         }
403
404         /* Check the sequence number */
405
406         inpkt->len -= sizeof inpkt->seqno;
407         inpkt->seqno = ntohl(inpkt->seqno);
408
409         if(replaywin) {
410                 if(inpkt->seqno != n->received_seqno + 1) {
411                         if(inpkt->seqno >= n->received_seqno + replaywin * 8) {
412                                 if(n->farfuture++ < replaywin >> 2) {
413                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
414                                                 n->name, n->hostname, inpkt->seqno - n->received_seqno - 1, n->farfuture);
415                                         return;
416                                 }
417                                 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
418                                                 inpkt->seqno - n->received_seqno - 1, n->name, n->hostname);
419                                 memset(n->late, 0, replaywin);
420                         } else if (inpkt->seqno <= n->received_seqno) {
421                                 if((n->received_seqno >= replaywin * 8 && inpkt->seqno <= n->received_seqno - replaywin * 8) || !(n->late[(inpkt->seqno / 8) % replaywin] & (1 << inpkt->seqno % 8))) {
422                                         logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
423                                                 n->name, n->hostname, inpkt->seqno, n->received_seqno);
424                                         return;
425                                 }
426                         } else {
427                                 for(int i = n->received_seqno + 1; i < inpkt->seqno; i++)
428                                         n->late[(i / 8) % replaywin] |= 1 << i % 8;
429                         }
430                 }
431
432                 n->farfuture = 0;
433                 n->late[(inpkt->seqno / 8) % replaywin] &= ~(1 << inpkt->seqno % 8);
434         }
435
436         if(inpkt->seqno > n->received_seqno)
437                 n->received_seqno = inpkt->seqno;
438
439         n->received++;
440
441         if(n->received_seqno > MAX_SEQNO)
442                 regenerate_key();
443
444         /* Decompress the packet */
445
446         length_t origlen = inpkt->len;
447
448         if(n->incompression) {
449                 outpkt = pkt[nextpkt++];
450
451                 if((outpkt->len = uncompress_packet(outpkt->data, inpkt->data, inpkt->len, n->incompression)) < 0) {
452                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
453                                                  n->name, n->hostname);
454                         return;
455                 }
456
457                 inpkt = outpkt;
458
459                 origlen -= MTU/64 + 20;
460         }
461
462         inpkt->priority = 0;
463
464         if(!inpkt->data[12] && !inpkt->data[13])
465                 mtu_probe_h(n, inpkt, origlen);
466         else
467                 receive_packet(n, inpkt);
468 }
469
470 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
471         vpn_packet_t outpkt;
472
473         if(len > sizeof outpkt.data)
474                 return;
475
476         outpkt.len = len;
477         if(c->options & OPTION_TCPONLY)
478                 outpkt.priority = 0;
479         else
480                 outpkt.priority = -1;
481         memcpy(outpkt.data, buffer, len);
482
483         receive_packet(c->node, &outpkt);
484 }
485
486 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
487         if(!n->status.validkey) {
488                 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
489                 if(!n->status.waitingforkey)
490                         send_req_key(n);
491                 else if(n->last_req_key + 10 < now.tv_sec) {
492                         logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
493                         sptps_stop(&n->sptps);
494                         n->status.waitingforkey = false;
495                         send_req_key(n);
496                 }
497                 return;
498         }
499
500         uint8_t type = 0;
501         int offset = 0;
502
503         if(!(origpkt->data[12] | origpkt->data[13])) {
504                 sptps_send_record(&n->sptps, PKT_PROBE, (char *)origpkt->data, origpkt->len);
505                 return;
506         }
507
508         if(routing_mode == RMODE_ROUTER)
509                 offset = 14;
510         else
511                 type = PKT_MAC;
512
513         if(origpkt->len < offset)
514                 return;
515
516         vpn_packet_t outpkt;
517
518         if(n->outcompression) {
519                 int len = compress_packet(outpkt.data + offset, origpkt->data + offset, origpkt->len - offset, n->outcompression);
520                 if(len < 0) {
521                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
522                 } else if(len < origpkt->len - offset) {
523                         outpkt.len = len + offset;
524                         origpkt = &outpkt;
525                         type |= PKT_COMPRESSED;
526                 }
527         }
528
529         sptps_send_record(&n->sptps, type, (char *)origpkt->data + offset, origpkt->len - offset);
530         return;
531 }
532
533 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
534         /* Latest guess */
535         *sa = &n->address;
536         *sock = n->sock;
537
538         /* If the UDP address is confirmed, use it. */
539         if(n->status.udp_confirmed)
540                 return;
541
542         /* Send every third packet to n->address; that could be set
543            to the node's reflexive UDP address discovered during key
544            exchange. */
545
546         static int x = 0;
547         if(++x >= 3) {
548                 x = 0;
549                 return;
550         }
551
552         /* Otherwise, address are found in edges to this node.
553            So we pick a random edge and a random socket. */
554
555         int i = 0;
556         int j = rand() % n->edge_tree->count;
557         edge_t *candidate = NULL;
558
559         for splay_each(edge_t, e, n->edge_tree) {
560                 if(i++ == j) {
561                         candidate = e->reverse;
562                         break;
563                 }
564         }
565
566         if(candidate) {
567                 *sa = &candidate->address;
568                 *sock = rand() % listen_sockets;
569         }
570
571         /* Make sure we have a suitable socket for the chosen address */
572         if(listen_socket[*sock].sa.sa.sa_family != (*sa)->sa.sa_family) {
573                 for(int i = 0; i < listen_sockets; i++) {
574                         if(listen_socket[i].sa.sa.sa_family == (*sa)->sa.sa_family) {
575                                 *sock = i;
576                                 break;
577                         }
578                 }
579         }
580 }
581
582 static void choose_broadcast_address(const node_t *n, const sockaddr_t **sa, int *sock) {
583         static sockaddr_t broadcast_ipv4 = {
584                 .in = {
585                         .sin_family = AF_INET,
586                         .sin_addr.s_addr = -1,
587                 }
588         };
589
590         static sockaddr_t broadcast_ipv6 = {
591                 .in6 = {
592                         .sin6_family = AF_INET6,
593                         .sin6_addr.s6_addr[0x0] = 0xff,
594                         .sin6_addr.s6_addr[0x1] = 0x02,
595                         .sin6_addr.s6_addr[0xf] = 0x01,
596                 }
597         };
598
599         *sock = rand() % listen_sockets;
600
601         if(listen_socket[*sock].sa.sa.sa_family == AF_INET6) {
602                 if(localdiscovery_address.sa.sa_family == AF_INET6) {
603                         localdiscovery_address.in6.sin6_port = n->prevedge->address.in.sin_port;
604                         *sa = &localdiscovery_address;
605                 } else {
606                         broadcast_ipv6.in6.sin6_port = n->prevedge->address.in.sin_port;
607                         broadcast_ipv6.in6.sin6_scope_id = listen_socket[*sock].sa.in6.sin6_scope_id;
608                         *sa = &broadcast_ipv6;
609                 }
610         } else {
611                 if(localdiscovery_address.sa.sa_family == AF_INET) {
612                         localdiscovery_address.in.sin_port = n->prevedge->address.in.sin_port;
613                         *sa = &localdiscovery_address;
614                 } else {
615                         broadcast_ipv4.in.sin_port = n->prevedge->address.in.sin_port;
616                         *sa = &broadcast_ipv4;
617                 }
618         }
619 }
620
621 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
622         vpn_packet_t pkt1, pkt2;
623         vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
624         vpn_packet_t *inpkt = origpkt;
625         int nextpkt = 0;
626         vpn_packet_t *outpkt;
627         int origlen = origpkt->len;
628         size_t outlen;
629 #if defined(SOL_IP) && defined(IP_TOS)
630         static int priority = 0;
631 #endif
632         int origpriority = origpkt->priority;
633
634         if(!n->status.reachable) {
635                 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
636                 return;
637         }
638
639         if(n->status.sptps)
640                 return send_sptps_packet(n, origpkt);
641
642         /* Make sure we have a valid key */
643
644         if(!n->status.validkey) {
645                 logger(DEBUG_TRAFFIC, LOG_INFO,
646                                    "No valid key known yet for %s (%s), forwarding via TCP",
647                                    n->name, n->hostname);
648
649                 if(n->last_req_key + 10 <= now.tv_sec) {
650                         send_req_key(n);
651                         n->last_req_key = now.tv_sec;
652                 }
653
654                 send_tcppacket(n->nexthop->connection, origpkt);
655
656                 return;
657         }
658
659         if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (inpkt->data[12] | inpkt->data[13])) {
660                 logger(DEBUG_TRAFFIC, LOG_INFO,
661                                 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
662                                 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
663
664                 if(n != n->nexthop)
665                         send_packet(n->nexthop, origpkt);
666                 else
667                         send_tcppacket(n->nexthop->connection, origpkt);
668
669                 return;
670         }
671
672         /* Compress the packet */
673
674         if(n->outcompression) {
675                 outpkt = pkt[nextpkt++];
676
677                 if((outpkt->len = compress_packet(outpkt->data, inpkt->data, inpkt->len, n->outcompression)) < 0) {
678                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
679                                    n->name, n->hostname);
680                         return;
681                 }
682
683                 inpkt = outpkt;
684         }
685
686         /* Add sequence number */
687
688         inpkt->seqno = htonl(++(n->sent_seqno));
689         inpkt->len += sizeof inpkt->seqno;
690
691         /* Encrypt the packet */
692
693         if(cipher_active(n->outcipher)) {
694                 outpkt = pkt[nextpkt++];
695                 outlen = MAXSIZE;
696
697                 if(!cipher_encrypt(n->outcipher, &inpkt->seqno, inpkt->len, &outpkt->seqno, &outlen, true)) {
698                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
699                         goto end;
700                 }
701
702                 outpkt->len = outlen;
703                 inpkt = outpkt;
704         }
705
706         /* Add the message authentication code */
707
708         if(digest_active(n->outdigest)) {
709                 if(!digest_create(n->outdigest, &inpkt->seqno, inpkt->len, (char *)&inpkt->seqno + inpkt->len)) {
710                         logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
711                         goto end;
712                 }
713
714                 inpkt->len += digest_length(n->outdigest);
715         }
716
717         /* Send the packet */
718
719         const sockaddr_t *sa;
720         int sock;
721
722         /* Overloaded use of priority field: -1 means local broadcast */
723
724         if(origpriority == -1 && n->prevedge)
725                 choose_broadcast_address(n, &sa, &sock);
726         else
727                 choose_udp_address(n, &sa, &sock);
728
729 #if defined(SOL_IP) && defined(IP_TOS)
730         if(priorityinheritance && origpriority != priority
731            && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
732                 priority = origpriority;
733                 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
734                 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
735                         logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", strerror(errno));
736         }
737 #endif
738
739         if(sendto(listen_socket[sock].udp.fd, (char *) &inpkt->seqno, inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
740                 if(sockmsgsize(sockerrno)) {
741                         if(n->maxmtu >= origlen)
742                                 n->maxmtu = origlen - 1;
743                         if(n->mtu >= origlen)
744                                 n->mtu = origlen - 1;
745                 } else
746                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
747         }
748
749 end:
750         origpkt->len = origlen;
751 }
752
753 bool send_sptps_data(void *handle, uint8_t type, const char *data, size_t len) {
754         node_t *to = handle;
755
756         /* Send it via TCP if it is a handshake packet, TCPOnly is in use, or this packet is larger than the MTU. */
757
758         if(type >= SPTPS_HANDSHAKE || ((myself->options | to->options) & OPTION_TCPONLY) || (type != PKT_PROBE && len > to->minmtu)) {
759                 char buf[len * 4 / 3 + 5];
760                 b64encode(data, buf, len);
761                 /* If no valid key is known yet, send the packets using ANS_KEY requests,
762                    to ensure we get to learn the reflexive UDP address. */
763                 if(!to->status.validkey)
764                         return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, myself->name, to->name, buf, myself->incompression);
765                 else
766                         return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, myself->name, to->name, REQ_SPTPS, buf);
767         }
768
769         /* Otherwise, send the packet via UDP */
770
771         const sockaddr_t *sa;
772         int sock;
773
774         choose_udp_address(to, &sa, &sock);
775
776         if(sendto(listen_socket[sock].udp.fd, data, len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
777                 if(sockmsgsize(sockerrno)) {
778                         if(to->maxmtu >= len)
779                                 to->maxmtu = len - 1;
780                         if(to->mtu >= len)
781                                 to->mtu = len - 1;
782                 } else {
783                         logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", to->name, to->hostname, sockstrerror(sockerrno));
784                         return false;
785                 }
786         }
787
788         return true;
789 }
790
791 bool receive_sptps_record(void *handle, uint8_t type, const char *data, uint16_t len) {
792         node_t *from = handle;
793
794         if(type == SPTPS_HANDSHAKE) {
795                 if(!from->status.validkey) {
796                         from->status.validkey = true;
797                         from->status.waitingforkey = false;
798                         logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
799                 }
800                 return true;
801         }
802
803         if(len > MTU) {
804                 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
805                 return false;
806         }
807
808         vpn_packet_t inpkt;
809
810         if(type == PKT_PROBE) {
811                 inpkt.len = len;
812                 memcpy(inpkt.data, data, len);
813                 mtu_probe_h(from, &inpkt, len);
814                 return true;
815         }
816
817         if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
818                 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
819                 return false;
820         }
821
822         /* Check if we have the headers we need */
823         if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
824                 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
825                 return false;
826         } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
827                 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
828         }
829
830         int offset = (type & PKT_MAC) ? 0 : 14;
831         if(type & PKT_COMPRESSED) {
832                 length_t ulen = uncompress_packet(inpkt.data + offset, (const uint8_t *)data, len, from->incompression);
833                 if(ulen < 0) {
834                         return false;
835                 } else {
836                         inpkt.len = ulen + offset;
837                 }
838                 if(inpkt.len > MAXSIZE)
839                         abort();
840         } else {
841                 memcpy(inpkt.data + offset, data, len);
842                 inpkt.len = len + offset;
843         }
844
845         /* Generate the Ethernet packet type if necessary */
846         if(offset) {
847                 switch(inpkt.data[14] >> 4) {
848                         case 4:
849                                 inpkt.data[12] = 0x08;
850                                 inpkt.data[13] = 0x00;
851                                 break;
852                         case 6:
853                                 inpkt.data[12] = 0x86;
854                                 inpkt.data[13] = 0xDD;
855                                 break;
856                         default:
857                                 logger(DEBUG_TRAFFIC, LOG_ERR,
858                                                    "Unknown IP version %d while reading packet from %s (%s)",
859                                                    inpkt.data[14] >> 4, from->name, from->hostname);
860                                 return false;
861                 }
862         }
863
864         receive_packet(from, &inpkt);
865         return true;
866 }
867
868 /*
869   send a packet to the given vpn ip.
870 */
871 void send_packet(node_t *n, vpn_packet_t *packet) {
872         node_t *via;
873
874         if(n == myself) {
875                 if(overwrite_mac)
876                          memcpy(packet->data, mymac.x, ETH_ALEN);
877                 n->out_packets++;
878                 n->out_bytes += packet->len;
879                 devops.write(packet);
880                 return;
881         }
882
883         logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)",
884                            packet->len, n->name, n->hostname);
885
886         if(!n->status.reachable) {
887                 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable",
888                                    n->name, n->hostname);
889                 return;
890         }
891
892         n->out_packets++;
893         n->out_bytes += packet->len;
894
895         if(n->status.sptps) {
896                 send_sptps_packet(n, packet);
897                 return;
898         }
899
900         via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
901
902         if(via != n)
903                 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)",
904                            n->name, via->name, n->via->hostname);
905
906         if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
907                 if(!send_tcppacket(via->connection, packet))
908                         terminate_connection(via->connection, true);
909         } else
910                 send_udppacket(via, packet);
911 }
912
913 /* Broadcast a packet using the minimum spanning tree */
914
915 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
916         // Always give ourself a copy of the packet.
917         if(from != myself)
918                 send_packet(myself, packet);
919
920         // In TunnelServer mode, do not forward broadcast packets.
921         // The MST might not be valid and create loops.
922         if(tunnelserver || broadcast_mode == BMODE_NONE)
923                 return;
924
925         logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
926                            packet->len, from->name, from->hostname);
927
928         switch(broadcast_mode) {
929                 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
930                 // This guarantees all nodes receive the broadcast packet, and
931                 // usually distributes the sending of broadcast packets over all nodes.
932                 case BMODE_MST:
933                         for list_each(connection_t, c, connection_list)
934                                 if(c->status.active && c->status.mst && c != from->nexthop->connection)
935                                         send_packet(c->node, packet);
936                         break;
937
938                 // In direct mode, we send copies to each node we know of.
939                 // However, this only reaches nodes that can be reached in a single hop.
940                 // We don't have enough information to forward broadcast packets in this case.
941                 case BMODE_DIRECT:
942                         if(from != myself)
943                                 break;
944
945                         for splay_each(node_t, n, node_tree)
946                                 if(n->status.reachable && ((n->via == myself && n->nexthop == n) || n->via == n))
947                                         send_packet(n, packet);
948                         break;
949
950                 default:
951                         break;
952         }
953 }
954
955 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
956         node_t *n = NULL;
957         bool hard = false;
958         static time_t last_hard_try = 0;
959
960         for splay_each(edge_t, e, edge_weight_tree) {
961                 if(!e->to->status.reachable || e->to == myself)
962                         continue;
963
964                 if(sockaddrcmp_noport(from, &e->address)) {
965                         if(last_hard_try == now.tv_sec)
966                                 continue;
967                         hard = true;
968                 }
969
970                 if(!try_mac(e->to, pkt))
971                         continue;
972
973                 n = e->to;
974                 break;
975         }
976
977         if(hard)
978                 last_hard_try = now.tv_sec;
979
980         last_hard_try = now.tv_sec;
981         return n;
982 }
983
984 void handle_incoming_vpn_data(void *data, int flags) {
985         listen_socket_t *ls = data;
986         vpn_packet_t pkt;
987         char *hostname;
988         sockaddr_t from = {{0}};
989         socklen_t fromlen = sizeof from;
990         node_t *n;
991         int len;
992
993         len = recvfrom(ls->udp.fd, (char *) &pkt.seqno, MAXSIZE, 0, &from.sa, &fromlen);
994
995         if(len <= 0 || len > MAXSIZE) {
996                 if(!sockwouldblock(sockerrno))
997                         logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
998                 return;
999         }
1000
1001         pkt.len = len;
1002
1003         sockaddrunmap(&from); /* Some braindead IPv6 implementations do stupid things. */
1004
1005         n = lookup_node_udp(&from);
1006
1007         if(!n) {
1008                 n = try_harder(&from, &pkt);
1009                 if(n)
1010                         update_node_udp(n, &from);
1011                 else if(debug_level >= DEBUG_PROTOCOL) {
1012                         hostname = sockaddr2hostname(&from);
1013                         logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1014                         free(hostname);
1015                         return;
1016                 }
1017                 else
1018                         return;
1019         }
1020
1021         n->sock = ls - listen_socket;
1022
1023         receive_udppacket(n, &pkt);
1024 }
1025
1026 void handle_device_data(void *data, int flags) {
1027         vpn_packet_t packet;
1028
1029         packet.priority = 0;
1030
1031         if(devops.read(&packet)) {
1032                 myself->in_packets++;
1033                 myself->in_bytes += packet.len;
1034                 route(myself, &packet);
1035         }
1036 }