2 net_packet.c -- Handles in- and outgoing VPN packets
3 Copyright (C) 1998-2005 Ivo Timmermans,
4 2000-2014 Guus Sliepen <guus@tinc-vpn.org>
5 2010 Timothy Redaelli <timothy@redaelli.eu>
6 2010 Brandon Black <blblack@gmail.com>
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License along
19 with this program; if not, write to the Free Software Foundation, Inc.,
20 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
35 #include "connection.h"
// Two-argument maximum. Function-like macro: each argument may be evaluated twice.
52 #define MAX(a, b) ((a) > (b) ? (a) : (b))
// Work memory handed to both LZO compressors below; sized for whichever of the two needs more.
57 static char lzo_wrkmem[LZO1X_999_MEM_COMPRESS > LZO1X_1_MEM_COMPRESS ? LZO1X_999_MEM_COMPRESS : LZO1X_1_MEM_COMPRESS];
// Forward declaration; udp_probe_h() calls send_udppacket() ahead of its definition.
60 static void send_udppacket(node_t *, vpn_packet_t *);
// Replay window: used as the byte length of n->late, so it covers replaywin * 8 sequence numbers.
62 unsigned replaywin = 16;
// Enables the extra probe that try_udp() sends with status.send_locally set.
63 bool localdiscovery = true;
// When true, try_mtu() checks status.udp_confirmed before doing anything else.
64 bool udp_discovery = true;
// The next three values are compared against or stored in tv_sec fields, i.e. they are in seconds.
// Probe interval used by try_udp() once status.udp_confirmed is set.
65 int udp_discovery_keepalive_interval = 9;
// Probe interval used by try_udp() while status.udp_confirmed is not set.
66 int udp_discovery_interval = 2;
// Delay after a probe reply before udp_probe_timeout_handler() is scheduled to run.
67 int udp_discovery_timeout = 30;
// Threshold compared against n->received_seqno in receive_udppacket() (2 to the power 30).
69 #define MAX_SEQNO 1073741824
// Finalizes PMTU discovery for node n: once 20 probes have been sent, or the
// lower bound has reached the upper bound, the two bounds are reconciled and
// the outcome is logged.
71 static void try_fix_mtu(node_t *n) {
75 if(n->mtuprobes == 20 || n->minmtu >= n->maxmtu) {
// The bounds crossed: pull minmtu back down to maxmtu.
76 if(n->minmtu > n->maxmtu)
77 n->minmtu = n->maxmtu;
79 n->maxmtu = n->minmtu;
// The log line reports n->mtu together with the number of probes used.
81 logger(DEBUG_TRAFFIC, LOG_INFO, "Fixing MTU of %s (%s) to %d after %d probes", n->name, n->hostname, n->mtu, n->mtuprobes);
// Timeout callback that udp_probe_h() registers on n->udp_ping_timeout.
// For a node still marked as UDP-confirmed it logs that ping responses have
// stopped and clears status.udp_confirmed.
// data: the node, as passed to timeout_add().
86 static void udp_probe_timeout_handler(void *data) {
// NOTE(review): the statement guarded by this check is not visible here - presumably an early return.
88 if(!n->status.udp_confirmed)
91 logger(DEBUG_TRAFFIC, LOG_INFO, "Too much time has elapsed since last UDP ping response from %s (%s), stopping UDP communication", n->name, n->hostname);
92 n->status.udp_confirmed = false;
// Handles a UDP probe packet received from node n.
// A zero first data byte marks a probe request, which is answered by sending
// the packet back. Any other first byte is treated as a probe reply: it
// confirms UDP reachability, re-arms the ping timeout, and feeds the MTU and
// RTT bookkeeping of the node.
// n:      node the probe came from.
// packet: the probe; for requests it is modified in place and sent back.
// len:    probe length; echoed in type 2 replies and used as the reply length
//         when the reply itself does not carry one.
98 static void udp_probe_h(node_t *n, vpn_packet_t *packet, length_t len) {
99 if(!DATA(packet)[0]) {
100 logger(DEBUG_TRAFFIC, LOG_INFO, "Got UDP probe request %d from %s (%s)", packet->len, n->name, n->hostname);
102 /* It's a probe request, send back a reply */
104 /* Type 2 probe replies were introduced in protocol 17.3 */
// The peer minor protocol version is read from the top byte of n->options.
105 if ((n->options >> 24) >= 3) {
106 uint8_t *data = DATA(packet);
// Type 2 reply payload as written here: 2-byte length, 4-byte seconds,
// 4-byte microseconds, all in network byte order.
108 uint16_t len16 = htons(len); memcpy(data, &len16, 2); data += 2;
110 gettimeofday(&now, NULL);
111 uint32_t sec = htonl(now.tv_sec); memcpy(data, &sec, 4); data += 4;
112 uint32_t usec = htonl(now.tv_usec); memcpy(data, &usec, 4); data += 4;
115 /* Legacy protocol: n won't understand type 2 probe replies. */
119 /* Temporarily set udp_confirmed, so that the reply is sent
120 back exactly the way it came in. */
122 bool udp_confirmed = n->status.udp_confirmed;
123 n->status.udp_confirmed = true;
124 send_udppacket(n, packet);
125 n->status.udp_confirmed = udp_confirmed;
// Reply handling: default to the received length, overridden below for type 2.
127 length_t probelen = len;
128 if (DATA(packet)[0] == 2) {
130 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received invalid (too short) UDP probe reply from %s (%s)", n->name, n->hostname);
// The original probe length sits in the two bytes after the type byte.
132 uint16_t probelen16; memcpy(&probelen16, DATA(packet) + 1, 2); probelen = ntohs(probelen16);
135 logger(DEBUG_TRAFFIC, LOG_INFO, "Got type %d UDP probe reply %d from %s (%s)", DATA(packet)[0], probelen, n->name, n->hostname);
137 /* It's a valid reply: now we know bidirectional communication
138 is possible using the address and socket that the reply
140 n->status.udp_confirmed = true;
// Restart the countdown after which udp_probe_timeout_handler() runs.
143 timeout_del(&n->udp_ping_timeout);
144 timeout_add(&n->udp_ping_timeout, &udp_probe_timeout_handler, n, &(struct timeval){udp_discovery_timeout, 0});
// A reply to a probe larger than maxmtu means the path MTU has grown.
147 if(probelen >= n->maxmtu + 1) {
148 logger(DEBUG_TRAFFIC, LOG_INFO, "Increase in PMTU to %s (%s) detected, restarting PMTU discovery", n->name, n->hostname);
150 /* Set mtuprobes to 1 so that try_mtu() doesn't reset maxmtu */
155 /* If applicable, raise the minimum supported MTU */
157 if(probelen > n->maxmtu)
158 probelen = n->maxmtu;
159 if(n->minmtu < probelen) {
160 n->minmtu = probelen;
165 The RTT is the time between the MTU probe burst was sent and the first
169 struct timeval now, diff;
170 gettimeofday(&now, NULL);
// diff = time elapsed since n->probe_time.
171 timersub(&now, &n->probe_time, &diff);
// Prefer the timestamp embedded in a type 2 reply over the local clock.
173 struct timeval probe_timestamp = now;
174 if (DATA(packet)[0] == 2 && packet->len >= 11) {
175 uint32_t sec; memcpy(&sec, DATA(packet) + 3, 4);
176 uint32_t usec; memcpy(&usec, DATA(packet) + 7, 4);
177 probe_timestamp.tv_sec = ntohl(sec);
178 probe_timestamp.tv_usec = ntohl(usec);
// Only the first reply counted in probe_counter sets the RTT, in seconds.
183 if(n->probe_counter == 1) {
184 n->rtt = diff.tv_sec + diff.tv_usec * 1e-6;
185 n->probe_time = probe_timestamp;
186 logger(DEBUG_TRAFFIC, LOG_DEBUG, "%s (%s) RTT %.2f ms, rx packet loss %.2f %%", n->name, n->hostname, n->rtt * 1e3, n->packetloss * 1e2);
// Compresses len bytes from source into dest, choosing the algorithm by level:
// the first branch copies the data unchanged, level 10 uses lzo1x_1, levels
// below 10 use zlib compress2() with that level, and the remaining case uses
// lzo1x_999. Each compressor is told the output capacity is MAXSIZE.
// NOTE(review): the return statements are not visible here - presumably the
// resulting length, with a distinct value on failure; confirm.
191 static length_t compress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
193 memcpy(dest, source, len);
195 } else if(level == 10) {
197 lzo_uint lzolen = MAXSIZE;
// NOTE(review): the return value of lzo1x_1_compress() is not checked on this line.
198 lzo1x_1_compress(source, len, dest, &lzolen, lzo_wrkmem);
203 } else if(level < 10) {
205 unsigned long destlen = MAXSIZE;
206 if(compress2(dest, &destlen, source, len, level) == Z_OK)
213 lzo_uint lzolen = MAXSIZE;
214 lzo1x_999_compress(source, len, dest, &lzolen, lzo_wrkmem);
// Inverse of compress_packet(): expands len bytes from source into dest.
// The first branch copies the data unchanged, levels above 9 use
// lzo1x_decompress_safe(), and the remaining case uses zlib uncompress().
// Both decompressors are told the output capacity is MAXSIZE.
// NOTE(review): the return statements are not visible here; confirm what is
// returned on failure.
224 static length_t uncompress_packet(uint8_t *dest, const uint8_t *source, length_t len, int level) {
226 memcpy(dest, source, len);
228 } else if(level > 9) {
230 lzo_uint lzolen = MAXSIZE;
231 if(lzo1x_decompress_safe(source, len, dest, &lzolen, NULL) == LZO_E_OK)
239 unsigned long destlen = MAXSIZE;
240 if(uncompress(dest, &destlen, source, len) == Z_OK)
// Common entry point for a fully decoded packet received from node n:
// logs its size and adds it to the node inbound byte counter.
252 static void receive_packet(node_t *n, vpn_packet_t *packet) {
253 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Received packet of %d bytes from %s (%s)",
254 packet->len, n->name, n->hostname);
257 n->in_bytes += packet->len;
// Tests whether inpkt authenticates against the keys of node n without
// modifying the packet. Called by try_harder() to identify an unknown sender.
// One path verifies the datagram through SPTPS; the other checks the trailing
// digest over everything before it.
262 static bool try_mac(node_t *n, const vpn_packet_t *inpkt) {
264 return sptps_verify_datagram(&n->sptps, DATA(inpkt), inpkt->len);
266 #ifdef DISABLE_LEGACY
// Reject when no digest is configured or the packet cannot hold seqno + digest.
269 if(!digest_active(n->indigest) || inpkt->len < sizeof(seqno_t) + digest_length(n->indigest))
// The digest occupies the last digest_length() bytes of the packet.
272 return digest_verify(n->indigest, SEQNO(inpkt), inpkt->len - digest_length(n->indigest), DATA(inpkt) + inpkt->len - digest_length(n->indigest));
// Processes a UDP datagram attributed to node n.
// SPTPS nodes: the two node IDs are skipped and the rest is handed to
// sptps_receive_data().
// Legacy nodes: the packet is length-checked, authenticated, decrypted,
// checked against the replay window, decompressed, and then dispatched either
// to udp_probe_h() (zero Ethernet type bytes) or to receive_packet().
// Returns a bool; NOTE(review): the return statements are not visible here -
// the caller treats a false result as a reason to stop processing.
276 static bool receive_udppacket(node_t *n, vpn_packet_t *inpkt) {
// Two scratch packets, alternated between processing stages via pkt[nextpkt++].
277 vpn_packet_t pkt1, pkt2;
278 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
281 pkt1.offset = DEFAULT_PACKET_OFFSET;
282 pkt2.offset = DEFAULT_PACKET_OFFSET;
284 if(n->status.sptps) {
285 if(!n->sptps.state) {
286 if(!n->status.waitingforkey) {
287 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but we haven't exchanged keys yet", n->name, n->hostname);
290 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
// Skip the destination and source node IDs that precede the SPTPS payload.
294 inpkt->offset += 2 * sizeof(node_id_t);
295 if(!sptps_receive_data(&n->sptps, DATA(inpkt), inpkt->len - 2 * sizeof(node_id_t))) {
296 logger(DEBUG_TRAFFIC, LOG_ERR, "Got bad packet from %s (%s)", n->name, n->hostname);
302 #ifdef DISABLE_LEGACY
305 if(!n->status.validkey_in) {
306 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got packet from %s (%s) but he hasn't got our key yet", n->name, n->hostname);
310 /* Check packet length */
312 if(inpkt->len < sizeof(seqno_t) + digest_length(n->indigest)) {
313 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got too short packet from %s (%s)",
314 n->name, n->hostname);
318 /* It's a legacy UDP packet, the data starts after the seqno */
320 inpkt->offset += sizeof(seqno_t);
322 /* Check the message authentication code */
324 if(digest_active(n->indigest)) {
// Strip the digest from the length first; it then sits right after the covered data.
325 inpkt->len -= digest_length(n->indigest);
326 if(!digest_verify(n->indigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
327 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Got unauthenticated packet from %s (%s)", n->name, n->hostname);
331 /* Decrypt the packet */
333 if(cipher_active(n->incipher)) {
334 vpn_packet_t *outpkt = pkt[nextpkt++];
337 if(!cipher_decrypt(n->incipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
338 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Error decrypting packet from %s (%s)", n->name, n->hostname);
342 outpkt->len = outlen;
346 /* Check the sequence number */
// The sequence number is stored in network byte order ahead of the payload.
349 memcpy(&seqno, SEQNO(inpkt), sizeof seqno);
350 seqno = ntohl(seqno);
351 inpkt->len -= sizeof seqno;
// Anything other than the next expected number goes through the replay window.
354 if(seqno != n->received_seqno + 1) {
// Beyond the window: tolerated a limited number of times via n->farfuture.
355 if(seqno >= n->received_seqno + replaywin * 8) {
356 if(n->farfuture++ < replaywin >> 2) {
357 logger(DEBUG_ALWAYS, LOG_WARNING, "Packet from %s (%s) is %d seqs in the future, dropped (%u)",
358 n->name, n->hostname, seqno - n->received_seqno - 1, n->farfuture);
361 logger(DEBUG_ALWAYS, LOG_WARNING, "Lost %d packets from %s (%s)",
362 seqno - n->received_seqno - 1, n->name, n->hostname);
363 memset(n->late, 0, replaywin);
364 } else if (seqno <= n->received_seqno) {
// Old packet: rejected when it is older than the window or its bit in the
// n->late bitmap is not set.
365 if((n->received_seqno >= replaywin * 8 && seqno <= n->received_seqno - replaywin * 8) || !(n->late[(seqno / 8) % replaywin] & (1 << seqno % 8))) {
366 logger(DEBUG_ALWAYS, LOG_WARNING, "Got late or replayed packet from %s (%s), seqno %d, last received %d",
367 n->name, n->hostname, seqno, n->received_seqno);
// Mark every skipped sequence number as still expected.
371 for(int i = n->received_seqno + 1; i < seqno; i++)
372 n->late[(i / 8) % replaywin] |= 1 << i % 8;
// Clear the bit for this sequence number now that it has arrived.
377 n->late[(seqno / 8) % replaywin] &= ~(1 << seqno % 8);
380 if(seqno > n->received_seqno)
381 n->received_seqno = seqno;
385 if(n->received_seqno > MAX_SEQNO)
388 /* Decompress the packet */
390 length_t origlen = inpkt->len;
392 if(n->incompression) {
393 vpn_packet_t *outpkt = pkt[nextpkt++];
// NOTE(review): the comparison applies to the value after assignment to
// outpkt->len; if length_t is an unsigned type this check can never be true - confirm.
395 if((outpkt->len = uncompress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->incompression)) < 0) {
396 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while uncompressing packet from %s (%s)",
397 n->name, n->hostname);
403 origlen -= MTU/64 + 20;
// Bytes 12 and 13 hold the Ethernet type; both zero marks a UDP probe.
408 if(!DATA(inpkt)[12] && !DATA(inpkt)[13])
409 udp_probe_h(n, inpkt, origlen);
411 receive_packet(n, inpkt);
// Wraps len bytes received over the meta connection c into a vpn_packet_t and
// passes it to receive_packet() on behalf of c->node.
// c:      connection the data arrived on.
// buffer: raw packet data.
// len:    number of bytes in buffer; checked against the packet data capacity.
416 void receive_tcppacket(connection_t *c, const char *buffer, int len) {
418 outpkt.offset = DEFAULT_PACKET_OFFSET;
// Guard against copying more than the packet buffer can hold.
420 if(len > sizeof outpkt.data - outpkt.offset)
424 if(c->options & OPTION_TCPONLY)
// send_packet() treats priority -1 as a request to use TCP.
427 outpkt.priority = -1;
428 memcpy(DATA(&outpkt), buffer, len);
430 receive_packet(c->node, &outpkt);
// Sends origpkt to node n as an SPTPS record.
// Probes (zero Ethernet type) go out as PKT_PROBE records. Other packets are
// optionally compressed, and are either sent in plaintext over a direct meta
// connection when they exceed the known minimum MTU, or sent as an SPTPS
// record starting at offset.
433 static void send_sptps_packet(node_t *n, vpn_packet_t *origpkt) {
434 if(!n->status.validkey && !n->connection)
// Both Ethernet type bytes zero: this is a UDP probe.
440 if(!(DATA(origpkt)[12] | DATA(origpkt)[13])) {
441 sptps_send_record(&n->sptps, PKT_PROBE, (char *)DATA(origpkt), origpkt->len);
445 if(routing_mode == RMODE_ROUTER)
// A packet shorter than the bytes to be skipped cannot be sent.
450 if(origpkt->len < offset)
455 if(n->outcompression) {
457 int len = compress_packet(DATA(&outpkt) + offset, DATA(origpkt) + offset, origpkt->len - offset, n->outcompression);
459 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)", n->name, n->hostname);
// Only use the compressed form when it is actually smaller.
460 } else if(len < origpkt->len - offset) {
461 outpkt.len = len + offset;
463 type |= PKT_COMPRESSED;
467 /* If we have a direct metaconnection to n, and we can't use UDP, then
468 don't bother with SPTPS and just use a "plaintext" PACKET message.
469 We don't really care about end-to-end security since we're not
470 sending the message through any intermediate nodes. */
471 if(n->connection && origpkt->len > n->minmtu)
472 send_tcppacket(n->connection, origpkt);
474 sptps_send_record(&n->sptps, type, DATA(origpkt) + offset, origpkt->len - offset);
// Ensures *sock indexes a listening socket whose address family matches sa.
// If the current choice does not match, the listening sockets are scanned in
// order for one that does.
// sa:   destination address.
// sock: in/out index into listen_socket[].
478 static void adapt_socket(const sockaddr_t *sa, int *sock) {
479 /* Make sure we have a suitable socket for the chosen address */
480 if(listen_socket[*sock].sa.sa.sa_family != sa->sa.sa_family) {
481 for(int i = 0; i < listen_sockets; i++) {
482 if(listen_socket[i].sa.sa.sa_family == sa->sa.sa_family) {
// Picks the destination address and listening socket for a UDP packet to n.
// A confirmed UDP address is preferred; otherwise a randomly chosen edge
// supplies the address and the socket index is drawn at random, then adjusted
// by adapt_socket() to match the address family.
// sa:   out, chosen address.
// sock: out, index into listen_socket[].
490 static void choose_udp_address(const node_t *n, const sockaddr_t **sa, int *sock) {
495 /* If the UDP address is confirmed, use it. */
496 if(n->status.udp_confirmed)
499 /* Send every third packet to n->address; that could be set
500 to the node's reflexive UDP address discovered during key
509 /* Otherwise, addresses are found in edges to this node.
510 So we pick a random edge and a random socket. */
// NOTE(review): rand() % count requires a non-empty edge tree; any guard for
// that is not visible here.
513 int j = rand() % n->edge_tree->count;
514 edge_t *candidate = NULL;
516 for splay_each(edge_t, e, n->edge_tree) {
// The reverse edge is used as the candidate, and its address is read below.
518 candidate = e->reverse;
524 *sa = &candidate->address;
525 *sock = rand() % listen_sockets;
528 adapt_socket(*sa, sock);
// Like choose_udp_address(), but selects the local address recorded on a
// randomly chosen edge of n. Outputs are only written when a candidate edge
// with a non-zero local address family was found.
// sa:   out, chosen address.
// sock: out, index into listen_socket[].
531 static void choose_local_address(const node_t *n, const sockaddr_t **sa, int *sock) {
534 /* Pick one of the edges from this node at random, then use its local address. */
537 int j = rand() % n->edge_tree->count;
538 edge_t *candidate = NULL;
540 for splay_each(edge_t, e, n->edge_tree) {
// A zero sa_family means the edge has no local address recorded.
547 if (candidate && candidate->local_address.sa.sa_family) {
548 *sa = &candidate->local_address;
549 *sock = rand() % listen_sockets;
550 adapt_socket(*sa, sock);
// Sends origpkt to node n over UDP.
// SPTPS nodes are delegated to send_sptps_packet(). For legacy nodes the
// packet falls back to TCP or the next hop when no key is known or it exceeds
// the minimum MTU; otherwise it is compressed, given a sequence number,
// encrypted, authenticated and sent with sendto(). A message-size error from
// sendto() lowers the node MTU estimates. origpkt->len is restored at the end.
554 static void send_udppacket(node_t *n, vpn_packet_t *origpkt) {
// Two scratch packets, alternated between processing stages via pkt[nextpkt++].
555 vpn_packet_t pkt1, pkt2;
556 vpn_packet_t *pkt[] = { &pkt1, &pkt2, &pkt1, &pkt2 };
557 vpn_packet_t *inpkt = origpkt;
559 vpn_packet_t *outpkt;
560 int origlen = origpkt->len;
562 #if defined(SOL_IP) && defined(IP_TOS)
// Remembers the last TOS value applied, shared across all calls and nodes.
563 static int priority = 0;
564 int origpriority = origpkt->priority;
567 pkt1.offset = DEFAULT_PACKET_OFFSET;
568 pkt2.offset = DEFAULT_PACKET_OFFSET;
570 if(!n->status.reachable) {
571 logger(DEBUG_TRAFFIC, LOG_INFO, "Trying to send UDP packet to unreachable node %s (%s)", n->name, n->hostname);
576 return send_sptps_packet(n, origpkt);
578 #ifdef DISABLE_LEGACY
581 /* Make sure we have a valid key */
583 if(!n->status.validkey) {
584 logger(DEBUG_TRAFFIC, LOG_INFO,
585 "No valid key known yet for %s (%s), forwarding via TCP",
586 n->name, n->hostname);
587 send_tcppacket(n->nexthop->connection, origpkt);
// Probes (zero Ethernet type) are exempt so that PMTU discovery can proceed.
591 if(n->options & OPTION_PMTU_DISCOVERY && inpkt->len > n->minmtu && (DATA(inpkt)[12] | DATA(inpkt)[13])) {
592 logger(DEBUG_TRAFFIC, LOG_INFO,
593 "Packet for %s (%s) larger than minimum MTU, forwarding via %s",
594 n->name, n->hostname, n != n->nexthop ? n->nexthop->name : "TCP");
597 send_packet(n->nexthop, origpkt);
599 send_tcppacket(n->nexthop->connection, origpkt);
604 /* Compress the packet */
606 if(n->outcompression) {
607 outpkt = pkt[nextpkt++];
// NOTE(review): the comparison applies to the value after assignment to
// outpkt->len; if length_t is an unsigned type this check can never be true - confirm.
609 if((outpkt->len = compress_packet(DATA(outpkt), DATA(inpkt), inpkt->len, n->outcompression)) < 0) {
610 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while compressing packet to %s (%s)",
611 n->name, n->hostname);
618 /* Add sequence number */
// Pre-increment: the counter is bumped before being written in network byte order.
620 seqno_t seqno = htonl(++(n->sent_seqno));
621 memcpy(SEQNO(inpkt), &seqno, sizeof seqno);
622 inpkt->len += sizeof seqno;
624 /* Encrypt the packet */
626 if(cipher_active(n->outcipher)) {
627 outpkt = pkt[nextpkt++];
630 if(!cipher_encrypt(n->outcipher, SEQNO(inpkt), inpkt->len, SEQNO(outpkt), &outlen, true)) {
631 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
635 outpkt->len = outlen;
639 /* Add the message authentication code */
641 if(digest_active(n->outdigest)) {
// The digest is appended directly after the data it covers.
642 if(!digest_create(n->outdigest, SEQNO(inpkt), inpkt->len, SEQNO(inpkt) + inpkt->len)) {
// NOTE(review): this is the digest failure path but the message says encrypting.
643 logger(DEBUG_TRAFFIC, LOG_ERR, "Error while encrypting packet to %s (%s)", n->name, n->hostname);
647 inpkt->len += digest_length(n->outdigest);
650 /* Send the packet */
652 const sockaddr_t *sa = NULL;
655 if(n->status.send_locally)
656 choose_local_address(n, &sa, &sock);
658 choose_udp_address(n, &sa, &sock);
660 #if defined(SOL_IP) && defined(IP_TOS)
// NOTE(review): the TOS option is applied to listen_socket[n->sock], while the
// sendto() below uses listen_socket[sock]; confirm these always agree.
661 if(priorityinheritance && origpriority != priority
662 && listen_socket[n->sock].sa.sa.sa_family == AF_INET) {
663 priority = origpriority;
664 logger(DEBUG_TRAFFIC, LOG_DEBUG, "Setting outgoing packet priority to %d", priority);
665 if(setsockopt(listen_socket[n->sock].udp.fd, SOL_IP, IP_TOS, &priority, sizeof(priority))) /* SO_PRIORITY doesn't seem to work */
666 logger(DEBUG_ALWAYS, LOG_ERR, "System call `%s' failed: %s", "setsockopt", sockstrerror(sockerrno));
// Would-block errors are ignored silently.
670 if(sendto(listen_socket[sock].udp.fd, SEQNO(inpkt), inpkt->len, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
// Message too large: the path cannot carry origlen, so cap both estimates below it.
671 if(sockmsgsize(sockerrno)) {
672 if(n->maxmtu >= origlen)
673 n->maxmtu = origlen - 1;
674 if(n->mtu >= origlen)
675 n->mtu = origlen - 1;
678 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending packet to %s (%s): %s", n->name, n->hostname, sockstrerror(sockerrno));
// Undo the length changes made while building the wire format.
682 origpkt->len = origlen;
// Transmits an SPTPS datagram from node from to node to, choosing a relay and
// a transport.
// TCP (base64 inside an ANS_KEY or REQ_KEY request) is used for handshakes,
// when TCPOnly is set, when the relay cannot handle relayed packets, or when
// the payload exceeds the relay minimum MTU. Otherwise the data is sent over
// UDP, prefixed with destination and source node IDs when the relay supports
// them. A message-size error from sendto() lowers the relay MTU estimates.
// type: SPTPS record type; data/len: the datagram to send.
686 static bool send_sptps_data_priv(node_t *to, node_t *from, int type, const void *data, size_t len) {
// Prefer to->via when it is another node and the payload fits its MTU (probes always qualify).
687 node_t *relay = (to->via != myself && (type == PKT_PROBE || (len - SPTPS_DATAGRAM_OVERHEAD) <= to->via->minmtu)) ? to->via : to->nexthop;
688 bool direct = from == myself && to == relay;
// The peer minor protocol version is read from the top byte of the options.
689 bool relay_supported = (relay->options >> 24) >= 4;
690 bool tcponly = (myself->options | relay->options) & OPTION_TCPONLY;
692 /* Send it via TCP if it is a handshake packet, TCPOnly is in use, this is a relay packet that the other node cannot understand, or this packet is larger than the MTU.
693 TODO: When relaying, the original sender does not know the end-to-end PMTU (it only knows the PMTU of the first hop).
694 This can lead to scenarios where large packets are sent over UDP to relay, but then relay has no choice but fall back to TCP. */
696 if(type == SPTPS_HANDSHAKE || tcponly || (!direct && !relay_supported) || (type != PKT_PROBE && (len - SPTPS_DATAGRAM_OVERHEAD) > relay->minmtu)) {
// Variable-length buffer for the base64 text: 4 output characters per 3 input bytes plus slack.
697 char buf[len * 4 / 3 + 5];
698 b64encode(data, buf, len);
699 /* If no valid key is known yet, send the packets using ANS_KEY requests,
700 to ensure we get to learn the reflexive UDP address. */
701 if(from == myself && !to->status.validkey) {
702 to->incompression = myself->incompression;
703 return send_request(to->nexthop->connection, "%d %s %s %s -1 -1 -1 %d", ANS_KEY, from->name, to->name, buf, to->incompression);
705 return send_request(to->nexthop->connection, "%d %s %s %d %s", REQ_KEY, from->name, to->name, REQ_SPTPS, buf);
// Room for the destination and source IDs that precede the payload.
710 if(relay_supported) overhead += sizeof to->id + sizeof from->id;
711 char buf[len + overhead]; char* buf_ptr = buf;
712 if(relay_supported) {
714 /* Inform the recipient that this packet was sent directly. */
715 node_id_t nullid = {};
716 memcpy(buf_ptr, &nullid, sizeof nullid); buf_ptr += sizeof nullid;
718 memcpy(buf_ptr, &to->id, sizeof to->id); buf_ptr += sizeof to->id;
720 memcpy(buf_ptr, &from->id, sizeof from->id); buf_ptr += sizeof from->id;
723 /* TODO: if this copy turns out to be a performance concern, change sptps_send_record() to add some "pre-padding" to the buffer and use that instead */
724 memcpy(buf_ptr, data, len); buf_ptr += len;
726 const sockaddr_t *sa = NULL;
728 if(relay->status.send_locally)
729 choose_local_address(relay, &sa, &sock);
731 choose_udp_address(relay, &sa, &sock);
732 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet from %s (%s) to %s (%s) via %s (%s)", from->name, from->hostname, to->name, to->hostname, relay->name, relay->hostname);
// Would-block errors are ignored silently.
733 if(sendto(listen_socket[sock].udp.fd, buf, buf_ptr - buf, 0, &sa->sa, SALEN(sa->sa)) < 0 && !sockwouldblock(sockerrno)) {
734 if(sockmsgsize(sockerrno)) {
735 // Compensate for SPTPS overhead
736 len -= SPTPS_DATAGRAM_OVERHEAD;
// Message too large: cap both relay MTU estimates just below this payload size.
737 if(relay->maxmtu >= len)
738 relay->maxmtu = len - 1;
739 if(relay->mtu >= len)
740 relay->mtu = len - 1;
743 logger(DEBUG_TRAFFIC, LOG_WARNING, "Error sending UDP SPTPS packet to %s (%s): %s", relay->name, relay->hostname, sockstrerror(sockerrno));
// Public send hook for SPTPS data originating at this node: forwards to
// send_sptps_data_priv() with myself as the sender.
// handle: the destination node, passed through as an opaque pointer.
751 bool send_sptps_data(void *handle, uint8_t type, const void *data, size_t len) {
752 return send_sptps_data_priv(handle, myself, type, data, len);
// Handles a decoded SPTPS record from the node passed as handle.
// Handshake records mark the key as valid. Probe records go to udp_probe_h().
// Data records are validated against the routing mode, decompressed if
// flagged, given an Ethernet type when they carry no MAC header, and passed
// to receive_packet().
// type: record type or PKT_* flag bits; data/len: the record payload.
755 bool receive_sptps_record(void *handle, uint8_t type, const void *data, uint16_t len) {
756 node_t *from = handle;
758 if(type == SPTPS_HANDSHAKE) {
759 if(!from->status.validkey) {
760 from->status.validkey = true;
761 from->status.waitingforkey = false;
762 logger(DEBUG_META, LOG_INFO, "SPTPS key exchange with %s (%s) succesful", from->name, from->hostname);
768 logger(DEBUG_ALWAYS, LOG_ERR, "Packet from %s (%s) larger than maximum supported size (%d > %d)", from->name, from->hostname, len, MTU);
773 inpkt.offset = DEFAULT_PACKET_OFFSET;
775 if(type == PKT_PROBE) {
777 memcpy(DATA(&inpkt), data, len);
778 udp_probe_h(from, &inpkt, len);
// Any bit other than the two known flags is unexpected.
782 if(type & ~(PKT_COMPRESSED | PKT_MAC)) {
783 logger(DEBUG_ALWAYS, LOG_ERR, "Unexpected SPTPS record type %d len %d from %s (%s)", type, len, from->name, from->hostname);
787 /* Check if we have the headers we need */
788 if(routing_mode != RMODE_ROUTER && !(type & PKT_MAC)) {
789 logger(DEBUG_TRAFFIC, LOG_ERR, "Received packet from %s (%s) without MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
791 } else if(routing_mode == RMODE_ROUTER && (type & PKT_MAC)) {
792 logger(DEBUG_TRAFFIC, LOG_WARNING, "Received packet from %s (%s) with MAC header (maybe Mode is not set correctly)", from->name, from->hostname);
// Without a MAC header the payload is placed after a 14-byte Ethernet header slot.
795 int offset = (type & PKT_MAC) ? 0 : 14;
796 if(type & PKT_COMPRESSED) {
797 length_t ulen = uncompress_packet(DATA(&inpkt) + offset, (const uint8_t *)data, len, from->incompression);
801 inpkt.len = ulen + offset;
803 if(inpkt.len > MAXSIZE)
806 memcpy(DATA(&inpkt) + offset, data, len);
807 inpkt.len = len + offset;
810 /* Generate the Ethernet packet type if necessary */
// The high nibble of the first payload byte is the IP version.
812 switch(DATA(&inpkt)[14] >> 4) {
// 0x0800 is the Ethernet type for IPv4.
814 DATA(&inpkt)[12] = 0x08;
815 DATA(&inpkt)[13] = 0x00;
// 0x86DD is the Ethernet type for IPv6.
818 DATA(&inpkt)[12] = 0x86;
819 DATA(&inpkt)[13] = 0xDD;
822 logger(DEBUG_TRAFFIC, LOG_ERR,
823 "Unknown IP version %d while reading packet from %s (%s)",
824 DATA(&inpkt)[14] >> 4, from->name, from->hostname);
829 receive_packet(from, &inpkt);
833 // This function tries to get SPTPS keys, if they aren't already known.
834 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if the keys are available.
// When a key request has been outstanding for more than 10 seconds, the SPTPS
// session is stopped and the waiting flag cleared so a new exchange can begin.
835 static void try_sptps(node_t *n) {
836 if(n->status.validkey)
839 logger(DEBUG_TRAFFIC, LOG_INFO, "No valid key known yet for %s (%s)", n->name, n->hostname);
841 if(!n->status.waitingforkey)
// last_req_key is compared in whole seconds against now.tv_sec.
843 else if(n->last_req_key + 10 < now.tv_sec) {
844 logger(DEBUG_ALWAYS, LOG_DEBUG, "No key from %s after 10 seconds, restarting SPTPS", n->name);
845 sptps_stop(&n->sptps);
846 n->status.waitingforkey = false;
// Builds a UDP probe of len bytes and sends it to node n via send_udppacket().
// The first 14 bytes are zeroed, which gives the packet a zero Ethernet type
// and a zero first byte (a probe request); the remainder is random data.
// len: total probe length; must be at least 14 for the randomize() length to
//      be non-negative.
853 static void send_udp_probe_packet(node_t *n, int len) {
855 packet.offset = DEFAULT_PACKET_OFFSET;
856 memset(DATA(&packet), 0, 14);
857 randomize(DATA(&packet) + 14, len - 14);
861 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending UDP probe length %d to %s (%s)", len, n->name, n->hostname);
863 send_udppacket(n, &packet);
866 // This function tries to establish a UDP tunnel to a node so that packets can be sent.
867 // If a tunnel is already established, it makes sure it stays up.
868 // This function makes no guarantees - it is up to the caller to check the node's state to figure out if UDP is usable.
// Probes are rate limited: one is sent only when at least interval seconds
// have passed since n->udp_ping_sent.
869 static void try_udp(node_t* n) {
873 struct timeval ping_tx_elapsed;
874 timersub(&now, &n->udp_ping_sent, &ping_tx_elapsed);
// Confirmed tunnels use the keepalive interval; unconfirmed ones use the discovery interval.
876 int interval = n->status.udp_confirmed ? udp_discovery_keepalive_interval : udp_discovery_interval;
878 if(ping_tx_elapsed.tv_sec >= interval) {
// Probe at the known minimum MTU, but never smaller than 16 bytes.
879 send_udp_probe_packet(n, MAX(n->minmtu, 16));
880 n->udp_ping_sent = now;
// Also try the local address, with send_locally set only for this one probe.
882 if(localdiscovery && !n->status.udp_confirmed && n->prevedge) {
883 n->status.send_locally = true;
884 send_udp_probe_packet(n, 16);
885 n->status.send_locally = false;
// Estimates a starting maxmtu for node n from the operating system.
// A temporary UDP socket is connected to the address chosen for n, IP_MTU is
// queried on it, and the IP header size and (for SPTPS nodes) the SPTPS and
// node ID overheads are subtracted to arrive at a tinc-level MTU.
// NOTE(review): the return statements and error fallbacks are not visible
// here; confirm what is returned when a system call fails.
890 static length_t choose_initial_maxmtu(node_t *n) {
895 const sockaddr_t *sa = NULL;
897 choose_udp_address(n, &sa, &sockindex);
901 sock = socket(sa->sa.sa_family, SOCK_DGRAM, IPPROTO_UDP);
903 logger(DEBUG_TRAFFIC, LOG_ERR, "Creating MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
// connect() on a UDP socket sends nothing; it fixes the destination for the query below.
907 if(connect(sock, &sa->sa, SALEN(sa->sa))) {
908 logger(DEBUG_TRAFFIC, LOG_ERR, "Connecting MTU assessment socket for %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
914 socklen_t ip_mtu_len = sizeof ip_mtu;
915 if(getsockopt(sock, IPPROTO_IP, IP_MTU, &ip_mtu, &ip_mtu_len)) {
916 logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) failed: %s", n->name, n->hostname, sockstrerror(sockerrno));
923 /* getsockopt(IP_MTU) returns the MTU of the physical interface.
924 We need to remove various overheads to get to the tinc MTU. */
925 length_t mtu = ip_mtu;
// Subtract the IPv6 or IPv4 header size depending on the address family.
926 mtu -= (sa->sa.sa_family == AF_INET6) ? sizeof(struct ip6_hdr) : sizeof(struct ip);
928 if(n->status.sptps) {
929 mtu -= SPTPS_DATAGRAM_OVERHEAD;
// Peers with minor protocol version 4 or higher also carry two node IDs per datagram.
930 if((n->options >> 24) >= 4)
931 mtu -= sizeof(node_id_t) + sizeof(node_id_t);
935 logger(DEBUG_TRAFFIC, LOG_ERR, "getsockopt(IP_MTU) on %s (%s) returned absurdly small value: %d", n->name, n->hostname, ip_mtu);
941 logger(DEBUG_TRAFFIC, LOG_INFO, "Using system-provided maximum tinc MTU for %s (%s): %hd", n->name, n->hostname, mtu);
951 /* This function tries to determine the MTU of a node.
952 By calling this function repeatedly, n->minmtu will be progressively
953 increased, and at some point, n->mtu will be fixed to n->minmtu. If the MTU
954 is already fixed, this function checks if it can be increased.
957 static void try_mtu(node_t *n) {
958 if(!(n->options & OPTION_PMTU_DISCOVERY))
961 if(udp_discovery && !n->status.udp_confirmed) {
968 /* mtuprobes == 0..19: initial discovery, send bursts with 1 second interval, mtuprobes++
969 mtuprobes == 20: fix MTU, and go to -1
970 mtuprobes == -1: send one >maxmtu probe every pingtimeout */
// Rate limiting: time since the previous probe was sent.
972 struct timeval elapsed;
973 timersub(&now, &n->probe_sent_time, &elapsed);
974 if(n->mtuprobes >= 0) {
// During discovery, after the first probe, wait at least a third of a second between probes.
975 if(n->mtuprobes != 0 && elapsed.tv_sec == 0 && elapsed.tv_usec < 333333)
978 if(elapsed.tv_sec < pingtimeout)
984 if(n->mtuprobes < 0) {
985 /* After the initial discovery, we only send one >maxmtu probe
986 to detect PMTU increases. */
987 if(n->maxmtu + 1 < MTU)
988 send_udp_probe_packet(n, n->maxmtu + 1);
990 /* Before initial discovery begins, set maxmtu to the most likely value.
991 If it's underestimated, we will correct it after initial discovery. */
992 if(n->mtuprobes == 0)
993 n->maxmtu = choose_initial_maxmtu(n);
996 /* Decreasing the number of probes per cycle might make the algorithm react faster to lost packets,
997 but it will typically increase convergence time in the no-loss case. */
998 const length_t probes_per_cycle = 8;
1000 /* This magic value was determined using math simulations.
1001 It will result in a 1329-byte first probe, followed (if there was a reply) by a 1407-byte probe.
1002 Since 1407 is just below the range of tinc MTUs over typical networks,
1003 this fine-tuning allows tinc to cover a lot of ground very quickly.
1004 This fine-tuning is only valid for maxmtu = MTU; if maxmtu is smaller,
1005 then it's better to use a multiplier of 1. Indeed, this leads to an interesting scenario
1006 if choose_initial_maxmtu() returns the actual MTU value - it will get confirmed with one single probe. */
1007 const float multiplier = (n->maxmtu == MTU) ? 0.97 : 1;
// Counts down from probes_per_cycle - 1 to 0 within each cycle of probes.
1009 const float cycle_position = probes_per_cycle - (n->mtuprobes % probes_per_cycle) - 1;
// Probing never starts below 512 bytes.
1010 const length_t minmtu = MAX(n->minmtu, 512);
1011 const float interval = n->maxmtu - minmtu;
1013 /* The core of the discovery algorithm is this exponential.
1014 It produces very large probes early in the cycle, and then it very quickly decreases the probe size.
1015 This reflects the fact that in the most difficult cases, we don't get any feedback for probes that
1016 are too large, and therefore we need to concentrate on small offsets so that we can quickly converge
1017 on the precise MTU as we are approaching it.
1018 The last probe of the cycle is always 1 byte in size - this is to make sure we'll get at least one
1019 reply per cycle so that we can make progress. */
// offset = interval raised to (multiplier * cycle_position / (probes_per_cycle - 1)), truncated to length_t.
1020 const length_t offset = powf(interval, multiplier * cycle_position / (probes_per_cycle - 1));
// Snapshot maxmtu so a change made during the send can be detected.
1022 length_t maxmtu = n->maxmtu;
1023 send_udp_probe_packet(n, minmtu + offset);
1024 /* If maxmtu changed, it means the probe was rejected by the system because it was too large.
1025 In that case, we recalculate with the new maxmtu and try again. */
1026 if(n->mtuprobes < 0 || maxmtu == n->maxmtu)
1030 if(n->mtuprobes >= 0)
// Reset the per-burst reply counter and timestamps read by udp_probe_h().
1034 n->probe_counter = 0;
1035 n->probe_sent_time = now;
1036 n->probe_time = now;
1038 /* Calculate the packet loss of incoming traffic by comparing the rate of
1039 packets received to the rate with which the sequence number has increased.
1040 TODO: this is unrelated to PMTU discovery - it should be moved elsewhere.
// Loss = 1 - (packets received) / (sequence numbers advanced) since the last call.
1043 if(n->received > n->prev_received)
1044 n->packetloss = 1.0 - (n->received - n->prev_received) / (float)(n->received_seqno - n->prev_received_seqno);
// Stores the comparison result itself: 1 when the sequence number did not advance, otherwise 0.
1046 n->packetloss = n->received_seqno <= n->prev_received_seqno;
1048 n->prev_received_seqno = n->received_seqno;
1049 n->prev_received = n->received;
1052 /* These functions try to establish a tunnel to a node (or its relay) so that
1053 packets can be sent (e.g. exchange keys).
1054 If a tunnel is already established, it tries to improve it (e.g. by trying
1055 to establish a UDP tunnel instead of TCP). This function makes no
1056 guarantees - it is up to the caller to check the node's state to figure out
1057 if TCP and/or UDP is usable. By calling this function repeatedly, the
1058 tunnel is gradually improved until we hit the wall imposed by the underlying
1059 network environment. It is recommended to call this function every time a
1060 packet is sent (or intended to be sent) to a node, so that the tunnel keeps
1061 improving as packets flow, and then gracefully downgrades itself as it goes
1065 static void try_tx_sptps(node_t *n) {
1066 /* If n is a TCP-only neighbor, we'll only use "cleartext" PACKET
1067 messages anyway, so there's no need for SPTPS at all. */
1069 if(n->connection && ((myself->options | n->options) & OPTION_TCPONLY))
1072 /* Otherwise, try to do SPTPS authentication with n if necessary. */
1076 /* Do we need to relay packets? */
// When n is reached without an intermediate via node, the next hop acts as the relay.
1078 node_t *via = (n->via == myself) ? n->nexthop : n->via;
1080 /* If the relay doesn't support SPTPS, everything goes via TCP anyway. */
1082 if((via->options >> 24) < 4)
1085 /* If we do have a relay, try everything with that one instead. */
1088 return try_tx_sptps(via);
// Legacy-protocol counterpart of try_tx_sptps(): when no valid key is known
// for n, a key request is made at most once every 10 seconds, tracked through
// n->last_req_key.
1094 static void try_tx_legacy(node_t *n) {
1095 /* Check if we already have a key, or request one. */
1097 if(!n->status.validkey) {
1098 if(n->last_req_key + 10 <= now.tv_sec) {
1100 n->last_req_key = now.tv_sec;
// Sends packet to node n.
// Packets addressed to this node are written to the tun/tap device.
// Unreachable nodes are logged. SPTPS nodes are handled by
// send_sptps_packet(). For other nodes the relay is chosen and the packet is
// sent over TCP (forced by priority -1 or the TCPOnly option) or over UDP.
1109 void send_packet(node_t *n, vpn_packet_t *packet) {
1110 // If it's for myself, write it to the tun/tap device.
// Overwrites the first ETH_ALEN bytes of the frame with our own MAC address.
1114 memcpy(DATA(packet), mymac.x, ETH_ALEN);
1116 n->out_bytes += packet->len;
1117 devops.write(packet);
// NOTE(review): a routine traffic message logged at LOG_ERR priority - confirm intent.
1121 logger(DEBUG_TRAFFIC, LOG_ERR, "Sending packet of %d bytes to %s (%s)", packet->len, n->name, n->hostname);
1123 // If the node is not reachable, drop it.
1125 if(!n->status.reachable) {
1126 logger(DEBUG_TRAFFIC, LOG_INFO, "Node %s (%s) is not reachable", n->name, n->hostname);
1130 // Keep track of packet statistics.
1133 n->out_bytes += packet->len;
1135 // Check if it should be sent as an SPTPS packet.
1137 if(n->status.sptps) {
1138 send_sptps_packet(n, packet);
1143 // Determine which node to actually send it to.
// Priority -1 forces TCP, which always goes through the next hop.
1145 node_t *via = (packet->priority == -1 || n->via == myself) ? n->nexthop : n->via;
// NOTE(review): the message pairs via->name with n->via->hostname; these differ
// whenever via is n->nexthop - confirm which hostname was intended.
1148 logger(DEBUG_TRAFFIC, LOG_INFO, "Sending packet to %s via %s (%s)", n->name, via->name, n->via->hostname);
1150 // Try to send via UDP, unless TCP is forced.
1152 if(packet->priority == -1 || ((myself->options | via->options) & OPTION_TCPONLY)) {
// A failed TCP send tears down the meta connection.
1153 if(!send_tcppacket(via->connection, packet))
1154 terminate_connection(via->connection, true);
1158 send_udppacket(via, packet);
// Delivers a broadcast packet that originated at node from.
// A copy is always passed to send_packet() for this node. Further forwarding
// depends on broadcast_mode: over connections flagged as part of the spanning
// tree, or directly to every reachable node that needs no intermediate hop.
1162 void broadcast_packet(const node_t *from, vpn_packet_t *packet) {
1163 // Always give ourself a copy of the packet.
1165 send_packet(myself, packet);
1167 // In TunnelServer mode, do not forward broadcast packets.
1168 // The MST might not be valid and create loops.
1169 if(tunnelserver || broadcast_mode == BMODE_NONE)
1172 logger(DEBUG_TRAFFIC, LOG_INFO, "Broadcasting packet of %d bytes from %s (%s)",
1173 packet->len, from->name, from->hostname);
1175 switch(broadcast_mode) {
1176 // In MST mode, broadcast packets travel via the Minimum Spanning Tree.
1177 // This guarantees all nodes receive the broadcast packet, and
1178 // usually distributes the sending of broadcast packets over all nodes.
1180 for list_each(connection_t, c, connection_list)
// Skip the connection the packet arrived through so it is not sent back.
1181 if(c->edge && c->status.mst && c != from->nexthop->connection)
1182 send_packet(c->node, packet);
1185 // In direct mode, we send copies to each node we know of.
1186 // However, this only reaches nodes that can be reached in a single hop.
1187 // We don't have enough information to forward broadcast packets in this case.
1192 for splay_each(node_t, n, node_tree)
// Only nodes that are reachable without passing through another node.
1193 if(n->status.reachable && n != myself && ((n->via == myself && n->nexthop == n) || n->via == n))
1194 send_packet(n, packet);
// Fallback sender identification for a UDP packet from an unrecognised
// address: walks the known edges and uses try_mac() to find a reachable node
// whose keys authenticate the packet.
// The static last_hard_try timestamp is compared with the current second to
// rate limit the search.
// from: source address of the packet; pkt: the packet itself.
// NOTE(review): the return statements are not visible here - presumably the
// matching node, or NULL when none is found.
1202 static node_t *try_harder(const sockaddr_t *from, const vpn_packet_t *pkt) {
1205 static time_t last_hard_try = 0;
1207 for splay_each(edge_t, e, edge_weight_tree) {
1208 if(!e->to->status.reachable || e->to == myself)
// sockaddrcmp_noport() is nonzero when the addresses differ, ignoring the port.
1211 if(sockaddrcmp_noport(from, &e->address)) {
1212 if(last_hard_try == now.tv_sec)
1217 if(!try_mac(e->to, pkt))
1225 last_hard_try = now.tv_sec;
1227 last_hard_try = now.tv_sec;
// I/O callback for a listening UDP socket.
// Reads one datagram, identifies the sending node (by source address, by the
// source ID embedded in the packet, or through try_harder()), relays SPTPS
// packets addressed to other nodes, and passes the rest to
// receive_udppacket(). On success the node socket index is recorded, and its
// UDP address is updated for directly received packets.
// data:  the listen_socket_t the event fired on.
// flags: not used in the lines visible here.
1231 void handle_incoming_vpn_data(void *data, int flags) {
1232 listen_socket_t *ls = data;
// An all-zero destination ID marks a packet that was sent directly to us.
1235 node_id_t nullid = {};
1236 sockaddr_t addr = {};
1237 socklen_t addrlen = sizeof addr;
1239 bool direct = false;
1242 int len = recvfrom(ls->udp.fd, DATA(&pkt), MAXSIZE, 0, &addr.sa, &addrlen);
1244 if(len <= 0 || len > MAXSIZE) {
// Would-block is expected on a non-blocking socket and is not logged.
1245 if(!sockwouldblock(sockerrno))
1246 logger(DEBUG_ALWAYS, LOG_ERR, "Receiving packet failed: %s", sockstrerror(sockerrno));
1252 sockaddrunmap(&addr); /* Some braindead IPv6 implementations do stupid things. */
1254 // Try to figure out who sent this packet.
1256 node_t *n = lookup_node_udp(&addr);
1259 // It might be from a 1.1 node, which might have a source ID in the packet.
1260 pkt.offset = 2 * sizeof(node_id_t);
1261 from = lookup_node_id(SRCID(&pkt));
// Trust the embedded source ID only if the datagram verifies against that node.
1262 if(from && !memcmp(DSTID(&pkt), &nullid, sizeof nullid) && from->status.sptps) {
1263 if(sptps_verify_datagram(&from->sptps, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t)))
1272 n = try_harder(&addr, &pkt);
1277 if(debug_level >= DEBUG_PROTOCOL) {
1278 hostname = sockaddr2hostname(&addr);
1279 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from unknown source %s", hostname);
1285 if(n->status.sptps) {
1286 pkt.offset = 2 * sizeof(node_id_t);
1288 if(!memcmp(DSTID(&pkt), &nullid, sizeof nullid)) {
1293 from = lookup_node_id(SRCID(&pkt));
1294 to = lookup_node_id(DSTID(&pkt));
1297 logger(DEBUG_PROTOCOL, LOG_WARNING, "Received UDP packet from %s (%s) with unknown source and/or destination ID", n->name, n->hostname);
// Relay the payload, minus the two IDs, towards its destination.
1302 send_sptps_data_priv(to, n, 0, DATA(&pkt), pkt.len - 2 * sizeof(node_id_t));
1311 if(!receive_udppacket(from, &pkt))
// Pointer difference gives the index of this socket in listen_socket[].
1314 n->sock = ls - listen_socket;
// sockaddrcmp() is nonzero when the address differs from the recorded one.
1315 if(direct && sockaddrcmp(&addr, &n->address))
1316 update_node_udp(n, &addr);
// I/O callback for the local tun/tap device: reads one packet, updates this
// node inbound counters, and hands the packet to route().
// data, flags: not used in the lines visible here.
1319 void handle_device_data(void *data, int flags) {
1320 vpn_packet_t packet;
1321 packet.offset = DEFAULT_PACKET_OFFSET;
// Locally originated packets start with priority 0.
1322 packet.priority = 0;
1324 if(devops.read(&packet)) {
1325 myself->in_packets++;
1326 myself->in_bytes += packet.len;
1327 route(myself, &packet);