Added comments and unfold deep "if"-construct in timeout_handler
[tinc] / src / net.c
1 /*
2     net.c -- most of the network code
3     Copyright (C) 1998-2005 Ivo Timmermans,
4                   2000-2015 Guus Sliepen <guus@tinc-vpn.org>
5                   2006      Scott Lamb <slamb@slamb.org>
6                   2011      Loïc Grenié <loic.grenie@gmail.com>
7
8     This program is free software; you can redistribute it and/or modify
9     it under the terms of the GNU General Public License as published by
10     the Free Software Foundation; either version 2 of the License, or
11     (at your option) any later version.
12
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16     GNU General Public License for more details.
17
18     You should have received a copy of the GNU General Public License along
19     with this program; if not, write to the Free Software Foundation, Inc.,
20     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23 #include "system.h"
24
25 #include "utils.h"
26 #include "conf.h"
27 #include "connection.h"
28 #include "device.h"
29 #include "graph.h"
30 #include "logger.h"
31 #include "meta.h"
32 #include "names.h"
33 #include "net.h"
34 #include "netutl.h"
35 #include "protocol.h"
36 #include "subnet.h"
37 #include "xalloc.h"
38
39 int contradicting_add_edge = 0; /* Counter checked (> 100) and reset to 0 by periodic_handler(); incremented outside this file view. */
40 int contradicting_del_edge = 0; /* Counterpart of contradicting_add_edge for DEL_EDGE messages; same check and reset. */
41 static int sleeptime = 10; /* Seconds periodic_handler() sleeps when both counters exceed 100; doubled after a sleep, halved otherwise, never below 10. */
42 time_t last_config_check = 0; /* Set to now.tv_sec at the end of reload_configuration(); host config files with a newer mtime get their connection closed. */
43 static timeout_t pingtimer; /* Timer that runs timeout_handler(); registered in main_loop(), kicked by retry(). */
44 static timeout_t periodictimer; /* Timer that runs periodic_handler(); registered in main_loop(). */
45 static struct timeval last_periodic_run_time; /* Value of `now` at the previous timeout_handler() run (initialised in main_loop()); used there to detect long gaps. */
46
47 /* Purge edges and subnets of unreachable nodes. Use carefully. */
48
49 void purge(void) {
50         logger(DEBUG_PROTOCOL, LOG_DEBUG, "Purging unreachable nodes");
51
52         /* Remove all edges and subnets owned by unreachable nodes. */
53
54         for splay_each(node_t, n, node_tree) {
55                 if(!n->status.reachable) {
56                         logger(DEBUG_SCARY_THINGS, LOG_DEBUG, "Purging node %s (%s)", n->name, n->hostname);
57
58                         for splay_each(subnet_t, s, n->subnet_tree) {
59                                 send_del_subnet(everyone, s);
60                                 if(!strictsubnets)
61                                         subnet_del(n, s);
62                         }
63
64                         for splay_each(edge_t, e, n->edge_tree) {
65                                 if(!tunnelserver)
66                                         send_del_edge(everyone, e);
67                                 edge_del(e);
68                         }
69                 }
70         }
71
72         /* Check if anyone else claims to have an edge to an unreachable node. If not, delete node. */
73
74         for splay_each(node_t, n, node_tree) {
75                 if(!n->status.reachable) {
76                         for splay_each(edge_t, e, edge_weight_tree)
77                                 if(e->to == n)
78                                         return;
79
80                         if(!autoconnect && (!strictsubnets || !n->subnet_tree->head))
81                                 /* in strictsubnets mode do not delete nodes with subnets */
82                                 node_del(n);
83                 }
84         }
85 }
86
87 /*
88   Terminate a connection:
89   - Mark it as inactive
90   - Remove the edge representing this connection
91   - Kill it with fire
92   - Check if we need to retry making an outgoing connection
93 */
94 void terminate_connection(connection_t *c, bool report) {
95         logger(DEBUG_CONNECTIONS, LOG_NOTICE, "Closing connection with %s (%s)", c->name, c->hostname);
96
97         if(c->node && c->node->connection == c)
98                 c->node->connection = NULL;
99
100         if(c->edge) {
101                 if(report && !tunnelserver)
102                         send_del_edge(everyone, c->edge);
103
104                 edge_del(c->edge);
105                 c->edge = NULL;
106
107                 /* Run MST and SSSP algorithms */
108
109                 graph();
110
111                 /* If the node is not reachable anymore but we remember it had an edge to us, clean it up */
112
113                 if(report && !c->node->status.reachable) {
114                         edge_t *e;
115                         e = lookup_edge(c->node, myself);
116                         if(e) {
117                                 if(!tunnelserver)
118                                         send_del_edge(everyone, e);
119                                 edge_del(e);
120                         }
121                 }
122         }
123
124         outgoing_t *outgoing = c->outgoing;
125         connection_del(c);
126
127         /* Check if this was our outgoing connection */
128
129         if(outgoing)
130                 do_outgoing_connection(outgoing);
131
132 #ifndef HAVE_MINGW
133         /* Clean up dead proxy processes */
134
135         while(waitpid(-1, NULL, WNOHANG) > 0);
136 #endif
137 }
138
139 /*
140   Check if the other end is active.
141   If we have sent packets, but didn't receive any,
142   then possibly the other end is dead. We send a
143   PING request over the meta connection. If the other
144   end does not reply in time, we consider them dead
145   and close the connection.
146 */
147 static void timeout_handler(void *data) {
148
149         bool close_all_connections = false;
150
151         /*
152                  timeout_handler will start after 30 seconds from start of tincd
153                  hold information about the elapsed time since last time the handler
154                  has been run
155         */
156         long sleep_time = now.tv_sec - last_periodic_run_time.tv_sec;
157         /*
158                  It seems that finding sane default value is harder than expected
159                  Since we send every second a UDP packet to make holepunching work
160                  And default UDP state expire on firewalls is between 15-30 seconds
161                  we drop all connections after 60 Seconds - UDPDiscoveryTimeout=30
162                  by default
163         */
164         if (sleep_time > 2 * udp_discovery_timeout) {
165                 logger(DEBUG_ALWAYS, LOG_ERR, "Awaking from dead after %ld seconds of sleep", sleep_time);
166                 /*
167                         Do not send any packets to tinc after we wake up.
168                         The other node probably closed our connection but we still
169                         are holding context information to them. This may happen on
170                         laptops or any other hardware which can be suspended for some time.
171                         Sending any data to node that wasn't expecting it will produce
172                         annoying and misleading errors on the other side about failed signature
173                         verification and or about missing sptps context
174                 */
175                 close_all_connections = true;
176         }
177         last_periodic_run_time = now;
178
179         for list_each(connection_t, c, connection_list) {
180                 // control connections (eg. tinc ctl) do not have any timeout
181                 if(c->status.control)
182                         continue;
183
184                 if(close_all_connections) {
185                         logger(DEBUG_ALWAYS, LOG_ERR, "Forcing connection close after sleep time %s (%s)", c->name, c->hostname);
186                         terminate_connection(c, c->edge);
187                         continue;
188                 }
189
190                 // Bail out early if we haven't reached the ping timeout for this node yet
191                 if(c->last_ping_time + pingtimeout > now.tv_sec)
192                         continue;
193
194                 // timeout during connection establishing
195                 if(!c->edge) {
196                         if(c->status.connecting)
197                                 logger(DEBUG_CONNECTIONS, LOG_WARNING, "Timeout while connecting to %s (%s)", c->name, c->hostname);
198                         else
199                                 logger(DEBUG_CONNECTIONS, LOG_WARNING, "Timeout from %s (%s) during authentication", c->name, c->hostname);
200
201                         terminate_connection(c, c->edge);
202                         continue;
203                 }
204
205                 // helps in UDP holepunching
206                 try_tx(c->node, false);
207
208                 // timeout during ping
209                 if(c->status.pinged) {
210                         logger(DEBUG_CONNECTIONS, LOG_INFO, "%s (%s) didn't respond to PING in %ld seconds", c->name, c->hostname, (long)now.tv_sec - c->last_ping_time);
211                         terminate_connection(c, c->edge);
212                         continue;
213                 }
214
215                 // check whether we need to send a new ping
216                 if(c->last_ping_time + pinginterval <= now.tv_sec)
217                         send_ping(c);
218         }
219
220         timeout_set(data, &(struct timeval){1, rand() % 100000});
221 }
222
223 static void periodic_handler(void *data) {
224         /* Check if there are too many contradicting ADD_EDGE and DEL_EDGE messages.
225            This usually only happens when another node has the same Name as this node.
226            If so, sleep for a short while to prevent a storm of contradicting messages.
227         */
228
229         if(contradicting_del_edge > 100 && contradicting_add_edge > 100) {
230                 logger(DEBUG_ALWAYS, LOG_WARNING, "Possible node with same Name as us! Sleeping %d seconds.", sleeptime);
231                 nanosleep(&(struct timespec){sleeptime, 0}, NULL);
232                 sleeptime *= 2;
233                 if(sleeptime < 0)
234                         sleeptime = 3600;
235         } else {
236                 sleeptime /= 2;
237                 if(sleeptime < 10)
238                         sleeptime = 10;
239         }
240
241         contradicting_add_edge = 0;
242         contradicting_del_edge = 0;
243
244         /* If AutoConnect is set, check if we need to make or break connections. */
245
246         if(autoconnect && node_tree->count > 1) {
247                 /* Count number of active connections */
248                 int nc = 0;
249                 for list_each(connection_t, c, connection_list) {
250                         if(c->edge)
251                                 nc++;
252                 }
253
254                 if(nc < 3) {
255                         /* Not enough active connections, try to add one.
256                            Choose a random node, if we don't have a connection to it,
257                            and we are not already trying to make one, create an
258                            outgoing connection to this node.
259                         */
260                         int count = 0;
261                         for splay_each(node_t, n, node_tree) {
262                                 if(n == myself || n->connection || !(n->status.has_address || n->status.reachable))
263                                         continue;
264                                 count++;
265                         }
266
267                         if(!count)
268                                 goto end;
269
270                         int r = rand() % count;
271
272                         for splay_each(node_t, n, node_tree) {
273                                 if(n == myself || n->connection || !(n->status.has_address || n->status.reachable))
274                                         continue;
275
276                                 if(r--)
277                                         continue;
278
279                                 bool found = false;
280
281                                 for list_each(outgoing_t, outgoing, outgoing_list) {
282                                         if(!strcmp(outgoing->name, n->name)) {
283                                                 found = true;
284                                                 break;
285                                         }
286                                 }
287
288                                 if(!found) {
289                                         logger(DEBUG_CONNECTIONS, LOG_INFO, "Autoconnecting to %s", n->name);
290                                         outgoing_t *outgoing = xzalloc(sizeof *outgoing);
291                                         outgoing->name = xstrdup(n->name);
292                                         list_insert_tail(outgoing_list, outgoing);
293                                         setup_outgoing_connection(outgoing);
294                                 }
295
296                                 break;
297                         }
298                 } else if(nc > 3) {
299                         /* Too many active connections, try to remove one.
300                            Choose a random outgoing connection to a node
301                            that has at least one other connection.
302                         */
303                         int r = rand() % nc;
304                         int i = 0;
305
306                         for list_each(connection_t, c, connection_list) {
307                                 if(!c->edge)
308                                         continue;
309
310                                 if(i++ != r)
311                                         continue;
312
313                                 if(!c->outgoing || !c->node || c->node->edge_tree->count < 2)
314                                         break;
315
316                                 logger(DEBUG_CONNECTIONS, LOG_INFO, "Autodisconnecting from %s", c->name);
317                                 list_delete(outgoing_list, c->outgoing);
318                                 c->outgoing = NULL;
319                                 terminate_connection(c, c->edge);
320                                 break;
321                         }
322                 }
323
324                 if(nc >= 3) {
325                         /* If we have enough active connections,
326                            remove any pending outgoing connections.
327                         */
328                         for list_each(outgoing_t, o, outgoing_list) {
329                                 bool found = false;
330                                 for list_each(connection_t, c, connection_list) {
331                                         if(c->outgoing == o) {
332                                                 found = true;
333                                                 break;
334                                         }
335                                 }
336                                 if(!found) {
337                                         logger(DEBUG_CONNECTIONS, LOG_INFO, "Cancelled outgoing connection to %s", o->name);
338                                         list_delete_node(outgoing_list, node);
339                                 }
340                         }
341                 }
342         }
343
344 end:
345         timeout_set(data, &(struct timeval){5, rand() % 100000});
346 }
347
348 void handle_meta_connection_data(connection_t *c) {
349         if (!receive_meta(c)) {
350                 terminate_connection(c, c->edge);
351                 return;
352         }
353 }
354
355 #ifndef HAVE_MINGW
356 static void sigterm_handler(void *data) {
357         logger(DEBUG_ALWAYS, LOG_NOTICE, "Got %s signal", strsignal(((signal_t *)data)->signum));
358         event_exit();
359 }
360
361 static void sighup_handler(void *data) {
362         logger(DEBUG_ALWAYS, LOG_NOTICE, "Got %s signal", strsignal(((signal_t *)data)->signum));
363         reopenlogger();
364         if(reload_configuration())
365                 exit(1);
366 }
367
368 static void sigalrm_handler(void *data) {
369         logger(DEBUG_ALWAYS, LOG_NOTICE, "Got %s signal", strsignal(((signal_t *)data)->signum));
370         retry();
371 }
372 #endif
373
374 int reload_configuration(void) {
375         char fname[PATH_MAX];
376
377         /* Reread our own configuration file */
378
379         exit_configuration(&config_tree);
380         init_configuration(&config_tree);
381
382         if(!read_server_config()) {
383                 logger(DEBUG_ALWAYS, LOG_ERR, "Unable to reread configuration file.");
384                 return EINVAL;
385         }
386
387         read_config_options(config_tree, NULL);
388
389         snprintf(fname, sizeof fname, "%s" SLASH "hosts" SLASH "%s", confbase, myself->name);
390         read_config_file(config_tree, fname);
391
392         /* Parse some options that are allowed to be changed while tinc is running */
393
394         setup_myself_reloadable();
395
396         /* If StrictSubnet is set, expire deleted Subnets and read new ones in */
397
398         if(strictsubnets) {
399                 for splay_each(subnet_t, subnet, subnet_tree)
400                         if (subnet->owner)
401                                 subnet->expires = 1;
402         }
403
404         for splay_each(node_t, n, node_tree)
405                 n->status.has_address = false;
406
407         load_all_nodes();
408
409         if(strictsubnets) {
410                 for splay_each(subnet_t, subnet, subnet_tree) {
411                         if (!subnet->owner)
412                                 continue;
413                         if(subnet->expires == 1) {
414                                 send_del_subnet(everyone, subnet);
415                                 if(subnet->owner->status.reachable)
416                                         subnet_update(subnet->owner, subnet, false);
417                                 subnet_del(subnet->owner, subnet);
418                         } else if(subnet->expires == -1) {
419                                 subnet->expires = 0;
420                         } else {
421                                 send_add_subnet(everyone, subnet);
422                                 if(subnet->owner->status.reachable)
423                                         subnet_update(subnet->owner, subnet, true);
424                         }
425                 }
426         } else { /* Only read our own subnets back in */
427                 for splay_each(subnet_t, subnet, myself->subnet_tree)
428                         if(!subnet->expires)
429                                 subnet->expires = 1;
430
431                 config_t *cfg = lookup_config(config_tree, "Subnet");
432
433                 while(cfg) {
434                         subnet_t *subnet, *s2;
435
436                         if(!get_config_subnet(cfg, &subnet))
437                                 continue;
438
439                         if((s2 = lookup_subnet(myself, subnet))) {
440                                 if(s2->expires == 1)
441                                         s2->expires = 0;
442
443                                 free_subnet(subnet);
444                         } else {
445                                 subnet_add(myself, subnet);
446                                 send_add_subnet(everyone, subnet);
447                                 subnet_update(myself, subnet, true);
448                         }
449
450                         cfg = lookup_config_next(config_tree, cfg);
451                 }
452
453                 for splay_each(subnet_t, subnet, myself->subnet_tree) {
454                         if(subnet->expires == 1) {
455                                 send_del_subnet(everyone, subnet);
456                                 subnet_update(myself, subnet, false);
457                                 subnet_del(myself, subnet);
458                         }
459                 }
460         }
461
462         /* Try to make outgoing connections */
463
464         try_outgoing_connections();
465
466         /* Close connections to hosts that have a changed or deleted host config file */
467
468         for list_each(connection_t, c, connection_list) {
469                 if(c->status.control)
470                         continue;
471
472                 snprintf(fname, sizeof fname, "%s" SLASH "hosts" SLASH "%s", confbase, c->name);
473                 struct stat s;
474                 if(stat(fname, &s) || s.st_mtime > last_config_check) {
475                         logger(DEBUG_CONNECTIONS, LOG_INFO, "Host config file of %s has been changed", c->name);
476                         terminate_connection(c, c->edge);
477                 }
478         }
479
480         last_config_check = now.tv_sec;
481
482         return 0;
483 }
484
485 void retry(void) {
486         /* Reset the reconnection timers for all outgoing connections */
487         for list_each(outgoing_t, outgoing, outgoing_list) {
488                 outgoing->timeout = 0;
489                 if(outgoing->ev.cb)
490                         timeout_set(&outgoing->ev, &(struct timeval){0, 0});
491         }
492
493         /* Check for outgoing connections that are in progress, and reset their ping timers */
494         for list_each(connection_t, c, connection_list) {
495                 if(c->outgoing && !c->node)
496                         c->last_ping_time = 0;
497         }
498
499         /* Kick the ping timeout handler */
500         timeout_set(&pingtimer, &(struct timeval){0, 0});
501 }
502
503 /*
504   this is where it all happens...
505 */
506 int main_loop(void) {
507         last_periodic_run_time = now;
508         timeout_add(&pingtimer, timeout_handler, &pingtimer, &(struct timeval){pingtimeout, rand() % 100000});
509         timeout_add(&periodictimer, periodic_handler, &periodictimer, &(struct timeval){0, 0});
510
511 #ifndef HAVE_MINGW
512         signal_t sighup = {0};
513         signal_t sigterm = {0};
514         signal_t sigquit = {0};
515         signal_t sigint = {0};
516         signal_t sigalrm = {0};
517
518         signal_add(&sighup, sighup_handler, &sighup, SIGHUP);
519         signal_add(&sigterm, sigterm_handler, &sigterm, SIGTERM);
520         signal_add(&sigquit, sigterm_handler, &sigquit, SIGQUIT);
521         signal_add(&sigint, sigterm_handler, &sigint, SIGINT);
522         signal_add(&sigalrm, sigalrm_handler, &sigalrm, SIGALRM);
523 #endif
524
525         if(!event_loop()) {
526                 logger(DEBUG_ALWAYS, LOG_ERR, "Error while waiting for input: %s", sockstrerror(sockerrno));
527                 return 1;
528         }
529
530 #ifndef HAVE_MINGW
531         signal_del(&sighup);
532         signal_del(&sigterm);
533         signal_del(&sigquit);
534         signal_del(&sigint);
535         signal_del(&sigalrm);
536 #endif
537
538         timeout_del(&periodictimer);
539         timeout_del(&pingtimer);
540
541         return 0;
542 }