/*
    event.c -- I/O, timeout and signal event handling
    Copyright (C) 2012-2022 Guus Sliepen <guus@tinc-vpn.org>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

#include "system.h"

#include <assert.h>
#include <limits.h>     /* INT_MAX, used to clamp the epoll timeout below */

#ifdef HAVE_SYS_EPOLL_H
#include <sys/epoll.h>
#endif

#include "dropin.h"
#include "event.h"
#include "utils.h"
#include "net.h"

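/*
   Backend overview: on Linux (HAVE_SYS_EPOLL_H) the loop is driven by
   epoll(7); other POSIX systems fall back to select(2) with fd_sets kept
   in sync by io_set(); Windows (HAVE_MINGW) multiplexes WSAEVENT handles
   with WSAWaitForMultipleEvents().
*/
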
struct timeval now;
#ifndef HAVE_MINGW

#ifdef HAVE_SYS_EPOLL_H
static int epollset = 0;
#else
static fd_set readfds;
static fd_set writefds;
#endif

#else
static const long READ_EVENTS = FD_READ | FD_ACCEPT | FD_CLOSE;
static const long WRITE_EVENTS = FD_WRITE | FD_CONNECT;
static DWORD event_count = 0;
#endif
static bool running;

#ifdef HAVE_SYS_EPOLL_H
static inline int event_epoll_init(void) {
        /* NOTE: the size hint of 1024 only matters on ancient (pre-2.6.27)
           kernels; newer kernels ignore it, so the set is effectively
           unlimited. epoll_create1() would be the modern choice, but using
           it would drop support for those old kernels. */
        return epoll_create(1024);
}
#endif

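/*
   io_t structures live in a splay tree. On POSIX systems they are ordered
   by file descriptor; on Windows, by WSAEVENT handle (the fd may be -1 for
   event-only entries added with io_add_event()).
*/
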
static int io_compare(const io_t *a, const io_t *b) {
#ifndef HAVE_MINGW
        return a->fd - b->fd;
#else

        if(a->event < b->event) {
                return -1;
        }

        if(a->event > b->event) {
                return 1;
        }

        return 0;
#endif
}

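/*
   Timeouts are ordered by absolute expiry time; ties are broken by the
   address of the timeout_t itself, so distinct timeouts with equal expiry
   times can coexist in the tree.
*/
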
static int timeout_compare(const timeout_t *a, const timeout_t *b) {
        struct timeval diff;
        timersub(&a->tv, &b->tv, &diff);

        if(diff.tv_sec < 0) {
                return -1;
        }

        if(diff.tv_sec > 0) {
                return 1;
        }

        if(diff.tv_usec < 0) {
                return -1;
        }

        if(diff.tv_usec > 0) {
                return 1;
        }

        if(a < b) {
                return -1;
        }

        if(a > b) {
                return 1;
        }

        return 0;
}

static splay_tree_t io_tree = {.compare = (splay_compare_t)io_compare};
static splay_tree_t timeout_tree = {.compare = (splay_compare_t)timeout_compare};
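
/*
   io_tree.generation is incremented whenever the set of registered events
   changes; the dispatch loops below compare it against a saved value to
   detect that a callback added or removed events mid-iteration.
*/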

void io_add(io_t *io, io_cb_t cb, void *data, int fd, int flags) {
        if(io->cb) {
                return;
        }

        io->fd = fd;
#ifdef HAVE_MINGW

        if(io->fd != -1) {
                io->event = WSACreateEvent();

                if(io->event == WSA_INVALID_EVENT) {
                        abort();
                }
        }

        event_count++;
#endif
        io->cb = cb;
        io->data = data;
        io->node.data = io;

        io_set(io, flags);

#ifndef HAVE_SYS_EPOLL_H

        if(!splay_insert_node(&io_tree, &io->node)) {
                abort();
        }

#endif
}

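/*
   Usage sketch (hypothetical caller; handle_conn, conn and sockfd are not
   part of this file): the io_t must stay alive while registered, and
   io_set() re-arms it with a new set of interesting events.

       static io_t sock_io;

       io_add(&sock_io, handle_conn, conn, sockfd, IO_READ);
       io_set(&sock_io, IO_READ | IO_WRITE);   // also wait for writability
       io_del(&sock_io);                       // unregister when done
*/
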
#ifdef HAVE_MINGW
void io_add_event(io_t *io, io_cb_t cb, void *data, WSAEVENT event) {
        io->event = event;
        io_add(io, cb, data, -1, 0);
}
#endif

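/*
   Event-only registrations (fd == -1) are dispatched with flags == 0 by
   the Windows event loop below.
*/
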
void io_set(io_t *io, int flags) {
#ifdef HAVE_SYS_EPOLL_H

        if(!epollset) {
                epollset = event_epoll_init();
        }

#endif

        if(flags == io->flags) {
                return;
        }

        io->flags = flags;

        if(io->fd == -1) {
                return;
        }

#ifndef HAVE_MINGW
#ifdef HAVE_SYS_EPOLL_H
        epoll_ctl(epollset, EPOLL_CTL_DEL, io->fd, NULL);

        struct epoll_event ev = {
                .events = 0,
                .data.ptr = io,
        };

        if(flags & IO_READ) {
                ev.events |= EPOLLIN;
        }

        if(flags & IO_WRITE) {
                ev.events |= EPOLLOUT;
        } else if(ev.events == 0) {
                /* No events of interest: the fd was already removed from
                   the epoll set above, so just record that the set of
                   registered events changed. */
                io_tree.generation++;
                return;
        }

        if(epoll_ctl(epollset, EPOLL_CTL_ADD, io->fd, &ev) < 0) {
                perror("epoll_ctl_add");
        }

#else

        if(flags & IO_READ) {
                FD_SET(io->fd, &readfds);
        } else {
                FD_CLR(io->fd, &readfds);
        }

        if(flags & IO_WRITE) {
                FD_SET(io->fd, &writefds);
        } else {
                FD_CLR(io->fd, &writefds);
        }

#endif
#else
        long events = 0;

        if(flags & IO_WRITE) {
                events |= WRITE_EVENTS;
        }

        if(flags & IO_READ) {
                events |= READ_EVENTS;
        }

        if(WSAEventSelect(io->fd, io->event, events) != 0) {
                abort();
        }

#endif
}

void io_del(io_t *io) {
        if(!io->cb) {
                return;
        }

        io_set(io, 0);
#ifdef HAVE_MINGW

        if(io->fd != -1 && WSACloseEvent(io->event) == FALSE) {
                abort();
        }

        event_count--;
#endif

#ifndef HAVE_SYS_EPOLL_H
        splay_unlink_node(&io_tree, &io->node);
#endif
        io->cb = NULL;
}

void timeout_add(timeout_t *timeout, timeout_cb_t cb, void *data, struct timeval *tv) {
        timeout->cb = cb;
        timeout->data = data;
        timeout->node.data = timeout;

        timeout_set(timeout, tv);
}

void timeout_set(timeout_t *timeout, struct timeval *tv) {
        if(timerisset(&timeout->tv)) {
                splay_unlink_node(&timeout_tree, &timeout->node);
        }

        if(!now.tv_sec) {
                gettimeofday(&now, NULL);
        }

        /* tv is relative; the absolute expiry time is stored in timeout->tv. */
        timeradd(&now, tv, &timeout->tv);

        if(!splay_insert_node(&timeout_tree, &timeout->node)) {
                abort();
        }
}

void timeout_del(timeout_t *timeout) {
        if(!timeout->cb) {
                return;
        }

        splay_unlink_node(&timeout_tree, &timeout->node);
        timeout->cb = NULL;
        timeout->tv = (struct timeval) {
                0, 0
        };
}

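/*
   Usage sketch (hypothetical caller; periodic_ping, send_ping and conn are
   not part of this file): tv is relative, and re-arming from inside the
   callback yields a periodic timer; expired timeouts that are not re-armed
   are removed by timeout_execute().

       static timeout_t ping_timeout;

       static void periodic_ping(void *data) {
               send_ping(data);
               timeout_set(&ping_timeout, &(struct timeval){1, 0});
       }

       timeout_add(&ping_timeout, periodic_ping, conn, &(struct timeval){1, 0});
*/
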
#ifndef HAVE_MINGW

// From Matz's Ruby
#ifndef NSIG
# define NSIG (_SIGMAX + 1)      /* For QNX */
#endif

static io_t signalio;
static int pipefd[2] = {-1, -1};
static signal_t *signal_handle[NSIG + 1] = {NULL};

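/*
   Self-pipe trick: the handler below does the only async-signal-safe thing
   it can, writing the signal number into a pipe; the read end is registered
   as an ordinary io_t, so the real callback runs from the event loop rather
   than from signal context.
*/
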
static void signal_handler(int signum) {
        unsigned char num = signum;

        if(write(pipefd[1], &num, 1) != 1) {
                // Pipe full or broken, nothing we can do about it.
        }
}

static void signalio_handler(void *data, int flags) {
        (void)data;
        (void)flags;
        unsigned char signum;

        if(read(pipefd[0], &signum, 1) != 1) {
                return;
        }

        signal_t *sig = signal_handle[signum];

        if(sig) {
                sig->cb(sig->data);
        }
}

static void pipe_init(void) {
        if(!pipe(pipefd)) {
                io_add(&signalio, signalio_handler, NULL, pipefd[0], IO_READ);
        }
}

void signal_add(signal_t *sig, signal_cb_t cb, void *data, int signum) {
        if(sig->cb) {
                return;
        }

        sig->signum = signum;
        sig->cb = cb;
        sig->data = data;

        if(pipefd[0] == -1) {
                pipe_init();
        }

        signal(signum, signal_handler);

        signal_handle[signum] = sig;
}

void signal_del(signal_t *sig) {
        if(!sig->cb) {
                return;
        }

        signal(sig->signum, SIG_DFL);

        signal_handle[sig->signum] = NULL;
        sig->cb = NULL;
}
#endif

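/*
   Fires all expired timeouts, then returns the interval until the next
   pending one (stored in *diff), or NULL when no timeouts remain. The
   event loops below turn that interval into their poll timeout.
*/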
static struct timeval *timeout_execute(struct timeval *diff) {
        gettimeofday(&now, NULL);
        struct timeval *tv = NULL;

        while(timeout_tree.head) {
                timeout_t *timeout = timeout_tree.head->data;
                timersub(&timeout->tv, &now, diff);

                if(diff->tv_sec < 0) {
                        timeout->cb(timeout->data);

                        if(timercmp(&timeout->tv, &now, <)) {
                                timeout_del(timeout);
                        }
                } else {
                        tv = diff;
                        break;
                }
        }

        return tv;
}

bool event_loop(void) {
        running = true;

#ifndef HAVE_MINGW

#ifdef HAVE_SYS_EPOLL_H

        if(!epollset) {
                epollset = event_epoll_init();
        }

#else
        fd_set readable;
        fd_set writable;
#endif

        while(running) {
                struct timeval diff;
                struct timeval *tv = timeout_execute(&diff);
#ifndef HAVE_SYS_EPOLL_H
                memcpy(&readable, &readfds, sizeof(readable));
                memcpy(&writable, &writefds, sizeof(writable));
#endif

#ifdef HAVE_SYS_EPOLL_H
                struct epoll_event events[EPOLL_MAX_EVENTS_PER_LOOP];

                /* tv is NULL when no timeouts are pending; in that case
                   block indefinitely instead of dereferencing it. */
                long timeout = tv ? (tv->tv_sec * 1000) + (tv->tv_usec / 1000) : -1;

                if(timeout > INT_MAX) {
                        timeout = INT_MAX;
                }

                int n = epoll_wait(epollset, events, EPOLL_MAX_EVENTS_PER_LOOP, (int)timeout);
#else
                int maxfds = 0;

                if(io_tree.tail) {
                        io_t *last = io_tree.tail->data;
                        maxfds = last->fd + 1;
                }

                int n = select(maxfds, &readable, &writable, NULL, tv);
#endif

                if(n < 0) {
                        if(sockwouldblock(sockerrno)) {
                                continue;
                        } else {
                                return false;
                        }
                }

                if(!n) {
                        continue;
                }

                unsigned int curgen = io_tree.generation;

#ifdef HAVE_SYS_EPOLL_H

                for(int i = 0; i < n; i++) {
                        io_t *io = events[i].data.ptr;

                        if(events[i].events & EPOLLOUT && io->flags & IO_WRITE) {
                                io->cb(io->data, IO_WRITE);
                        }

                        /* A callback may have changed the set of registered
                           events; if so, stop dispatching and poll again. */
                        if(curgen != io_tree.generation) {
                                break;
                        }

                        if(events[i].events & EPOLLIN && io->flags & IO_READ) {
                                io->cb(io->data, IO_READ);
                        }

                        if(curgen != io_tree.generation) {
                                break;
                        }
                }

#else

                for splay_each(io_t, io, &io_tree) {
                        if(FD_ISSET(io->fd, &writable)) {
                                io->cb(io->data, IO_WRITE);
                        } else if(FD_ISSET(io->fd, &readable)) {
                                io->cb(io->data, IO_READ);
                        } else {
                                continue;
                        }

                        /*
                                There are scenarios in which the callback will remove another io_t from the tree
                                (e.g. closing a double connection). Since splay_each does not support that, we
                                need to exit the loop if that happens. That's okay, since any remaining events will
                                get picked up by the next select() call.
                        */
                        if(curgen != io_tree.generation) {
                                break;
                        }
                }

#endif
        }

#else
        /* WSAWaitForMultipleEvents() returns WSA_WAIT_EVENT_0 + index;
           the index arithmetic below assumes the base is 0. */
        assert(WSA_WAIT_EVENT_0 == 0);

        while(running) {
                struct timeval diff;
                struct timeval *tv = timeout_execute(&diff);
                DWORD timeout_ms = tv ? (DWORD)(tv->tv_sec * 1000 + tv->tv_usec / 1000 + 1) : WSA_INFINITE;

                if(!event_count) {
                        Sleep(timeout_ms);
                        continue;
                }

                /*
                   For some reason, Microsoft decided to make the FD_WRITE event edge-triggered instead of level-triggered,
                   which is the opposite of what select() does. In practice, that means that once an FD_WRITE event fires,
                   it will not fire again until a send() returns EWOULDBLOCK. Since the semantics of this event loop
                   are that write events are level-triggered (i.e. they keep firing as long as the socket is writable),
                   we emulate that by explicitly firing IO_WRITE for each io that is still writable.

                   Note that technically FD_CLOSE has the same problem, but that is harmless because user code does not
                   rely on the event firing again if it was ignored.
                */
                unsigned int curgen = io_tree.generation;

                for splay_each(io_t, io, &io_tree) {
                        /* A zero-byte send() succeeds only if the socket is currently writable. */
                        if(io->flags & IO_WRITE && send(io->fd, NULL, 0, 0) == 0) {
                                io->cb(io->data, IO_WRITE);

                                if(curgen != io_tree.generation) {
                                        break;
                                }
                        }
                }

                if(event_count > WSA_MAXIMUM_WAIT_EVENTS) {
                        WSASetLastError(WSA_INVALID_PARAMETER);
                        return false;
                }

                WSAEVENT events[WSA_MAXIMUM_WAIT_EVENTS];
                io_t *io_map[WSA_MAXIMUM_WAIT_EVENTS];
                DWORD event_index = 0;

                for splay_each(io_t, io, &io_tree) {
                        events[event_index] = io->event;
                        io_map[event_index] = io;
                        event_index++;
                }

                /*
                 * If the generation number changes due to event addition
                 * or removal by a callback, we restart the loop.
                 */
                curgen = io_tree.generation;

                for(DWORD event_offset = 0; event_offset < event_count;) {
                        DWORD result = WSAWaitForMultipleEvents(event_count - event_offset, &events[event_offset], FALSE, timeout_ms, FALSE);

                        if(result == WSA_WAIT_TIMEOUT) {
                                break;
                        }

                        if(result >= event_count - event_offset) {
                                return false;
                        }

                        /* Look up the io in the map by index. */
                        event_index = result + event_offset;
                        io_t *io = io_map[event_index];

                        if(io->fd == -1) {
                                io->cb(io->data, 0);

                                if(curgen != io_tree.generation) {
                                        break;
                                }
                        } else {
                                WSANETWORKEVENTS network_events;

                                if(WSAEnumNetworkEvents(io->fd, io->event, &network_events) != 0) {
                                        return false;
                                }

                                if(network_events.lNetworkEvents & READ_EVENTS) {
                                        io->cb(io->data, IO_READ);

                                        if(curgen != io_tree.generation) {
                                                break;
                                        }
                                }

                                /*
                                    The fd might be writable too. However, if we already fired the read callback, that
                                    callback might have deleted the io (e.g. through terminate_connection()), so we can't
                                    fire the write callback here. Instead, we loop back and let the writability check
                                    above handle it.
                                 */
                        }

                        /* Continue checking the rest of the events. */
                        event_offset = event_index + 1;

                        /* Just poll the next time through. */
                        timeout_ms = 0;
                }
        }

#endif

        return true;
}

void event_exit(void) {
        running = false;
}
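
/*
   Putting it together: a minimal, hypothetical caller (not part of tinc)
   that watches stdin and exits cleanly when it closes.

       static io_t stdin_io;

       static void on_stdin(void *data, int flags) {
               char buf[256];

               if(read(0, buf, sizeof(buf)) <= 0) {
                       event_exit();
               }
       }

       int main(void) {
               io_add(&stdin_io, on_stdin, NULL, 0, IO_READ);
               return event_loop() ? 0 : 1;
       }
*/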