Try to process all pending events after select().
tinc: src/event.c
/*
    event.c -- I/O, timeout and signal event handling
    Copyright (C) 2012-2013 Guus Sliepen <guus@tinc-vpn.org>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/

#include "system.h"

#include "dropin.h"
#include "event.h"
#include "net.h"
#include "utils.h"
#include "xalloc.h"

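/* Cached wall-clock time, refreshed by get_time_remaining() on every loop
   iteration and used by timeout_set() to compute absolute expiry times. */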
struct timeval now;

#ifndef HAVE_MINGW
static fd_set readfds;
static fd_set writefds;
#else
static const long READ_EVENTS = FD_READ | FD_ACCEPT | FD_CLOSE;
static const long WRITE_EVENTS = FD_WRITE | FD_CONNECT;
static DWORD event_count = 0;
#endif
static bool running;

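/* Ordering for the io tree: by file descriptor on POSIX, by Windows event
   handle on MinGW builds. */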
static int io_compare(const io_t *a, const io_t *b) {
#ifndef HAVE_MINGW
        return a->fd - b->fd;
#else

        if(a->event < b->event) {
                return -1;
        }

        if(a->event > b->event) {
                return 1;
        }

        return 0;
#endif
}

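/* Order timeouts by expiry time; fall back to comparing the pointers
   themselves so that two distinct timeouts with identical expiry times
   never compare equal. */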
static int timeout_compare(const timeout_t *a, const timeout_t *b) {
        struct timeval diff;
        timersub(&a->tv, &b->tv, &diff);

        if(diff.tv_sec < 0) {
                return -1;
        }

        if(diff.tv_sec > 0) {
                return 1;
        }

        if(diff.tv_usec < 0) {
                return -1;
        }

        if(diff.tv_usec > 0) {
                return 1;
        }

        if(a < b) {
                return -1;
        }

        if(a > b) {
                return 1;
        }

        return 0;
}

static splay_tree_t io_tree = {.compare = (splay_compare_t)io_compare};
static splay_tree_t timeout_tree = {.compare = (splay_compare_t)timeout_compare};

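/*
   Illustrative usage sketch (hypothetical caller, not part of this file):

       static io_t sock_io;

       static void on_readable(void *data, int flags) {
               // read from the socket and handle the data
       }

       // during setup:
       //     io_add(&sock_io, on_readable, NULL, sockfd, IO_READ);
       //     event_loop();
*/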
void io_add(io_t *io, io_cb_t cb, void *data, int fd, int flags) {
        if(io->cb) {
                return;
        }

        io->fd = fd;
#ifdef HAVE_MINGW

        if(io->fd != -1) {
                io->event = WSACreateEvent();

                if(io->event == WSA_INVALID_EVENT) {
                        abort();
                }
        }

        event_count++;
#endif
        io->cb = cb;
        io->data = data;
        io->node.data = io;

        io_set(io, flags);

        if(!splay_insert_node(&io_tree, &io->node)) {
                abort();
        }
}

#ifdef HAVE_MINGW
void io_add_event(io_t *io, io_cb_t cb, void *data, WSAEVENT event) {
        io->event = event;
        io_add(io, cb, data, -1, 0);
}
#endif

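/* Change the set of conditions monitored for this io: update the global
   fd_sets on POSIX, or re-register the event mask with WSAEventSelect()
   on Windows. */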
void io_set(io_t *io, int flags) {
        if(flags == io->flags) {
                return;
        }

        io->flags = flags;

        if(io->fd == -1) {
                return;
        }

#ifndef HAVE_MINGW

        if(flags & IO_READ) {
                FD_SET(io->fd, &readfds);
        } else {
                FD_CLR(io->fd, &readfds);
        }

        if(flags & IO_WRITE) {
                FD_SET(io->fd, &writefds);
        } else {
                FD_CLR(io->fd, &writefds);
        }

#else
        long events = 0;

        if(flags & IO_WRITE) {
                events |= WRITE_EVENTS;
        }

        if(flags & IO_READ) {
                events |= READ_EVENTS;
        }

        if(WSAEventSelect(io->fd, io->event, events) != 0) {
                abort();
        }

#endif
}

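/* Unregister an io: stop monitoring it, close its Windows event if it
   owns one, and unlink it from the io tree. */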
void io_del(io_t *io) {
        if(!io->cb) {
                return;
        }

        io_set(io, 0);
#ifdef HAVE_MINGW

        if(io->fd != -1 && WSACloseEvent(io->event) == FALSE) {
                abort();
        }

        event_count--;
#endif

        splay_unlink_node(&io_tree, &io->node);
        io->cb = NULL;
}

void timeout_add(timeout_t *timeout, timeout_cb_t cb, void *data, struct timeval *tv) {
        timeout->cb = cb;
        timeout->data = data;
        timeout->node.data = timeout;

        timeout_set(timeout, tv);
}

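/* (Re)arm a timeout to fire tv from now. The tree position depends on the
   expiry time, so an already armed timeout is unlinked before its expiry
   is changed and then reinserted. */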
void timeout_set(timeout_t *timeout, struct timeval *tv) {
        if(timerisset(&timeout->tv)) {
                splay_unlink_node(&timeout_tree, &timeout->node);
        }

        if(!now.tv_sec) {
                gettimeofday(&now, NULL);
        }

        timeradd(&now, tv, &timeout->tv);

        if(!splay_insert_node(&timeout_tree, &timeout->node)) {
                abort();
        }
}

void timeout_del(timeout_t *timeout) {
        if(!timeout->cb) {
                return;
        }

        splay_unlink_node(&timeout_tree, &timeout->node);
        timeout->cb = NULL;
        timeout->tv = (struct timeval) {
                0, 0
        };
}

#ifndef HAVE_MINGW
static int signal_compare(const signal_t *a, const signal_t *b) {
        return a->signum - b->signum;
}

static io_t signalio;
static int pipefd[2] = {-1, -1};
static splay_tree_t signal_tree = {.compare = (splay_compare_t)signal_compare};

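/* Self-pipe trick: the signal handler only performs an async-signal-safe
   write() of the signal number; signalio_handler() then reads it back and
   dispatches the registered callback synchronously from the event loop. */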
static void signal_handler(int signum) {
        unsigned char num = signum;
        write(pipefd[1], &num, 1);
}

static void signalio_handler(void *data, int flags) {
        unsigned char signum;

        if(read(pipefd[0], &signum, 1) != 1) {
                return;
        }

        signal_t *sig = splay_search(&signal_tree, &((signal_t) {
                .signum = signum
        }));

        if(sig) {
                sig->cb(sig->data);
        }
}

static void pipe_init(void) {
        if(!pipe(pipefd)) {
                io_add(&signalio, signalio_handler, NULL, pipefd[0], IO_READ);
        }
}

void signal_add(signal_t *sig, signal_cb_t cb, void *data, int signum) {
        if(sig->cb) {
                return;
        }

        sig->cb = cb;
        sig->data = data;
        sig->signum = signum;
        sig->node.data = sig;

        if(pipefd[0] == -1) {
                pipe_init();
        }

        signal(sig->signum, signal_handler);

        if(!splay_insert_node(&signal_tree, &sig->node)) {
                abort();
        }
}

void signal_del(signal_t *sig) {
        if(!sig->cb) {
                return;
        }

        signal(sig->signum, SIG_DFL);

        splay_unlink_node(&signal_tree, &sig->node);
        sig->cb = NULL;
}
#endif

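/* Fire all expired timeouts and return the interval until the next pending
   one, or NULL if no timeout is pending (i.e. block indefinitely). */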
static struct timeval *get_time_remaining(struct timeval *diff) {
        gettimeofday(&now, NULL);
        struct timeval *tv = NULL;

        while(timeout_tree.head) {
                timeout_t *timeout = timeout_tree.head->data;
                timersub(&timeout->tv, &now, diff);

                if(diff->tv_sec < 0) {
                        timeout->cb(timeout->data);

                        if(timercmp(&timeout->tv, &now, <)) {
                                timeout_del(timeout);
                        }
                } else {
                        tv = diff;
                        break;
                }
        }

        return tv;
}

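/* Main dispatch loop: wait for I/O with select() (POSIX) or
   WSAWaitForMultipleEvents() (Windows), fire expired timeouts, and invoke
   callbacks until event_exit() is called. */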
bool event_loop(void) {
        running = true;

#ifndef HAVE_MINGW
        fd_set readable;
        fd_set writable;

        while(running) {
                struct timeval diff;
                struct timeval *tv = get_time_remaining(&diff);
                memcpy(&readable, &readfds, sizeof(readable));
                memcpy(&writable, &writefds, sizeof(writable));

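                /* select() needs the highest fd plus one; the io tree is
                   ordered by fd, so its tail node holds the highest. */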
                int fds = 0;

                if(io_tree.tail) {
                        io_t *last = io_tree.tail->data;
                        fds = last->fd + 1;
                }

                int n = select(fds, &readable, &writable, NULL, tv);

                if(n < 0) {
                        if(sockwouldblock(sockerrno)) {
                                continue;
                        } else {
                                return false;
                        }
                }

                if(!n) {
                        continue;
                }

                unsigned int curgen = io_tree.generation;

                for splay_each(io_t, io, &io_tree) {
                        if(FD_ISSET(io->fd, &writable)) {
                                io->cb(io->data, IO_WRITE);
                        } else if(FD_ISSET(io->fd, &readable)) {
                                io->cb(io->data, IO_READ);
                        } else {
                                continue;
                        }

                        /*
                           There are scenarios in which the callback will remove another io_t from the tree
                           (e.g. closing a double connection). Since splay_each does not support that, we
                           need to exit the loop if that happens. That's okay, since any remaining events will
                           get picked up by the next select() call.
                         */
                        if(curgen != io_tree.generation) {
                                break;
                        }
                }
        }

#else

        while(running) {
                struct timeval diff;
                struct timeval *tv = get_time_remaining(&diff);
                DWORD timeout_ms = tv ? (tv->tv_sec * 1000 + tv->tv_usec / 1000 + 1) : WSA_INFINITE;

                if(!event_count) {
                        Sleep(timeout_ms);
                        continue;
                }

                /*
                   For some reason, Microsoft decided to make the FD_WRITE event edge-triggered instead of level-triggered,
                   which is the opposite of what select() does. In practice, that means that if an FD_WRITE event triggers,
                   it will never trigger again until a send() returns EWOULDBLOCK. Since the semantics of this event loop
                   are that write events are level-triggered (i.e. they continue firing until the socket is full), we need
                   to emulate these semantics by making sure we fire each IO_WRITE that is still writable.

                   Note that technically FD_CLOSE has the same problem, but it's okay because user code does not rely on
                   this event being fired again if ignored.
                */
                unsigned int curgen = io_tree.generation;

                for splay_each(io_t, io, &io_tree) {
                        if((io->flags & IO_WRITE) && send(io->fd, NULL, 0, 0) == 0) {
                                io->cb(io->data, IO_WRITE);

                                if(curgen != io_tree.generation) {
                                        break;
                                }
                        }
                }

                if(event_count > WSA_MAXIMUM_WAIT_EVENTS) {
                        WSASetLastError(WSA_INVALID_PARAMETER);
                        return false;
                }

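                /* Build a flat array of the wait handles, with a parallel
                   map from each slot back to its io_t, in tree order. */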
                WSAEVENT events[WSA_MAXIMUM_WAIT_EVENTS];
                io_t *io_map[WSA_MAXIMUM_WAIT_EVENTS];
                DWORD event_index = 0;

                for splay_each(io_t, io, &io_tree) {
                        events[event_index] = io->event;
                        io_map[event_index] = io;
                        event_index++;
                }

                /*
                 * If the generation number changes due to event addition
                 * or removal by a callback we restart the loop.
                 */
                curgen = io_tree.generation;

                for(DWORD event_offset = 0; event_offset < event_count;) {
                        DWORD result = WSAWaitForMultipleEvents(event_count - event_offset, &events[event_offset], FALSE, timeout_ms, FALSE);

                        if(result == WSA_WAIT_TIMEOUT) {
                                break;
                        }

                        if(result < WSA_WAIT_EVENT_0 || result >= WSA_WAIT_EVENT_0 + event_count - event_offset) {
                                return false;
                        }

                        /* Look up io in the map by index. */
                        event_index = result - WSA_WAIT_EVENT_0 + event_offset;
                        io_t *io = io_map[event_index];

                        if(io->fd == -1) {
                                io->cb(io->data, 0);

                                if(curgen != io_tree.generation) {
                                        break;
                                }
                        } else {
                                WSANETWORKEVENTS network_events;

                                if(WSAEnumNetworkEvents(io->fd, io->event, &network_events) != 0) {
                                        return false;
                                }

                                if(network_events.lNetworkEvents & READ_EVENTS) {
                                        io->cb(io->data, IO_READ);

                                        if(curgen != io_tree.generation) {
                                                break;
                                        }
                                }

                                /*
                                    The fd might be available for write too. However, if we already fired the read callback, that
                                    callback might have deleted the io (e.g. through terminate_connection()), so we can't fire the
                                    write callback here. Instead, we loop back and let the writable io loop above handle it.
                                 */
                        }

                        /* Continue checking the rest of the events. */
                        event_offset = event_index + 1;

                        /* Just poll the next time through. */
                        timeout_ms = 0;
                }
        }

#endif

        return true;
}

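/* Ask event_loop() to return after finishing its current iteration. */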
void event_exit(void) {
        running = false;
}