BSD: add kqueue support
[tinc] / src / event.c
index 226a452..c373eac 100644 (file)
@@ -1,6 +1,6 @@
 /*
     event.c -- I/O, timeout and signal event handling
-    Copyright (C) 2012-2021 Guus Sliepen <guus@tinc-vpn.org>
+    Copyright (C) 2012-2022 Guus Sliepen <guus@tinc-vpn.org>
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
 */
 
 #include "system.h"
+
+#ifdef HAVE_WINDOWS
+#  include <assert.h>
+#else
+#  if defined(HAVE_SYS_EPOLL_H)
+#    include <sys/epoll.h>
+#    define HAVE_EPOLL 1
+#  elif defined(HAVE_SYS_EVENT_H)
+#    include <sys/event.h>
+#    define HAVE_KQUEUE 1
+#  else
+#    define HAVE_SELECT 1
+#  endif
+#endif
+
 #include "event.h"
 #include "utils.h"
+#include "net.h"
 
 struct timeval now;
+static bool running;
 
-#ifndef HAVE_MINGW
+#if defined(HAVE_EPOLL) || defined(HAVE_KQUEUE)
+static int event_fd = 0;
+#elif defined(HAVE_SELECT)
 static fd_set readfds;
 static fd_set writefds;
-#else
+#elif defined(HAVE_WINDOWS)
 static const long READ_EVENTS = FD_READ | FD_ACCEPT | FD_CLOSE;
 static const long WRITE_EVENTS = FD_WRITE | FD_CONNECT;
 static DWORD event_count = 0;
 #endif
-static bool running;
+
+/*
+   Create the kernel event queue (epoll or kqueue) on first use.
+   Does nothing when neither backend is compiled in, or when event_fd has
+   already been set. Failing to create the queue is fatal: it is logged and
+   the process aborts.
+*/
+static inline void event_init(void) {
+#if defined(HAVE_EPOLL) || defined(HAVE_KQUEUE)
+
+       if(event_fd) {
+               /* A queue descriptor is already in place. */
+               return;
+       }
+
+#if !defined(HAVE_EPOLL)
+       event_fd = kqueue();
+#else
+       /* NOTE: the 1024 size hint only matters on ancient (pre 2.6.27) kernels;
+          decent kernels ignore it, making the queue unlimited.
+          epoll_create1 might be better, but then those old kernels would not
+          be supported. */
+       event_fd = epoll_create(1024);
+#endif
+
+       if(event_fd != -1) {
+               return;
+       }
+
+       logger(DEBUG_ALWAYS, LOG_EMERG, "Could not initialize events: %s", strerror(errno));
+       abort();
+#endif
+}
+
+/*
+   Close the epoll/kqueue descriptor if one is open and reset event_fd to 0.
+   Does nothing when neither backend is compiled in.
+*/
+static void event_deinit(void) {
+#if defined(HAVE_EPOLL) || defined(HAVE_KQUEUE)
+
+       if(event_fd <= 0) {
+               /* Nothing was opened, so there is nothing to release. */
+               return;
+       }
+
+       close(event_fd);
+       event_fd = 0;
+#endif
+}
 
 static int io_compare(const io_t *a, const io_t *b) {
-#ifndef HAVE_MINGW
+#ifndef HAVE_WINDOWS
        return a->fd - b->fd;
 #else
 
@@ -90,7 +142,7 @@ void io_add(io_t *io, io_cb_t cb, void *data, int fd, int flags) {
        }
 
        io->fd = fd;
-#ifdef HAVE_MINGW
+#ifdef HAVE_WINDOWS
 
        if(io->fd != -1) {
                io->event = WSACreateEvent();
@@ -108,12 +160,16 @@ void io_add(io_t *io, io_cb_t cb, void *data, int fd, int flags) {
 
        io_set(io, flags);
 
+#ifdef HAVE_SELECT
+
        if(!splay_insert_node(&io_tree, &io->node)) {
                abort();
        }
+
+#endif
 }
 
-#ifdef HAVE_MINGW
+#ifdef HAVE_WINDOWS
 void io_add_event(io_t *io, io_cb_t cb, void *data, WSAEVENT event) {
        io->event = event;
        io_add(io, cb, data, -1, 0);
@@ -121,6 +177,8 @@ void io_add_event(io_t *io, io_cb_t cb, void *data, WSAEVENT event) {
 #endif
 
 void io_set(io_t *io, int flags) {
+       event_init();
+
        if(flags == io->flags) {
                return;
        }
@@ -131,7 +189,69 @@ void io_set(io_t *io, int flags) {
                return;
        }
 
-#ifndef HAVE_MINGW
+#ifndef HAVE_WINDOWS
+#ifdef HAVE_EPOLL
+       epoll_ctl(event_fd, EPOLL_CTL_DEL, io->fd, NULL);
+
+       struct epoll_event ev = {
+               .events = 0,
+               .data.ptr = io,
+       };
+
+       if(flags & IO_READ) {
+               ev.events |= EPOLLIN;
+       }
+
+       if(flags & IO_WRITE) {
+               ev.events |= EPOLLOUT;
+       } else if(ev.events == 0) {
+               io_tree.generation++;
+               return;
+       }
+
+       if(epoll_ctl(event_fd, EPOLL_CTL_ADD, io->fd, &ev) < 0) {
+               perror("epoll_ctl_add");
+       }
+
+#endif
+
+#ifdef HAVE_KQUEUE
+       const struct kevent change[] = {
+               {
+                       .ident = io->fd,
+                       .filter = EVFILT_READ,
+                       .flags = EV_RECEIPT | (flags & IO_READ ? EV_ADD : EV_DELETE),
+                       .udata = io,
+               },
+               {
+                       .ident = io->fd,
+                       .filter = EVFILT_WRITE,
+                       .flags = EV_RECEIPT | (flags & IO_WRITE ? EV_ADD : EV_DELETE),
+                       .udata = io,
+               },
+       };
+       struct kevent result[2];
+
+       if(kevent(event_fd, change, 2, result, 2, NULL) < 0) {
+               logger(DEBUG_ALWAYS, LOG_EMERG, "kevent failed: %s", strerror(errno));
+               abort();
+       }
+
+       int rerr = (int)result[0].data;
+       int werr = (int)result[1].data;
+
+       if((rerr && rerr != ENOENT) || (werr && werr != ENOENT)) {
+               logger(DEBUG_ALWAYS, LOG_EMERG, "kevent errors: %s, %s", strerror(rerr), strerror(werr));
+               abort();
+       }
+
+       if(!flags) {
+               io_tree.generation++;
+       }
+
+#endif
+
+#ifdef HAVE_SELECT
 
        if(flags & IO_READ) {
                FD_SET(io->fd, &readfds);
@@ -145,6 +265,8 @@ void io_set(io_t *io, int flags) {
                FD_CLR(io->fd, &writefds);
        }
 
+#endif
+
 #else
        long events = 0;
 
@@ -169,7 +291,7 @@ void io_del(io_t *io) {
        }
 
        io_set(io, 0);
-#ifdef HAVE_MINGW
+#ifdef HAVE_WINDOWS
 
        if(io->fd != -1 && WSACloseEvent(io->event) == FALSE) {
                abort();
@@ -178,11 +300,13 @@ void io_del(io_t *io) {
        event_count--;
 #endif
 
+#if HAVE_SELECT
        splay_unlink_node(&io_tree, &io->node);
+#endif
        io->cb = NULL;
 }
 
-void timeout_add(timeout_t *timeout, timeout_cb_t cb, void *data, struct timeval *tv) {
+void timeout_add(timeout_t *timeout, timeout_cb_t cb, void *data, const struct timeval *tv) {
        timeout->cb = cb;
        timeout->data = data;
        timeout->node.data = timeout;
@@ -190,7 +314,7 @@ void timeout_add(timeout_t *timeout, timeout_cb_t cb, void *data, struct timeval
        timeout_set(timeout, tv);
 }
 
-void timeout_set(timeout_t *timeout, struct timeval *tv) {
+void timeout_set(timeout_t *timeout, const struct timeval *tv) {
        if(timerisset(&timeout->tv)) {
                splay_unlink_node(&timeout_tree, &timeout->node);
        }
@@ -218,7 +342,7 @@ void timeout_del(timeout_t *timeout) {
        };
 }
 
-#ifndef HAVE_MINGW
+#ifndef HAVE_WINDOWS
 
 // From Matz's Ruby
 #ifndef NSIG
@@ -228,11 +352,14 @@ void timeout_del(timeout_t *timeout) {
 
 static io_t signalio;
 static int pipefd[2] = {-1, -1};
-static signal_t *signal_handle[NSIG + 1] = {};
+static signal_t *signal_handle[NSIG + 1] = {NULL};
 
 static void signal_handler(int signum) {
        unsigned char num = signum;
-       write(pipefd[1], &num, 1);
+
+       if(write(pipefd[1], &num, 1) != 1) {
+               // Pipe full or broken, nothing we can do about it.
+       }
 }
 
 static void signalio_handler(void *data, int flags) {
@@ -287,7 +414,7 @@ void signal_del(signal_t *sig) {
 }
 #endif
 
-static struct timeval *get_time_remaining(struct timeval *diff) {
+static struct timeval *timeout_execute(struct timeval *diff) {
        gettimeofday(&now, NULL);
        struct timeval *tv = NULL;
 
@@ -311,26 +438,57 @@ static struct timeval *get_time_remaining(struct timeval *diff) {
 }
 
 bool event_loop(void) {
+       event_init();
        running = true;
 
-#ifndef HAVE_MINGW
+#ifndef HAVE_WINDOWS
+
+#ifdef HAVE_SELECT
        fd_set readable;
        fd_set writable;
+#endif
 
        while(running) {
                struct timeval diff;
-               struct timeval *tv = get_time_remaining(&diff);
+               struct timeval *tv = timeout_execute(&diff);
+
+#ifdef HAVE_SELECT
                memcpy(&readable, &readfds, sizeof(readable));
                memcpy(&writable, &writefds, sizeof(writable));
+#endif
+
+#ifdef HAVE_EPOLL
+               struct epoll_event events[MAX_EVENTS_PER_LOOP];
+
+               /* tv is NULL when no timeout is pending; epoll_wait() expresses
+                  "wait indefinitely" as a timeout of -1. */
+               int timeout = -1;
+
+               if(tv) {
+                       /* Compute the milliseconds in 64 bits so the multiplication
+                          cannot overflow a 32-bit long, then clamp to the int range
+                          that epoll_wait() accepts. */
+                       long long ms = ((long long)tv->tv_sec * 1000) + (tv->tv_usec / 1000);
+
+                       timeout = ms > INT_MAX ? INT_MAX : (int)ms;
+               }
+
+               int n = epoll_wait(event_fd, events, MAX_EVENTS_PER_LOOP, timeout);
+#endif
+
+#ifdef HAVE_KQUEUE
+               struct kevent events[MAX_EVENTS_PER_LOOP];
+
+               /* kevent() waits indefinitely when handed a NULL timeout, so only
+                  build a timespec when tv actually points at a pending timeout. */
+               struct timespec ts;
+               const struct timespec *ts_ptr = NULL;
+
+               if(tv) {
+                       ts.tv_sec = tv->tv_sec;
+                       ts.tv_nsec = tv->tv_usec * 1000;
+                       ts_ptr = &ts;
+               }
+
+               int n = kevent(event_fd, NULL, 0, events, MAX_EVENTS_PER_LOOP, ts_ptr);
+#endif
+
+#ifdef HAVE_SELECT
+               int maxfds =  0;
 
                if(io_tree.tail) {
                        io_t *last = io_tree.tail->data;
-                       fds = last->fd + 1;
+                       maxfds = last->fd + 1;
                }
 
-               int n = select(fds, &readable, &writable, NULL, tv);
+               int n = select(maxfds, &readable, &writable, NULL, tv);
+#endif
 
                if(n < 0) {
                        if(sockwouldblock(sockerrno)) {
@@ -346,6 +504,54 @@ bool event_loop(void) {
 
                unsigned int curgen = io_tree.generation;
 
+
+#ifdef HAVE_EPOLL
+
+               for(int i = 0; i < n; i++) {
+                       io_t *io = events[i].data.ptr;
+
+                       if(events[i].events & EPOLLOUT && io->flags & IO_WRITE) {
+                               io->cb(io->data, IO_WRITE);
+                       }
+
+                       if(curgen != io_tree.generation) {
+                               break;
+                       }
+
+                       if(events[i].events & EPOLLIN && io->flags & IO_READ) {
+                               io->cb(io->data, IO_READ);
+                       }
+
+                       if(curgen != io_tree.generation) {
+                               break;
+                       }
+               }
+
+#endif
+
+#ifdef HAVE_KQUEUE
+
+               for(int i = 0; i < n; i++) {
+                       const struct kevent *evt = &events[i];
+                       const io_t *io = evt->udata;
+
+                       if(evt->filter == EVFILT_WRITE) {
+                               io->cb(io->data, IO_WRITE);
+                       } else if(evt->filter == EVFILT_READ) {
+                               io->cb(io->data, IO_READ);
+                       } else {
+                               continue;
+                       }
+
+                       if(curgen != io_tree.generation) {
+                               break;
+                       }
+               }
+
+#endif
+
+#ifdef HAVE_SELECT
+
                for splay_each(io_t, io, &io_tree) {
                        if(FD_ISSET(io->fd, &writable)) {
                                io->cb(io->data, IO_WRITE);
@@ -356,22 +562,25 @@ bool event_loop(void) {
                        }
 
                        /*
-                          There are scenarios in which the callback will remove another io_t from the tree
-                          (e.g. closing a double connection). Since splay_each does not support that, we
-                          need to exit the loop if that happens. That's okay, since any remaining events will
-                          get picked up by the next select() call.
-                        */
+                               There are scenarios in which the callback will remove another io_t from the tree
+                               (e.g. closing a double connection). Since splay_each does not support that, we
+                               need to exit the loop if that happens. That's okay, since any remaining events will
+                               get picked up by the next select() call.
+                       */
                        if(curgen != io_tree.generation) {
                                break;
                        }
                }
+
+#endif
        }
 
 #else
+       assert(WSA_WAIT_EVENT_0 == 0);
 
        while(running) {
                struct timeval diff;
-               struct timeval *tv = get_time_remaining(&diff);
+               struct timeval *tv = timeout_execute(&diff);
                DWORD timeout_ms = tv ? (DWORD)(tv->tv_sec * 1000 + tv->tv_usec / 1000 + 1) : WSA_INFINITE;
 
                if(!event_count) {
@@ -429,12 +638,12 @@ bool event_loop(void) {
                                break;
                        }
 
-                       if(result < WSA_WAIT_EVENT_0 || result >= WSA_WAIT_EVENT_0 + event_count - event_offset) {
+                       if(result >= event_count - event_offset) {
                                return false;
                        }
 
                        /* Look up io in the map by index. */
-                       event_index = result - WSA_WAIT_EVENT_0 + event_offset;
+                       event_index = result + event_offset;
                        io_t *io = io_map[event_index];
 
                        if(io->fd == -1) {
@@ -475,6 +684,7 @@ bool event_loop(void) {
 
 #endif
 
+       event_deinit();
        return true;
 }