Revert "queueing: get rid of per-peer ring buffers"
This reverts commit bd2c339.

Signed-off-by: Jason A. Donenfeld <[email protected]>
zx2c4 committed Feb 18, 2021
1 parent bd2c339 · commit 57a2f44
Showing 9 changed files with 93 additions and 160 deletions.
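In short, this revert removes the intrusive multi-producer, single-consumer "prev_queue" list that bd2c339 introduced and returns the per-peer tx/rx queues to ptr_ring-backed struct crypt_queue instances, restoring the multicore flag on wg_packet_queue_init()/wg_packet_queue_free() and the work_struct arm of crypt_queue's union; the compat shims for xchg_release and smp_load_acquire, which only the prev_queue code needed, go away with it.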
16 changes: 0 additions & 16 deletions src/compat/compat.h
@@ -1068,22 +1068,6 @@ static const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tun
 #define kfree_sensitive(a) kzfree(a)
 #endif
 
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && !defined(ISRHEL7)
-#define xchg_release xchg
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) && !defined(ISRHEL7)
-#include <asm/barrier.h>
-#ifndef smp_load_acquire
-#define smp_load_acquire(p) \
-({ \
-	typeof(*p) ___p1 = ACCESS_ONCE(*p); \
-	smp_mb(); \
-	___p1; \
-})
-#endif
-#endif
-
 #if defined(ISUBUNTU1604) || defined(ISRHEL7)
 #include <linux/siphash.h>
 #ifndef _WG_LINUX_SIPHASH_H
12 changes: 6 additions & 6 deletions src/device.c
@@ -242,8 +242,8 @@ static void wg_destruct(struct net_device *dev)
 	destroy_workqueue(wg->handshake_receive_wq);
 	destroy_workqueue(wg->handshake_send_wq);
 	destroy_workqueue(wg->packet_crypt_wq);
-	wg_packet_queue_free(&wg->decrypt_queue);
-	wg_packet_queue_free(&wg->encrypt_queue);
+	wg_packet_queue_free(&wg->decrypt_queue, true);
+	wg_packet_queue_free(&wg->encrypt_queue, true);
 	rcu_barrier(); /* Wait for all the peers to be actually freed. */
 	wg_ratelimiter_uninit();
 	memzero_explicit(&wg->static_identity, sizeof(wg->static_identity));
@@ -351,12 +351,12 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
 		goto err_destroy_handshake_send;
 
 	ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker,
-				   MAX_QUEUED_PACKETS);
+				   true, MAX_QUEUED_PACKETS);
 	if (ret < 0)
 		goto err_destroy_packet_crypt;
 
 	ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker,
-				   MAX_QUEUED_PACKETS);
+				   true, MAX_QUEUED_PACKETS);
 	if (ret < 0)
 		goto err_free_encrypt_queue;
 
@@ -381,9 +381,9 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
 err_uninit_ratelimiter:
 	wg_ratelimiter_uninit();
 err_free_decrypt_queue:
-	wg_packet_queue_free(&wg->decrypt_queue);
+	wg_packet_queue_free(&wg->decrypt_queue, true);
 err_free_encrypt_queue:
-	wg_packet_queue_free(&wg->encrypt_queue);
+	wg_packet_queue_free(&wg->encrypt_queue, true);
 err_destroy_packet_crypt:
 	destroy_workqueue(wg->packet_crypt_wq);
 err_destroy_handshake_send:
15 changes: 7 additions & 8 deletions src/device.h
@@ -27,14 +27,13 @@ struct multicore_worker {
 
 struct crypt_queue {
 	struct ptr_ring ring;
-	struct multicore_worker __percpu *worker;
-	int last_cpu;
-};
-
-struct prev_queue {
-	struct sk_buff *head, *tail, *peeked;
-	struct { struct sk_buff *next, *prev; } empty; // Match first 2 members of struct sk_buff.
-	atomic_t count;
+	union {
+		struct {
+			struct multicore_worker __percpu *worker;
+			int last_cpu;
+		};
+		struct work_struct work;
+	};
 };
 
 struct wg_device {
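The multicore flag passed to wg_packet_queue_init()/wg_packet_queue_free() selects which arm of the restored union is live. Collected from the device.c hunks above and the peer.c hunk below, the three initialization patterns are (comments added here for orientation):

/* Device-wide queues: per-CPU workers; the worker/last_cpu arm is live. */
wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker,
		     true, MAX_QUEUED_PACKETS);

/* Per-peer tx queue: a single serialized work item; the work arm is live. */
wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false,
		     MAX_QUEUED_PACKETS);

/* Per-peer rx queue: no worker at all; the ring is presumably drained from
 * the receive path (cf. wg_queue_enqueue_per_peer_napi() in queueing.h). */
wg_packet_queue_init(&peer->rx_queue, NULL, false, MAX_QUEUED_PACKETS);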
28 changes: 19 additions & 9 deletions src/peer.c
@@ -32,22 +32,27 @@ struct wg_peer *wg_peer_create(struct wg_device *wg,
 	peer = kzalloc(sizeof(*peer), GFP_KERNEL);
 	if (unlikely(!peer))
 		return ERR_PTR(ret);
-	if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
-		goto err;
-
 	peer->device = wg;
+
 	wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
 				public_key, preshared_key, peer);
+	if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
+		goto err_1;
+	if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false,
+				 MAX_QUEUED_PACKETS))
+		goto err_2;
+	if (wg_packet_queue_init(&peer->rx_queue, NULL, false,
+				 MAX_QUEUED_PACKETS))
+		goto err_3;
 
 	peer->internal_id = atomic64_inc_return(&peer_counter);
 	peer->serial_work_cpu = nr_cpumask_bits;
 	wg_cookie_init(&peer->latest_cookie);
 	wg_timers_init(peer);
 	wg_cookie_checker_precompute_peer_keys(peer);
 	spin_lock_init(&peer->keypairs.keypair_update_lock);
-	INIT_WORK(&peer->transmit_handshake_work, wg_packet_handshake_send_worker);
-	INIT_WORK(&peer->transmit_packet_work, wg_packet_tx_worker);
-	wg_prev_queue_init(&peer->tx_queue);
-	wg_prev_queue_init(&peer->rx_queue);
+	INIT_WORK(&peer->transmit_handshake_work,
+		  wg_packet_handshake_send_worker);
 	rwlock_init(&peer->endpoint_lock);
 	kref_init(&peer->refcount);
 	skb_queue_head_init(&peer->staged_packet_queue);
@@ -63,7 +68,11 @@ struct wg_peer *wg_peer_create(struct wg_device *wg,
 	pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id);
 	return peer;
 
-err:
+err_3:
+	wg_packet_queue_free(&peer->tx_queue, false);
+err_2:
+	dst_cache_destroy(&peer->endpoint_cache);
+err_1:
 	kfree(peer);
 	return ERR_PTR(ret);
 }
@@ -188,7 +197,8 @@ static void rcu_release(struct rcu_head *rcu)
 	struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu);
 
 	dst_cache_destroy(&peer->endpoint_cache);
-	WARN_ON(wg_prev_queue_peek(&peer->tx_queue) || wg_prev_queue_peek(&peer->rx_queue));
+	wg_packet_queue_free(&peer->rx_queue, false);
+	wg_packet_queue_free(&peer->tx_queue, false);
 
 	/* The final zeroing takes care of clearing any remaining handshake key
 	 * material and other potentially sensitive information.
4 changes: 2 additions & 2 deletions src/peer.h
@@ -36,7 +36,7 @@ struct endpoint {
 
 struct wg_peer {
 	struct wg_device *device;
-	struct prev_queue tx_queue, rx_queue;
+	struct crypt_queue tx_queue, rx_queue;
 	struct sk_buff_head staged_packet_queue;
 	int serial_work_cpu;
 	bool is_dead;
@@ -46,7 +46,7 @@ struct wg_peer {
 	rwlock_t endpoint_lock;
 	struct noise_handshake handshake;
 	atomic64_t last_sent_handshake;
-	struct work_struct transmit_handshake_work, clear_peer_work, transmit_packet_work;
+	struct work_struct transmit_handshake_work, clear_peer_work;
 	struct cookie latest_cookie;
 	struct hlist_node pubkey_hash;
 	u64 rx_bytes, tx_bytes;
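This pairs with the device.h change above: with the revert, the per-peer transmit work item no longer lives in struct wg_peer (transmit_packet_work is dropped here) but in the work arm of each queue's union, which is what the &queue->work argument in wg_queue_enqueue_per_peer() in queueing.h below points at.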
86 changes: 17 additions & 69 deletions src/queueing.c
@@ -9,7 +9,8 @@ struct multicore_worker __percpu *
 wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr)
 {
 	int cpu;
-	struct multicore_worker __percpu *worker = alloc_percpu(struct multicore_worker);
+	struct multicore_worker __percpu *worker =
+		alloc_percpu(struct multicore_worker);
 
 	if (!worker)
 		return NULL;
@@ -22,86 +23,33 @@ wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr)
 }
 
 int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
-			 unsigned int len)
+			 bool multicore, unsigned int len)
 {
 	int ret;
 
 	memset(queue, 0, sizeof(*queue));
 	ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL);
 	if (ret)
 		return ret;
-	queue->worker = wg_packet_percpu_multicore_worker_alloc(function, queue);
-	if (!queue->worker) {
-		ptr_ring_cleanup(&queue->ring, NULL);
-		return -ENOMEM;
+	if (function) {
+		if (multicore) {
+			queue->worker = wg_packet_percpu_multicore_worker_alloc(
+				function, queue);
+			if (!queue->worker) {
+				ptr_ring_cleanup(&queue->ring, NULL);
+				return -ENOMEM;
+			}
+		} else {
+			INIT_WORK(&queue->work, function);
+		}
 	}
 	return 0;
 }
 
-void wg_packet_queue_free(struct crypt_queue *queue)
+void wg_packet_queue_free(struct crypt_queue *queue, bool multicore)
{
-	free_percpu(queue->worker);
+	if (multicore)
+		free_percpu(queue->worker);
 	WARN_ON(!__ptr_ring_empty(&queue->ring));
 	ptr_ring_cleanup(&queue->ring, NULL);
 }

-#define NEXT(skb) ((skb)->prev)
-#define STUB(queue) ((struct sk_buff *)&queue->empty)
-
-void wg_prev_queue_init(struct prev_queue *queue)
-{
-	NEXT(STUB(queue)) = NULL;
-	queue->head = queue->tail = STUB(queue);
-	queue->peeked = NULL;
-	atomic_set(&queue->count, 0);
-	BUILD_BUG_ON(
-		offsetof(struct sk_buff, next) != offsetof(struct prev_queue, empty.next) -
-							offsetof(struct prev_queue, empty) ||
-		offsetof(struct sk_buff, prev) != offsetof(struct prev_queue, empty.prev) -
-							 offsetof(struct prev_queue, empty));
-}
-
-static void __wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb)
-{
-	WRITE_ONCE(NEXT(skb), NULL);
-	WRITE_ONCE(NEXT(xchg_release(&queue->head, skb)), skb);
-}
-
-bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb)
-{
-	if (!atomic_add_unless(&queue->count, 1, MAX_QUEUED_PACKETS))
-		return false;
-	__wg_prev_queue_enqueue(queue, skb);
-	return true;
-}
-
-struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue)
-{
-	struct sk_buff *tail = queue->tail, *next = smp_load_acquire(&NEXT(tail));
-
-	if (tail == STUB(queue)) {
-		if (!next)
-			return NULL;
-		queue->tail = next;
-		tail = next;
-		next = smp_load_acquire(&NEXT(next));
-	}
-	if (next) {
-		queue->tail = next;
-		atomic_dec(&queue->count);
-		return tail;
-	}
-	if (tail != READ_ONCE(queue->head))
-		return NULL;
-	__wg_prev_queue_enqueue(queue, STUB(queue));
-	next = smp_load_acquire(&NEXT(tail));
-	if (next) {
-		queue->tail = next;
-		atomic_dec(&queue->count);
-		return tail;
-	}
-	return NULL;
-}
-
-#undef NEXT
-#undef STUB
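The block deleted above implemented an intrusive multi-producer, single-consumer (MPSC) queue: producers link a new node in with an atomic exchange on head, the lone consumer walks tail, and a stub node (prev_queue's "empty" member, overlaid on the first two sk_buff fields so that skb->prev can serve as the next link) keeps the consumer from handing out the node that head still points to. Below is a minimal userspace model of the same algorithm using C11 atomics; the mpsc and node names and the int payload are illustrative, and the atomic count that caps the kernel queue at MAX_QUEUED_PACKETS is omitted.

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	_Atomic(struct node *) next;
	int value;
};

struct mpsc {
	_Atomic(struct node *) head; /* producers swing this with an xchg */
	struct node *tail;           /* owned by the single consumer */
	struct node stub;            /* plays the role of prev_queue's "empty" */
};

static void mpsc_init(struct mpsc *q)
{
	atomic_init(&q->stub.next, NULL);
	atomic_init(&q->head, &q->stub);
	q->tail = &q->stub;
}

/* Multi-producer push; mirrors the deleted __wg_prev_queue_enqueue(). */
static void mpsc_push(struct mpsc *q, struct node *n)
{
	struct node *prev;

	atomic_store_explicit(&n->next, NULL, memory_order_relaxed);
	prev = atomic_exchange_explicit(&q->head, n, memory_order_release);
	atomic_store_explicit(&prev->next, n, memory_order_relaxed);
}

/* Single-consumer pop; mirrors the deleted wg_prev_queue_dequeue(). */
static struct node *mpsc_pop(struct mpsc *q)
{
	struct node *tail = q->tail;
	struct node *next = atomic_load_explicit(&tail->next,
						 memory_order_acquire);

	if (tail == &q->stub) {	/* step over the stub node */
		if (!next)
			return NULL;	/* queue is empty */
		q->tail = next;
		tail = next;
		next = atomic_load_explicit(&tail->next, memory_order_acquire);
	}
	if (next) {	/* tail has a successor, so handing it out is safe */
		q->tail = next;
		return tail;
	}
	/* tail looks last; if head moved on, a producer is mid-push. */
	if (tail != atomic_load_explicit(&q->head, memory_order_relaxed))
		return NULL;
	/* Re-insert the stub so we never return the node head points at. */
	mpsc_push(q, &q->stub);
	next = atomic_load_explicit(&tail->next, memory_order_acquire);
	if (next) {
		q->tail = next;
		return tail;
	}
	return NULL;
}

int main(void)
{
	struct mpsc q;
	struct node a = { .value = 1 }, b = { .value = 2 }, *n;

	mpsc_init(&q);
	mpsc_push(&q, &a);
	mpsc_push(&q, &b);
	while ((n = mpsc_pop(&q)))
		printf("%d\n", n->value); /* prints 1, then 2 */
	return 0;
}

The subtle case is the final branch of the pop: if tail has no successor but head no longer points at tail, a producer has done its exchange but not yet its next-pointer store, so the consumer reports empty rather than spin and will find the node on a later pass.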
45 changes: 12 additions & 33 deletions src/queueing.h
@@ -17,13 +17,12 @@ struct wg_device;
 struct wg_peer;
 struct multicore_worker;
 struct crypt_queue;
-struct prev_queue;
 struct sk_buff;
 
 /* queueing.c APIs: */
 int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
-			 unsigned int len);
-void wg_packet_queue_free(struct crypt_queue *queue);
+			 bool multicore, unsigned int len);
+void wg_packet_queue_free(struct crypt_queue *queue, bool multicore);
 struct multicore_worker __percpu *
 wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);

@@ -139,31 +138,8 @@ static inline int wg_cpumask_next_online(int *next)
 	return cpu;
 }
 
-void wg_prev_queue_init(struct prev_queue *queue);
-
-/* Multi producer */
-bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb);
-
-/* Single consumer */
-struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue);
-
-/* Single consumer */
-static inline struct sk_buff *wg_prev_queue_peek(struct prev_queue *queue)
-{
-	if (queue->peeked)
-		return queue->peeked;
-	queue->peeked = wg_prev_queue_dequeue(queue);
-	return queue->peeked;
-}
-
-/* Single consumer */
-static inline void wg_prev_queue_drop_peeked(struct prev_queue *queue)
-{
-	queue->peeked = NULL;
-}
-
 static inline int wg_queue_enqueue_per_device_and_peer(
-	struct crypt_queue *device_queue, struct prev_queue *peer_queue,
+	struct crypt_queue *device_queue, struct crypt_queue *peer_queue,
 	struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
 {
 	int cpu;
@@ -172,9 +148,8 @@ static inline int wg_queue_enqueue_per_device_and_peer(
 	/* We first queue this up for the peer ingestion, but the consumer
 	 * will wait for the state to change to CRYPTED or DEAD before.
 	 */
-	if (unlikely(!wg_prev_queue_enqueue(peer_queue, skb)))
+	if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb)))
 		return -ENOSPC;
-
 	/* Then we queue it up in the device queue, which consumes the
 	 * packet as soon as it can.
 	 */
@@ -185,20 +160,24 @@ static inline int wg_queue_enqueue_per_device_and_peer(
 	return 0;
 }
 
-static inline void wg_queue_enqueue_per_peer_tx(struct sk_buff *skb, enum packet_state state)
+static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue,
+					     struct sk_buff *skb,
+					     enum packet_state state)
 {
 	/* We take a reference, because as soon as we call atomic_set, the
 	 * peer can be freed from below us.
 	 */
 	struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
 
 	atomic_set_release(&PACKET_CB(skb)->state, state);
-	queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, peer->internal_id),
-		      peer->device->packet_crypt_wq, &peer->transmit_packet_work);
+	queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu,
+					       peer->internal_id),
+		      peer->device->packet_crypt_wq, &queue->work);
 	wg_peer_put(peer);
 }
 
-static inline void wg_queue_enqueue_per_peer_rx(struct sk_buff *skb, enum packet_state state)
+static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb,
+						  enum packet_state state)
 {
 	/* We take a reference, because as soon as we call atomic_set, the
 	 * peer can be freed from below us.
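Taken together, the restored scheme double-books each packet: wg_queue_enqueue_per_device_and_peer() above puts the skb in the peer's ring, which fixes per-peer transmit order, and in the device ring, where whichever CPU picks it up performs the crypto; the helpers here then publish the result with atomic_set_release() and kick the serialized per-peer consumer on packet_crypt_wq. That consumer lives in src/send.c and src/receive.c, whose diffs did not load on this page, so the following fragment is an assumption about its shape after this revert, inferred from the "wait for the state to change to CRYPTED or DEAD" comment above, not a quote of the source (queue, skb, and PACKET_CB() as in the hunks above):

/* Hypothetical sketch of the serialized per-peer tx consumer. */
while ((skb = __ptr_ring_peek(&queue->ring)) != NULL) {
	enum packet_state state =
		atomic_read_acquire(&PACKET_CB(skb)->state);

	/* Stop at the first packet whose crypto hasn't finished yet;
	 * this is what preserves per-peer transmit order. */
	if (state == PACKET_STATE_UNCRYPTED)
		break;
	__ptr_ring_discard_one(&queue->ring);
	if (likely(state == PACKET_STATE_CRYPTED)) {
		/* hand the encrypted skb to the UDP socket, in order */
	} else {
		/* PACKET_STATE_DEAD: free the skb */
	}
}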
(Diffs for the remaining two files, src/receive.c and src/send.c, did not load on this page.)
