forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 1
/
fragmentation.c
553 lines (478 loc) · 15.8 KB
/
fragmentation.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) B.A.T.M.A.N. contributors:
*
* Martin Hundebøll <[email protected]>
*/
#include "fragmentation.h"
#include "main.h"
#include <linux/atomic.h>
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
#include <linux/gfp.h>
#include <linux/if_ether.h>
#include <linux/jiffies.h>
#include <linux/lockdep.h>
#include <linux/minmax.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <uapi/linux/batadv_packet.h>
#include "hard-interface.h"
#include "originator.h"
#include "routing.h"
#include "send.h"
/**
* batadv_frag_clear_chain() - delete entries in the fragment buffer chain
* @head: head of chain with entries.
* @dropped: whether the chain is cleared because all fragments are dropped
*
* Free fragments in the passed hlist. Should be called with appropriate lock.
*/
static void batadv_frag_clear_chain(struct hlist_head *head, bool dropped)
{
struct batadv_frag_list_entry *entry;
struct hlist_node *node;
hlist_for_each_entry_safe(entry, node, head, list) {
hlist_del(&entry->list);
if (dropped)
kfree_skb(entry->skb);
else
consume_skb(entry->skb);
kfree(entry);
}
}
/**
* batadv_frag_purge_orig() - free fragments associated to an orig
* @orig_node: originator to free fragments from
* @check_cb: optional function to tell if an entry should be purged
*/
void batadv_frag_purge_orig(struct batadv_orig_node *orig_node,
bool (*check_cb)(struct batadv_frag_table_entry *))
{
struct batadv_frag_table_entry *chain;
u8 i;
for (i = 0; i < BATADV_FRAG_BUFFER_COUNT; i++) {
chain = &orig_node->fragments[i];
spin_lock_bh(&chain->lock);
if (!check_cb || check_cb(chain)) {
batadv_frag_clear_chain(&chain->fragment_list, true);
chain->size = 0;
}
spin_unlock_bh(&chain->lock);
}
}
/**
* batadv_frag_size_limit() - maximum possible size of packet to be fragmented
*
* Return: the maximum size of payload that can be fragmented.
*/
static int batadv_frag_size_limit(void)
{
int limit = BATADV_FRAG_MAX_FRAG_SIZE;
limit -= sizeof(struct batadv_frag_packet);
limit *= BATADV_FRAG_MAX_FRAGMENTS;
return limit;
}
/**
* batadv_frag_init_chain() - check and prepare fragment chain for new fragment
* @chain: chain in fragments table to init
* @seqno: sequence number of the received fragment
*
* Make chain ready for a fragment with sequence number "seqno". Delete existing
* entries if they have an "old" sequence number.
*
* Caller must hold chain->lock.
*
* Return: true if chain is empty and the caller can just insert the new
* fragment without searching for the right position.
*/
static bool batadv_frag_init_chain(struct batadv_frag_table_entry *chain,
u16 seqno)
{
lockdep_assert_held(&chain->lock);
if (chain->seqno == seqno)
return false;
if (!hlist_empty(&chain->fragment_list))
batadv_frag_clear_chain(&chain->fragment_list, true);
chain->size = 0;
chain->seqno = seqno;
return true;
}
/**
* batadv_frag_insert_packet() - insert a fragment into a fragment chain
* @orig_node: originator that the fragment was received from
* @skb: skb to insert
* @chain_out: list head to attach complete chains of fragments to
*
* Insert a new fragment into the reverse ordered chain in the right table
* entry. The hash table entry is cleared if "old" fragments exist in it.
*
* Return: true if skb is buffered, false on error. If the chain has all the
* fragments needed to merge the packet, the chain is moved to the passed head
* to avoid locking the chain in the table.
*/
static bool batadv_frag_insert_packet(struct batadv_orig_node *orig_node,
struct sk_buff *skb,
struct hlist_head *chain_out)
{
struct batadv_frag_table_entry *chain;
struct batadv_frag_list_entry *frag_entry_new = NULL, *frag_entry_curr;
struct batadv_frag_list_entry *frag_entry_last = NULL;
struct batadv_frag_packet *frag_packet;
u8 bucket;
u16 seqno, hdr_size = sizeof(struct batadv_frag_packet);
bool ret = false;
/* Linearize packet to avoid linearizing 16 packets in a row when doing
* the later merge. Non-linear merge should be added to remove this
* linearization.
*/
if (skb_linearize(skb) < 0)
goto err;
frag_packet = (struct batadv_frag_packet *)skb->data;
seqno = ntohs(frag_packet->seqno);
bucket = seqno % BATADV_FRAG_BUFFER_COUNT;
frag_entry_new = kmalloc(sizeof(*frag_entry_new), GFP_ATOMIC);
if (!frag_entry_new)
goto err;
frag_entry_new->skb = skb;
frag_entry_new->no = frag_packet->no;
/* Select entry in the "chain table" and delete any prior fragments
* with another sequence number. batadv_frag_init_chain() returns true,
* if the list is empty at return.
*/
chain = &orig_node->fragments[bucket];
spin_lock_bh(&chain->lock);
if (batadv_frag_init_chain(chain, seqno)) {
hlist_add_head(&frag_entry_new->list, &chain->fragment_list);
chain->size = skb->len - hdr_size;
chain->timestamp = jiffies;
chain->total_size = ntohs(frag_packet->total_size);
ret = true;
goto out;
}
/* Find the position for the new fragment. */
hlist_for_each_entry(frag_entry_curr, &chain->fragment_list, list) {
/* Drop packet if fragment already exists. */
if (frag_entry_curr->no == frag_entry_new->no)
goto err_unlock;
/* Order fragments from highest to lowest. */
if (frag_entry_curr->no < frag_entry_new->no) {
hlist_add_before(&frag_entry_new->list,
&frag_entry_curr->list);
chain->size += skb->len - hdr_size;
chain->timestamp = jiffies;
ret = true;
goto out;
}
/* store current entry because it could be the last in list */
frag_entry_last = frag_entry_curr;
}
/* Reached the end of the list, so insert after 'frag_entry_last'. */
if (likely(frag_entry_last)) {
hlist_add_behind(&frag_entry_new->list, &frag_entry_last->list);
chain->size += skb->len - hdr_size;
chain->timestamp = jiffies;
ret = true;
}
out:
if (chain->size > batadv_frag_size_limit() ||
chain->total_size != ntohs(frag_packet->total_size) ||
chain->total_size > batadv_frag_size_limit()) {
/* Clear chain if total size of either the list or the packet
* exceeds the maximum size of one merged packet. Don't allow
* packets to have different total_size.
*/
batadv_frag_clear_chain(&chain->fragment_list, true);
chain->size = 0;
} else if (ntohs(frag_packet->total_size) == chain->size) {
/* All fragments received. Hand over chain to caller. */
hlist_move_list(&chain->fragment_list, chain_out);
chain->size = 0;
}
err_unlock:
spin_unlock_bh(&chain->lock);
err:
if (!ret) {
kfree(frag_entry_new);
kfree_skb(skb);
}
return ret;
}
/**
* batadv_frag_merge_packets() - merge a chain of fragments
* @chain: head of chain with fragments
*
* Expand the first skb in the chain and copy the content of the remaining
* skb's into the expanded one. After doing so, clear the chain.
*
* Return: the merged skb or NULL on error.
*/
static struct sk_buff *
batadv_frag_merge_packets(struct hlist_head *chain)
{
struct batadv_frag_packet *packet;
struct batadv_frag_list_entry *entry;
struct sk_buff *skb_out;
int size, hdr_size = sizeof(struct batadv_frag_packet);
bool dropped = false;
/* Remove first entry, as this is the destination for the rest of the
* fragments.
*/
entry = hlist_entry(chain->first, struct batadv_frag_list_entry, list);
hlist_del(&entry->list);
skb_out = entry->skb;
kfree(entry);
packet = (struct batadv_frag_packet *)skb_out->data;
size = ntohs(packet->total_size) + hdr_size;
/* Make room for the rest of the fragments. */
if (pskb_expand_head(skb_out, 0, size - skb_out->len, GFP_ATOMIC) < 0) {
kfree_skb(skb_out);
skb_out = NULL;
dropped = true;
goto free;
}
/* Move the existing MAC header to just before the payload. (Override
* the fragment header.)
*/
skb_pull(skb_out, hdr_size);
skb_out->ip_summed = CHECKSUM_NONE;
memmove(skb_out->data - ETH_HLEN, skb_mac_header(skb_out), ETH_HLEN);
skb_set_mac_header(skb_out, -ETH_HLEN);
skb_reset_network_header(skb_out);
skb_reset_transport_header(skb_out);
/* Copy the payload of the each fragment into the last skb */
hlist_for_each_entry(entry, chain, list) {
size = entry->skb->len - hdr_size;
skb_put_data(skb_out, entry->skb->data + hdr_size, size);
}
free:
/* Locking is not needed, because 'chain' is not part of any orig. */
batadv_frag_clear_chain(chain, dropped);
return skb_out;
}
/**
* batadv_frag_skb_buffer() - buffer fragment for later merge
* @skb: skb to buffer
* @orig_node_src: originator that the skb is received from
*
* Add fragment to buffer and merge fragments if possible.
*
* There are three possible outcomes: 1) Packet is merged: Return true and
* set *skb to merged packet; 2) Packet is buffered: Return true and set *skb
* to NULL; 3) Error: Return false and free skb.
*
* Return: true when the packet is merged or buffered, false when skb is not
* used.
*/
bool batadv_frag_skb_buffer(struct sk_buff **skb,
struct batadv_orig_node *orig_node_src)
{
struct sk_buff *skb_out = NULL;
struct hlist_head head = HLIST_HEAD_INIT;
bool ret = false;
/* Add packet to buffer and table entry if merge is possible. */
if (!batadv_frag_insert_packet(orig_node_src, *skb, &head))
goto out_err;
/* Leave if more fragments are needed to merge. */
if (hlist_empty(&head))
goto out;
skb_out = batadv_frag_merge_packets(&head);
if (!skb_out)
goto out_err;
out:
ret = true;
out_err:
*skb = skb_out;
return ret;
}
/**
* batadv_frag_skb_fwd() - forward fragments that would exceed MTU when merged
* @skb: skb to forward
* @recv_if: interface that the skb is received on
* @orig_node_src: originator that the skb is received from
*
* Look up the next-hop of the fragments payload and check if the merged packet
* will exceed the MTU towards the next-hop. If so, the fragment is forwarded
* without merging it.
*
* Return: true if the fragment is consumed/forwarded, false otherwise.
*/
bool batadv_frag_skb_fwd(struct sk_buff *skb,
struct batadv_hard_iface *recv_if,
struct batadv_orig_node *orig_node_src)
{
struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
struct batadv_orig_node *orig_node_dst;
struct batadv_neigh_node *neigh_node = NULL;
struct batadv_frag_packet *packet;
u16 total_size;
bool ret = false;
packet = (struct batadv_frag_packet *)skb->data;
orig_node_dst = batadv_orig_hash_find(bat_priv, packet->dest);
if (!orig_node_dst)
goto out;
neigh_node = batadv_find_router(bat_priv, orig_node_dst, recv_if);
if (!neigh_node)
goto out;
/* Forward the fragment, if the merged packet would be too big to
* be assembled.
*/
total_size = ntohs(packet->total_size);
if (total_size > neigh_node->if_incoming->net_dev->mtu) {
batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_FWD);
batadv_add_counter(bat_priv, BATADV_CNT_FRAG_FWD_BYTES,
skb->len + ETH_HLEN);
packet->ttl--;
batadv_send_unicast_skb(skb, neigh_node);
ret = true;
}
out:
if (orig_node_dst)
batadv_orig_node_put(orig_node_dst);
if (neigh_node)
batadv_neigh_node_put(neigh_node);
return ret;
}
/**
* batadv_frag_create() - create a fragment from skb
* @net_dev: outgoing device for fragment
* @skb: skb to create fragment from
* @frag_head: header to use in new fragment
* @fragment_size: size of new fragment
*
* Split the passed skb into two fragments: A new one with size matching the
* passed mtu and the old one with the rest. The new skb contains data from the
* tail of the old skb.
*
* Return: the new fragment, NULL on error.
*/
static struct sk_buff *batadv_frag_create(struct net_device *net_dev,
struct sk_buff *skb,
struct batadv_frag_packet *frag_head,
unsigned int fragment_size)
{
unsigned int ll_reserved = LL_RESERVED_SPACE(net_dev);
unsigned int tailroom = net_dev->needed_tailroom;
struct sk_buff *skb_fragment;
unsigned int header_size = sizeof(*frag_head);
unsigned int mtu = fragment_size + header_size;
skb_fragment = dev_alloc_skb(ll_reserved + mtu + tailroom);
if (!skb_fragment)
goto err;
skb_fragment->priority = skb->priority;
/* Eat the last mtu-bytes of the skb */
skb_reserve(skb_fragment, ll_reserved + header_size);
skb_split(skb, skb_fragment, skb->len - fragment_size);
/* Add the header */
skb_push(skb_fragment, header_size);
memcpy(skb_fragment->data, frag_head, header_size);
err:
return skb_fragment;
}
/**
* batadv_frag_send_packet() - create up to 16 fragments from the passed skb
* @skb: skb to create fragments from
* @orig_node: final destination of the created fragments
* @neigh_node: next-hop of the created fragments
*
* Return: the netdev tx status or a negative errno code on a failure
*/
int batadv_frag_send_packet(struct sk_buff *skb,
struct batadv_orig_node *orig_node,
struct batadv_neigh_node *neigh_node)
{
struct net_device *net_dev = neigh_node->if_incoming->net_dev;
struct batadv_priv *bat_priv;
struct batadv_hard_iface *primary_if = NULL;
struct batadv_frag_packet frag_header;
struct sk_buff *skb_fragment;
unsigned int mtu = net_dev->mtu;
unsigned int header_size = sizeof(frag_header);
unsigned int max_fragment_size, num_fragments;
int ret;
/* To avoid merge and refragmentation at next-hops we never send
* fragments larger than BATADV_FRAG_MAX_FRAG_SIZE
*/
mtu = min_t(unsigned int, mtu, BATADV_FRAG_MAX_FRAG_SIZE);
max_fragment_size = mtu - header_size;
if (skb->len == 0 || max_fragment_size == 0)
return -EINVAL;
num_fragments = (skb->len - 1) / max_fragment_size + 1;
max_fragment_size = (skb->len - 1) / num_fragments + 1;
/* Don't even try to fragment, if we need more than 16 fragments */
if (num_fragments > BATADV_FRAG_MAX_FRAGMENTS) {
ret = -EAGAIN;
goto free_skb;
}
bat_priv = orig_node->bat_priv;
primary_if = batadv_primary_if_get_selected(bat_priv);
if (!primary_if) {
ret = -EINVAL;
goto free_skb;
}
/* Create one header to be copied to all fragments */
frag_header.packet_type = BATADV_UNICAST_FRAG;
frag_header.version = BATADV_COMPAT_VERSION;
frag_header.ttl = BATADV_TTL;
frag_header.seqno = htons(atomic_inc_return(&bat_priv->frag_seqno));
frag_header.reserved = 0;
frag_header.no = 0;
frag_header.total_size = htons(skb->len);
/* skb->priority values from 256->263 are magic values to
* directly indicate a specific 802.1d priority. This is used
* to allow 802.1d priority to be passed directly in from VLAN
* tags, etc.
*/
if (skb->priority >= 256 && skb->priority <= 263)
frag_header.priority = skb->priority - 256;
else
frag_header.priority = 0;
ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr);
ether_addr_copy(frag_header.dest, orig_node->orig);
/* Eat and send fragments from the tail of skb */
while (skb->len > max_fragment_size) {
/* The initial check in this function should cover this case */
if (unlikely(frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1)) {
ret = -EINVAL;
goto put_primary_if;
}
skb_fragment = batadv_frag_create(net_dev, skb, &frag_header,
max_fragment_size);
if (!skb_fragment) {
ret = -ENOMEM;
goto put_primary_if;
}
batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
skb_fragment->len + ETH_HLEN);
ret = batadv_send_unicast_skb(skb_fragment, neigh_node);
if (ret != NET_XMIT_SUCCESS) {
ret = NET_XMIT_DROP;
goto put_primary_if;
}
frag_header.no++;
}
/* make sure that there is at least enough head for the fragmentation
* and ethernet headers
*/
ret = skb_cow_head(skb, ETH_HLEN + header_size);
if (ret < 0)
goto put_primary_if;
skb_push(skb, header_size);
memcpy(skb->data, &frag_header, header_size);
/* Send the last fragment */
batadv_inc_counter(bat_priv, BATADV_CNT_FRAG_TX);
batadv_add_counter(bat_priv, BATADV_CNT_FRAG_TX_BYTES,
skb->len + ETH_HLEN);
ret = batadv_send_unicast_skb(skb, neigh_node);
/* skb was consumed */
skb = NULL;
put_primary_if:
batadv_hardif_put(primary_if);
free_skb:
kfree_skb(skb);
return ret;
}