From a78350d91a67f977608c2f47b4b17d6e045e7930 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar
Date: Wed, 27 Apr 2016 14:57:33 -0700
Subject: [PATCH] stt: linearize for CONFIG_SLUB case

In the STT implementation I saw performance improvements from
linearizing the skb in the SLUB case, so the following patch skips the
zero-copy operation in that case.

The first change is to the reassembly code: an in-order packet is
merged into the head skb, and if there is no room to merge it, the
combined packet is linearized.

The second change covers reassembly of out-of-order packets; here the
list of packets is linearized before it is sent up to the datapath.

Performance numbers for a large-packet TCP test using netperf:

OVS branch        TCP      Host0    Host1
version           Gbps     CPU%     CPU%
------------------------------------------
2.5               9.4      272      315
master + patch    9.4      230      285

Tested-by: Vasmi Abidi
Signed-off-by: Pravin B Shelar
Acked-by: Jesse Gross
---
 datapath/linux/compat/stt.c | 242 +++++++++++++++++++++++++-----------
 1 file changed, 168 insertions(+), 74 deletions(-)

diff --git a/datapath/linux/compat/stt.c b/datapath/linux/compat/stt.c
index eb397e85bb0..86d225e868f 100644
--- a/datapath/linux/compat/stt.c
+++ b/datapath/linux/compat/stt.c
@@ -49,6 +49,14 @@
 #define STT_DST_PORT 7471
 
 #ifdef OVS_STT
+#ifdef CONFIG_SLUB
+/*
+ * We saw better performance when zero copy is skipped in the SLUB
+ * case, so skip the zero-copy path when CONFIG_SLUB is enabled.
+ */
+#define SKIP_ZERO_COPY
+#endif
+
 #define STT_VER 0
 
 /* @list: Per-net list of STT ports.
@@ -219,73 +227,6 @@ static int clear_gso(struct sk_buff *skb)
 	return 0;
 }
 
-static struct sk_buff *normalize_frag_list(struct sk_buff *head,
-					   struct sk_buff **skbp)
-{
-	struct sk_buff *skb = *skbp;
-	struct sk_buff *last;
-
-	do {
-		struct sk_buff *frags;
-
-		if (skb_shared(skb)) {
-			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
-
-			if (unlikely(!nskb))
-				return ERR_PTR(-ENOMEM);
-
-			nskb->next = skb->next;
-			consume_skb(skb);
-			skb = nskb;
-			*skbp = skb;
-		}
-
-		if (head) {
-			head->len -= skb->len;
-			head->data_len -= skb->len;
-			head->truesize -= skb->truesize;
-		}
-
-		frags = skb_shinfo(skb)->frag_list;
-		if (frags) {
-			int err;
-
-			err = skb_unclone(skb, GFP_ATOMIC);
-			if (unlikely(err))
-				return ERR_PTR(err);
-
-			last = normalize_frag_list(skb, &frags);
-			if (IS_ERR(last))
-				return last;
-
-			skb_shinfo(skb)->frag_list = NULL;
-			last->next = skb->next;
-			skb->next = frags;
-		} else {
-			last = skb;
-		}
-
-		skbp = &skb->next;
-	} while ((skb = skb->next));
-
-	return last;
-}
-
-/* Takes a linked list of skbs, which potentially contain frag_list
- * (whose members in turn potentially contain frag_lists, etc.) and
- * converts them into a single linear linked list.
- */
-static int straighten_frag_list(struct sk_buff **skbp)
-{
-	struct sk_buff *err_skb;
-
-	err_skb = normalize_frag_list(NULL, skbp);
-	if (IS_ERR(err_skb))
-		return PTR_ERR(err_skb);
-
-	return 0;
-}
-
 static void copy_skb_metadata(struct sk_buff *to, struct sk_buff *from)
 {
 	to->protocol = from->protocol;
@@ -465,6 +406,74 @@ static int skb_list_segment(struct sk_buff *head, bool ipv4, int l4_offset)
 	return 0;
 }
 
+#ifndef SKIP_ZERO_COPY
+static struct sk_buff *normalize_frag_list(struct sk_buff *head,
+					   struct sk_buff **skbp)
+{
+	struct sk_buff *skb = *skbp;
+	struct sk_buff *last;
+
+	do {
+		struct sk_buff *frags;
+
+		if (skb_shared(skb)) {
+			struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
+
+			if (unlikely(!nskb))
+				return ERR_PTR(-ENOMEM);
+
+			nskb->next = skb->next;
+			consume_skb(skb);
+			skb = nskb;
+			*skbp = skb;
+		}
+
+		if (head) {
+			head->len -= skb->len;
+			head->data_len -= skb->len;
+			head->truesize -= skb->truesize;
+		}
+
+		frags = skb_shinfo(skb)->frag_list;
+		if (frags) {
+			int err;
+
+			err = skb_unclone(skb, GFP_ATOMIC);
+			if (unlikely(err))
+				return ERR_PTR(err);
+
+			last = normalize_frag_list(skb, &frags);
+			if (IS_ERR(last))
+				return last;
+
+			skb_shinfo(skb)->frag_list = NULL;
+			last->next = skb->next;
+			skb->next = frags;
+		} else {
+			last = skb;
+		}
+
+		skbp = &skb->next;
+	} while ((skb = skb->next));
+
+	return last;
+}
+
+/* Takes a linked list of skbs, which potentially contain frag_list
+ * (whose members in turn potentially contain frag_lists, etc.) and
+ * converts them into a single linear linked list.
+ */
+static int straighten_frag_list(struct sk_buff **skbp)
+{
+	struct sk_buff *err_skb;
+
+	err_skb = normalize_frag_list(NULL, skbp);
+	if (IS_ERR(err_skb))
+		return PTR_ERR(err_skb);
+
+	return 0;
+}
+
 static int coalesce_skb(struct sk_buff **headp)
 {
 	struct sk_buff *frag, *head, *prev;
@@ -510,6 +519,34 @@ static int coalesce_skb(struct sk_buff **headp)
 	head->next = NULL;
 	return 0;
 }
+#else
+static int coalesce_skb(struct sk_buff **headp)
+{
+	struct sk_buff *frag, *head = *headp, *next;
+	int delta = FRAG_CB(head)->first.tot_len - skb_headlen(head);
+	int err;
+
+	if (unlikely(!head->next))
+		return 0;
+
+	err = pskb_expand_head(head, 0, delta, GFP_ATOMIC);
+	if (unlikely(err))
+		return err;
+
+	if (unlikely(!__pskb_pull_tail(head, head->data_len)))
+		BUG();
+
+	for (frag = head->next; frag; frag = next) {
+		skb_copy_bits(frag, 0, skb_put(head, frag->len), frag->len);
+		next = frag->next;
+		kfree_skb(frag);
+	}
+
+	head->next = NULL;
+	head->truesize = SKB_TRUESIZE(head->len);
+	return 0;
+}
+#endif
 
 static int __try_to_segment(struct sk_buff *skb, bool csum_partial,
 			    bool ipv4, bool tcp, int l4_offset)
@@ -522,6 +559,12 @@ static int __try_to_segment(struct sk_buff *skb, bool csum_partial,
 
 static int try_to_segment(struct sk_buff *skb)
 {
+#ifdef SKIP_ZERO_COPY
+	/* Since coalesce_skb() does not generate a frag-list, there is
+	 * no need to linearize the skb here.
+	 */
+	return 0;
+#else
 	struct stthdr *stth = stt_hdr(skb);
 	bool csum_partial = !!(stth->flags & STT_CSUM_PARTIAL);
 	bool ipv4 = !!(stth->flags & STT_PROTO_IPV4);
@@ -529,16 +572,19 @@ static int try_to_segment(struct sk_buff *skb)
 	int l4_offset = stth->l4_offset;
 
 	return __try_to_segment(skb, csum_partial, ipv4, tcp, l4_offset);
+#endif
 }
 
 static int segment_skb(struct sk_buff **headp, bool csum_partial,
 		       bool ipv4, bool tcp, int l4_offset)
 {
+#ifndef SKIP_ZERO_COPY
 	int err;
 
 	err = coalesce_skb(headp);
 	if (err)
 		return err;
+#endif
 
 	if (skb_shinfo(*headp)->frag_list)
 		return __try_to_segment(*headp, csum_partial,
@@ -1054,16 +1100,58 @@ static struct pkt_frag *lookup_frag(struct net *net,
 	return victim_frag;
 }
 
+#ifdef SKIP_ZERO_COPY
+static int __copy_skb(struct sk_buff *to, struct sk_buff *from,
+		      int *delta, bool *headstolen)
+{
+	int err;
+
+	if (unlikely(to->next))
+		return -EINVAL;
+
+	if (unlikely(FRAG_CB(to)->offset))
+		return -EINVAL;
+
+	if (unlikely(skb_unclone(to, GFP_ATOMIC)))
+		return -ENOMEM;
+
+	if (skb_try_coalesce(to, from, headstolen, delta))
+		return 0;
+
+	*headstolen = false;
+	err = pskb_expand_head(to, 0, to->data_len + from->len, GFP_ATOMIC);
+	if (unlikely(err))
+		return err;
+
+	if (unlikely(!__pskb_pull_tail(to, to->data_len)))
+		BUG();
+
+	skb_copy_bits(from, 0, skb_put(to, from->len), from->len);
+
+	*delta = from->len;
+	to->truesize += from->len;
+	return 0;
+}
+#else
+static int __copy_skb(struct sk_buff *to, struct sk_buff *from,
+		      int *delta, bool *headstolen)
+{
+	*headstolen = false;
+	return -EINVAL;
+}
+#endif
+
 static struct sk_buff *reassemble(struct sk_buff *skb)
 {
 	struct iphdr *iph = ip_hdr(skb);
 	struct tcphdr *tcph = tcp_hdr(skb);
 	u32 seq = ntohl(tcph->seq);
 	struct stt_percpu *stt_percpu;
-	struct sk_buff *last_skb;
+	struct sk_buff *last_skb, *copied_skb = NULL;
 	struct pkt_frag *frag;
 	struct pkt_key key;
-	int tot_len;
+	int tot_len, delta = skb->truesize;
+	bool headstolen;
 	u32 hash;
 
 	tot_len = seq >> STT_SEQ_LEN_SHIFT;
@@ -1103,7 +1191,6 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
 		FRAG_CB(skb)->first.set_ecn_ce = false;
 		list_add_tail(&frag->lru_node, &stt_percpu->frag_lru);
 		stt_percpu->frag_mem_used += skb->truesize;
-		skb = NULL;
 
 		goto unlock;
 	}
@@ -1114,8 +1201,13 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
 	last_skb = FRAG_CB(frag->skbs)->first.last_skb;
 	if (likely(FRAG_CB(last_skb)->offset + last_skb->len ==
 		   FRAG_CB(skb)->offset)) {
-		last_skb->next = skb;
-		FRAG_CB(frag->skbs)->first.last_skb = skb;
+
+		if (!__copy_skb(frag->skbs, skb, &delta, &headstolen)) {
+			copied_skb = skb;
+		} else {
+			last_skb->next = skb;
+			FRAG_CB(frag->skbs)->first.last_skb = skb;
+		}
 	} else {
 		struct sk_buff *prev = NULL, *next;
 
@@ -1154,8 +1246,8 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
 
 	FRAG_CB(frag->skbs)->first.set_ecn_ce |= INET_ECN_is_ce(iph->tos);
 	FRAG_CB(frag->skbs)->first.rcvd_len += skb->len;
-	FRAG_CB(frag->skbs)->first.mem_used += skb->truesize;
-	stt_percpu->frag_mem_used += skb->truesize;
+	stt_percpu->frag_mem_used += delta;
+	FRAG_CB(frag->skbs)->first.mem_used += delta;
 
 	if (FRAG_CB(frag->skbs)->first.tot_len ==
 	    FRAG_CB(frag->skbs)->first.rcvd_len) {
@@ -1174,6 +1266,8 @@ static struct sk_buff *reassemble(struct sk_buff *skb)
 		skb = NULL;
 	}
 
+	if (copied_skb)
+		kfree_skb_partial(copied_skb, headstolen);
 	goto unlock;
 
 unlock_free:
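
Note (not part of the patch): the heart of the SKIP_ZERO_COPY reassembly path described in the commit message is the merge-or-copy decision made by __copy_skb() above. The fragment below is a simplified standalone sketch of that decision for readers who do not want to trace the STT bookkeeping; the helper name merge_or_copy() is illustrative, the FRAG_CB offset checks and frag_mem_used accounting are omitted, and the BUG() on a failed pull is replaced with an error return.

#include <linux/skbuff.h>

/* Sketch: merge @from into @to, preferring skb_try_coalesce() and
 * falling back to growing @to's linear area and copying the bytes.
 */
static int merge_or_copy(struct sk_buff *to, struct sk_buff *from,
			 int *delta, bool *headstolen)
{
	int err;

	/* Fast path: the core may link @from's data into @to without
	 * copying the payload (possibly stealing @from's head).
	 */
	if (skb_try_coalesce(to, from, headstolen, delta))
		return 0;

	*headstolen = false;

	/* Slow path: make room in @to's linear area for all data. */
	err = pskb_expand_head(to, 0, to->data_len + from->len, GFP_ATOMIC);
	if (err)
		return err;

	/* Pull @to's paged data into the enlarged linear area. */
	if (!__pskb_pull_tail(to, to->data_len))
		return -ENOMEM;

	/* Append @from's bytes with a plain copy. */
	skb_copy_bits(from, 0, skb_put(to, from->len), from->len);

	*delta = from->len;
	to->truesize += from->len;
	return 0;
}

On success the caller releases the source skb with kfree_skb_partial(from, headstolen), which is what reassemble() does for copied_skb in the patch.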