Skip to content

Commit

Permalink
ipvs: add connection redirect support in fnat/snat/nat modes.
Browse files Browse the repository at this point in the history
Since IPv6 flow director is not well-supported by ixgbe driver and only one
single local IPv6 address can be set in "signature" mode, the below solution of
connection redirect provides the way to support IPv6 packet processing with
multiple local IPv6 addresses configured in "signature" mode.

o Add the switch to enable or disable connection redirect.

o If connection redirect is enabled, the following resources are pre-allocated
  while the system boots up:
- Per-socket based connection redirect cache;
- Per-socket based global connection redirect hash table;
- Each lcore allocates its respective packet redirect rings for each other
  lcores to avoid the contention of enqueuing the packets in the same ring.

o When a connection is created and hashed in fnat/nat/snat modes, the related
  redirect is allocated and hashed accordingly.

o When a connection expires and is unhashed and freed in fnat/nat/snat modes,
  the related redirect is unhashed and freed accordingly.

o In the stage of PRE_ROUTING, if the packet does not match any dpvs connection,
  then check if it matches any connection redirect entry. If matched, enqueue
  the packet into the packet redirect ring of the redirect owner core; otherwise,
  continue to process it on the current lcore.

o Within lcore_job_recv_fwd(), add the task of dequeuing the packets from all
  the packet redirect rings owned by the current lcore and process them
  accordingly.
  • Loading branch information
zhuangyan committed Jan 11, 2019
1 parent 70741bb commit a9d16e3
Show file tree
Hide file tree
Showing 12 changed files with 974 additions and 238 deletions.
4 changes: 2 additions & 2 deletions conf/dpvs.conf.items
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

! global config
global_defs {
#daemon <disalbe>
#daemon <disalbe>
log_level INFO <none>
log_file /var/log/dpvs.log <none>
}
Expand Down Expand Up @@ -189,6 +189,7 @@ ipvs_defs {
conn_init_timeout 3 <3, 1-31535999>
expire_quiescent_template <disable>
fast_xmit_close <disable>
redirect off <off/on: disable/enable packet redirect>
}

udp {
Expand Down Expand Up @@ -243,4 +244,3 @@ ipvs_defs {
sa_pool {
<init> pool_hash_size 16 <16, 1-128>
}

56 changes: 44 additions & 12 deletions include/ipvs/conn.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "ipvs/conn.h"
#include "ipvs/proto.h"
#include "ipvs/service.h"
#include "ipvs/redirect.h"

enum {
DPVS_CONN_DIR_INBOUND = 0,
Expand All @@ -35,11 +36,12 @@ enum {
};

enum {
DPVS_CONN_F_HASHED = 0x0040,
DPVS_CONN_F_INACTIVE = 0x0100,
DPVS_CONN_F_SYNPROXY = 0x8000,
DPVS_CONN_F_TEMPLATE = 0x1000,
DPVS_CONN_F_NOFASTXMIT = 0x2000,
DPVS_CONN_F_HASHED = 0x0040,
DPVS_CONN_F_REDIRECT_HASHED = 0x0080,
DPVS_CONN_F_INACTIVE = 0x0100,
DPVS_CONN_F_SYNPROXY = 0x8000,
DPVS_CONN_F_TEMPLATE = 0x1000,
DPVS_CONN_F_NOFASTXMIT = 0x2000,
};

struct dp_vs_conn_param {
Expand Down Expand Up @@ -147,6 +149,9 @@ struct dp_vs_conn {
struct dp_vs_conn *control; /* master who controlls me */
rte_atomic32_t n_control; /* number of connections controlled by me*/
uint64_t ctime; /* create time */

/* connection redirect in fnat/snat/nat modes */
struct dp_vs_redirect *redirect;
} __rte_cache_aligned;

/* for syn-proxy to save all ack packet in conn before rs's syn-ack arrives */
Expand All @@ -162,18 +167,18 @@ struct dp_vs_synproxy_ack_pakcet {
int dp_vs_conn_init(void);
int dp_vs_conn_term(void);

struct dp_vs_conn *
dp_vs_conn_new(struct rte_mbuf *mbuf,
struct dp_vs_conn *
dp_vs_conn_new(struct rte_mbuf *mbuf,
const struct dp_vs_iphdr *iph,
struct dp_vs_conn_param *param,
struct dp_vs_dest *dest,
uint32_t flags);
int dp_vs_conn_del(struct dp_vs_conn *conn);

struct dp_vs_conn *
dp_vs_conn_get(int af, uint16_t proto,
const union inet_addr *saddr,
const union inet_addr *daddr,
dp_vs_conn_get(int af, uint16_t proto,
const union inet_addr *saddr,
const union inet_addr *daddr,
uint16_t sport, uint16_t dport,
int *dir, bool reverse);

Expand All @@ -190,8 +195,8 @@ void dp_vs_conn_put_no_reset(struct dp_vs_conn *conn);
void ipvs_conn_keyword_value_init(void);
void install_ipvs_conn_keywords(void);

static inline void dp_vs_conn_fill_param(int af, uint8_t proto,
const union inet_addr *caddr, const union inet_addr *vaddr,
static inline void dp_vs_conn_fill_param(int af, uint8_t proto,
const union inet_addr *caddr, const union inet_addr *vaddr,
uint16_t cport, uint16_t vport, uint16_t ct_dport,
struct dp_vs_conn_param *param)
{
Expand Down Expand Up @@ -270,4 +275,31 @@ static inline void dp_vs_control_add(struct dp_vs_conn *conn, struct dp_vs_conn
rte_atomic32_inc(&ctl_conn->n_control);
}

/*
 * Test the DPVS_CONN_F_REDIRECT_HASHED bit of conn->flags.
 *
 * Returns true when the bit is set and false when it is clear.
 */
static inline bool
dp_vs_conn_is_redirect_hashed(struct dp_vs_conn *conn)
{
    return (conn->flags & DPVS_CONN_F_REDIRECT_HASHED) != 0;
}

/*
 * Turn on the DPVS_CONN_F_REDIRECT_HASHED bit in conn->flags,
 * leaving every other flag bit untouched.
 */
static inline void
dp_vs_conn_set_redirect_hashed(struct dp_vs_conn *conn)
{
    conn->flags = conn->flags | DPVS_CONN_F_REDIRECT_HASHED;
}

/*
 * Turn off the DPVS_CONN_F_REDIRECT_HASHED bit in conn->flags,
 * leaving every other flag bit untouched.
 */
static inline void
dp_vs_conn_clear_redirect_hashed(struct dp_vs_conn *conn)
{
    conn->flags = conn->flags & ~DPVS_CONN_F_REDIRECT_HASHED;
}

/*
 * Prototype of the connection hash-key routine; its definition is not part
 * of this header.
 *
 * Takes an address family, a source address/port pair, a destination
 * address/port pair and a mask, and returns a 32-bit key.
 * NOTE(review): presumably the mask bounds the key to the hash table size,
 * and ports are in network byte order -- confirm against the definition.
 *
 * The prototype deliberately carries no `inline` specifier: a function with
 * external linkage that is declared `inline` must be defined in the same
 * translation unit, which is not the case for files that merely include
 * this header.
 */
uint32_t dp_vs_conn_hashkey(int af,
        const union inet_addr *saddr, uint16_t sport,
        const union inet_addr *daddr, uint16_t dport,
        uint32_t mask);
int dp_vs_conn_pool_size(void);
int dp_vs_conn_pool_cache_size(void);

extern bool dp_vs_redirect_disable;

#endif /* __DPVS_CONN_H__ */
18 changes: 9 additions & 9 deletions include/ipvs/proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ struct dp_vs_proto {
int (*exit)(struct dp_vs_proto *proto);

/* schedule RS and create new conn */
int (*conn_sched)(struct dp_vs_proto *proto,
int (*conn_sched)(struct dp_vs_proto *proto,
const struct dp_vs_iphdr *iph,
struct rte_mbuf *mbuf,
struct dp_vs_conn **conn,
Expand All @@ -45,12 +45,12 @@ struct dp_vs_proto {
/* lookup conn by <proto, saddr, sport, daddr, dport>
* return conn and direction or NULL if miss */
struct dp_vs_conn *
(*conn_lookup)(struct dp_vs_proto *proto,
(*conn_lookup)(struct dp_vs_proto *proto,
const struct dp_vs_iphdr *iph,
struct rte_mbuf *mbuf, int *direct,
bool reverse, bool *drop);
struct rte_mbuf *mbuf, int *direct,
bool reverse, bool *drop, lcoreid_t *peer_cid);

int (*conn_expire)(struct dp_vs_proto *proto,
int (*conn_expire)(struct dp_vs_proto *proto,
struct dp_vs_conn *conn);

/* for NAT mode */
Expand Down Expand Up @@ -87,13 +87,13 @@ struct dp_vs_proto {
int (*csum_check)(struct dp_vs_proto *proto, int af,
struct rte_mbuf *mbuf);
int (*dump_packet)(struct dp_vs_proto *proto, int af,
struct rte_mbuf *mbuf, int off,
struct rte_mbuf *mbuf, int off,
const char *msg);

/* try trans connn's states by packet and direction */
int (*state_trans)(struct dp_vs_proto *proto,
struct dp_vs_conn *conn,
struct rte_mbuf *mbuf,
int (*state_trans)(struct dp_vs_proto *proto,
struct dp_vs_conn *conn,
struct rte_mbuf *mbuf,
int direct);

const char *
Expand Down
61 changes: 61 additions & 0 deletions include/ipvs/redirect.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* DPVS is a software load balancer (Virtual Server) based on DPDK.
*
* Copyright (C) 2017 iQIYI (www.iqiyi.com).
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#ifndef __DPVS_REDIRECT_H__
#define __DPVS_REDIRECT_H__
#include "common.h"
#include "list.h"
#include "dpdk.h"
#include "netif.h"
#include "ipvs/conn.h"
#include "ipvs/dest.h"

/*
 * The connection redirect tuple is only for the reverse tuple
 * (inside -> outside) in nat-mode.
 *
 * One entry carries a protocol/address/port tuple together with an lcore id.
 * The structure is cache-line aligned (__rte_cache_aligned).
 */
struct dp_vs_redirect {
/* list linkage; presumably chains the entry into the redirect hash table -- TODO confirm */
struct list_head list;

/* presumably the address family of saddr/daddr -- confirm */
uint8_t af;
/* presumably the L4 protocol number of the tuple -- confirm */
uint8_t proto;
/* lcore id; presumably the core that owns the related connection -- confirm */
lcoreid_t cid;
/* explicit filler byte; not named as carrying any data */
uint8_t padding;

/* source and destination addresses of the tuple */
union inet_addr saddr;
union inet_addr daddr;
/* source and destination ports of the tuple (byte order not visible here -- confirm) */
uint16_t sport;
uint16_t dport;

/* NOTE(review): presumably the mempool this entry was allocated from, kept for freeing -- confirm */
struct rte_mempool *redirect_pool;
} __rte_cache_aligned;

struct dp_vs_redirect *dp_vs_redirect_alloc(enum dpvs_fwd_mode fwdmode);
void dp_vs_redirect_free(struct dp_vs_conn *conn);
void dp_vs_redirect_hash(struct dp_vs_conn *conn);
void dp_vs_redirect_unhash(struct dp_vs_conn *conn);
struct dp_vs_redirect *dp_vs_redirect_get(int af, uint16_t proto,
const union inet_addr *saddr, const union inet_addr *daddr,
uint16_t sport, uint16_t dport);
void dp_vs_redirect_init(struct dp_vs_conn *conn);
int dp_vs_redirect_table_init(void);
int dp_vs_redirect_pkt(struct rte_mbuf *mbuf, lcoreid_t peer_cid);
void dp_vs_redirect_ring_proc(struct netif_queue_conf *qconf, lcoreid_t cid);
int dp_vs_redirects_init(void);
int dp_vs_redirects_term(void);

#endif /* __DPVS_REDIRECT_H__ */
11 changes: 7 additions & 4 deletions include/netif.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ struct rx_partner;
/* RX/TX queue conf for lcore */
struct netif_queue_conf
{
queueid_t id;
queueid_t id;
uint16_t len;
uint16_t kni_len;
struct rx_partner *isol_rxq;
Expand All @@ -84,7 +84,7 @@ struct netif_queue_conf
*/
struct netif_port_conf
{
portid_t id;
portid_t id;
/* rx/tx queues for this lcore to process*/
int nrxq;
int ntxq;
Expand All @@ -99,7 +99,7 @@ struct netif_port_conf
*/
struct netif_lcore_conf
{
lcoreid_t id;
lcoreid_t id;
/* nic number of this lcore to process */
int nports;
/* port list of this lcore to process */
Expand Down Expand Up @@ -283,13 +283,14 @@ int netif_lcore_loop_job_register(struct netif_lcore_loop_job *lcore_job);
int netif_lcore_loop_job_unregister(struct netif_lcore_loop_job *lcore_job);
int netif_lcore_start(void);
bool is_lcore_id_valid(lcoreid_t cid);
bool netif_lcore_is_idle(lcoreid_t cid);

/************************** protocol API *****************************/
int netif_register_pkt(struct pkt_type *pt);
int netif_unregister_pkt(struct pkt_type *pt);

/**************************** port API ******************************/
int netif_fdir_filter_set(struct netif_port *port, enum rte_filter_op opcode,
int netif_fdir_filter_set(struct netif_port *port, enum rte_filter_op opcode,
const struct rte_eth_fdir_filter *fdir_flt);
void netif_mask_fdir_filter(int af, const struct netif_port *port,
struct rte_eth_fdir_filter *filt);
Expand Down Expand Up @@ -363,5 +364,7 @@ static inline char *eth_addr_dump(const struct ether_addr *ea,
}

portid_t netif_port_count(void);
void lcore_process_packets(struct netif_queue_conf *qconf, struct rte_mbuf **mbufs,
lcoreid_t cid, uint16_t count, bool pkts_from_ring);

#endif /* __DPVS_NETIF_H__ */
Loading

0 comments on commit a9d16e3

Please sign in to comment.