Skip to content

Commit

Permalink
调教IPv4重组相关文档及代码注释。
Browse files Browse the repository at this point in the history
  • Loading branch information
luozh committed Oct 31, 2017
1 parent a647c97 commit 38e3fbc
Show file tree
Hide file tree
Showing 6 changed files with 141 additions and 27 deletions.
90 changes: 89 additions & 1 deletion IP分片和重组.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,92 @@
buf保存分片数据
调用此函数时,需要将传入mbuf的IPv4头部的DF字段设置为0,表示允许分片

重组
重组
dpdk重组通过hash表来存储收到的分片并设置了老化时间
IPv4报文有一个16位的标识字段,同一报文的每个分片标识字段相同。MF为1表示后面还有其他分片,为0表示没有其他分片;DF为1表示不允许分片,DF为0表示允许分片。
第一个分片和没分片的报文的偏移为0,后续分片此字段不为0。
dpdk采用Cuckoo Hashing算法来实现:哈希函数会生成两个key值,先通过第一个key值找到篮子,看是否能够存储;如果不能存储,那么就使用第二个key值。最终会分配一个新的或者返回已找到的
ip_frag_pkt结构体;如果此结构体已超时,那么会将此结构体重新初始化并释放原资源。

结构体介绍
1.struct rte_ip_frag_tbl
{
uint64_t max_cycles; //老化时间
uint32_t entry_mask; /**< hash value mask. */
uint32_t max_entries; //允许最大条目个数
uint32_t use_entries; //已经运用的条目个数
uint32_t bucket_entries; //每个篮子条目个数
uint32_t nb_entries; //分配的最大条目个数
uint32_t nb_buckets; //篮子的个数
struct ip_frag_pkt *last; /**< last used entry. */
struct ip_pkt_list lru; /**< LRU list for table entries. */
struct ip_frag_tbl_stat stat; /**< statistics counters. */
struct ip_frag_pkt pkt[0]; /**< hash table. */
};
作用存储分片的hash结构体

2.struct rte_ip_frag_death_row
{
uint32_t cnt; //个数
struct rte_mbuf *row[IP_FRAG_DEATH_ROW_LEN * (IP_MAX_FRAG_NUM + 1)];//缓冲区
};
作用:用于存储待释放的mbuf(已老化的分片,以及处理出错时被丢弃的分片),便于调用函数rte_ip_frag_free_death_row统一释放,其中cnt为当前缓存的mbuf个数

3.struct ip_frag_key
{
uint64_t src_dst[4]; //源/目的地址;IPv4只使用src_dst[0],保存从源地址开始的8个字节(源地址+目的地址)
uint32_t id; // 用于保存IPV4标识字段,每发送一个报文,此字段加一
uint32_t key_len; /**< 长度 */
};
作用分片hash表的key结构体

4.struct ip_frag_pkt
{
TAILQ_ENTRY(ip_frag_pkt) lru; /**< LRU list */
struct ip_frag_key key; //段的key
uint64_t start; /**< creation timestamp */
uint32_t total_size; /**< 期待到达的分片的总大小 */
uint32_t frag_size; /**< 已经到达分片所有数据总大小 */
uint32_t last_idx; /**< index of next entry to fill */
struct ip_frag frags[IP_MAX_FRAG_NUM]; /**< fragments */
} __rte_cache_aligned;
作用结构体会用于保存所有的分片信息其中frags用于保存具体的分片信息

/*
 * Bookkeeping record for one received fragment; ip_frag_pkt keeps an
 * array of these (frags[IP_MAX_FRAG_NUM]), one slot per fragment.
 */
struct ip_frag
{
uint16_t ofs; /**< offset of this fragment within the original packet (IPv4 path: header offset field scaled by IPV4_HDR_OFFSET_UNITS) */
uint16_t len; /**< length of this fragment's data (IPv4 path: total_length minus the L3 header length) */
struct rte_mbuf *mb; /**< mbuf holding this fragment; NULL is treated as "slot not filled" by ip_frag_process */
};


函数介绍
1.struct rte_ip_frag_tbl * rte_ip_frag_table_create(
uint32_t bucket_num, //ip分片hash表的篮子个数
uint32_t bucket_entries, //每个篮子的条目个数
uint32_t max_entries, //ip hash表最多能够存储条目的个数,应不超过bucket_num * bucket_entries.
uint64_t max_cycles, //最大老化时间
int socket_id);
作用用于创建ip分片表存储接收到的分片

2.static inline void rte_ip_frag_table_destroy( struct rte_ip_frag_tbl *tbl)
作用删除IP分片表

3.static inline int rte_ipv4_frag_pkt_is_fragmented(const struct ipv4_hdr * hdr)
作用函数通过判断IPv4头部的MF标记和数据偏移是否都为0来决定此报文是不是分片都为0表示报文不是分片报文

4.struct rte_mbuf * rte_ipv4_frag_reassemble_packet(
struct rte_ip_frag_tbl *tbl, //分片hash表
struct rte_ip_frag_death_row *dr, //释放结构体
struct rte_mbuf *mb, //当前分片mbuf
uint64_t tms, //时钟
struct ipv4_hdr *ip_hdr) //ip头部结构体

5.void rte_ip_frag_free_death_row(
struct rte_ip_frag_death_row *dr, //已经老化的缓冲区结构
uint32_t prefetch) //在释放前预取多少mbuf,用于加快释放速度

6.void rte_ip_frag_table_statistics_dump(FILE * f, const struct rte_ip_frag_tbl *tbl);
作用:向流f中输出IP分片表的统计信息


2 changes: 1 addition & 1 deletion dpdk-16.04/dpdk-16.04/examples/ip_reassembly/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ reassemble(struct rte_mbuf *m, uint8_t portid, uint32_t queue,

ip_hdr = (struct ipv4_hdr *)(eth_hdr + 1);

/* if it is a fragmented packet, then try to reassemble. */
//判断此报文是否是分片
if (rte_ipv4_frag_pkt_is_fragmented(ip_hdr)) {
struct rte_mbuf *mo;

Expand Down
48 changes: 34 additions & 14 deletions dpdk-16.04/dpdk-16.04/lib/librte_ip_frag/ip_frag_internal.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,26 +136,41 @@ ipv6_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2)
*v2 = (v << 7) + (v >> 14);
}


/*
函数通过判断:
1.如果此分片是第一个分片,有如下逻辑:
如果第一分片的位置即数组索引为1的位置已经有mbuf保存了,那么会释放此结构,并返回NULL;如果没有被占用,将分片保存到数组中。
2.如果此分片是最后一个分片,有如下逻辑:
如果最后一分片的位置即数组索引为0的位置已经有mbuf保存了,那么会释放此结构,并返回NULL;如果没有被占用,将分片保存到数组中。
3.如果此分片是中间分片,有如下逻辑:
将分片保存到数组相应索引中。
判断是否接收完所有的分片,如果未接收完分片,返回NULL;如果接收完分片,进行重组。
如果重组后,返回的mbuf为NULL,那么释放所有的资源;如果返回的mbuf不为空,那么将此节点的key重置为未使用,并返回重组后的mbuf。
*/
struct rte_mbuf *
ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr,
struct rte_mbuf *mb, uint16_t ofs, uint16_t len, uint16_t more_frags)
{
uint32_t idx;

//计算所有已经到达分片的大小
fp->frag_size += len;

/* this is the first fragment. */
//是第一个分片
if (ofs == 0) {
idx = (fp->frags[IP_FIRST_FRAG_IDX].mb == NULL) ?
IP_FIRST_FRAG_IDX : UINT32_MAX;

/* this is the last fragment. */
//是最后一个分片
} else if (more_frags == 0) {
fp->total_size = ofs + len;
idx = (fp->frags[IP_LAST_FRAG_IDX].mb == NULL) ?
IP_LAST_FRAG_IDX : UINT32_MAX;

/* this is the intermediate fragment. */
//这是中间片段
} else if ((idx = fp->last_idx) <
sizeof (fp->frags) / sizeof (fp->frags[0])) {
fp->last_idx++;
Expand All @@ -165,6 +180,7 @@ ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr,
* errorneous packet: either exceeed max allowed number of fragments,
* or duplicate first/last fragment encountered.
*/
//错误:索引越界(大于等于分片缓存数组的大小),即分片个数超过上限,或者收到了重复的首/尾分片
if (idx >= sizeof (fp->frags) / sizeof (fp->frags[0])) {

/* report an error. */
Expand Down Expand Up @@ -196,24 +212,26 @@ ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr,
fp->frags[IP_LAST_FRAG_IDX].len);

/* free all fragments, invalidate the entry. */
//释放所有分片,并将该节点置为无效
ip_frag_free(fp, dr);
ip_frag_key_invalidate(&fp->key);
IP_FRAG_MBUF2DR(dr, mb);

return NULL;
}

//赋值
fp->frags[idx].ofs = ofs;
fp->frags[idx].len = len;
fp->frags[idx].mb = mb;

mb = NULL;

/* not all fragments are collected yet. */
//不是所有的分片都到达,返回NULL
if (likely (fp->frag_size < fp->total_size)) {
return mb;

/* if we collected all fragments, then try to reassemble. */
//所有分片都已到达,进行重组
} else if (fp->frag_size == fp->total_size &&
fp->frags[IP_FIRST_FRAG_IDX].mb != NULL) {
if (fp->key.key_len == IPV4_KEYLEN)
Expand All @@ -222,7 +240,7 @@ ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr,
mb = ipv6_frag_reassemble(fp);
}

/* errorenous set of fragments. */
//如果mb为NULL,说明重组出错,那么重置结构体fp,并将所有分片加入到待释放队列中。
if (mb == NULL) {

/* report an error. */
Expand Down Expand Up @@ -253,11 +271,11 @@ ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr,
fp->frags[IP_LAST_FRAG_IDX].ofs,
fp->frags[IP_LAST_FRAG_IDX].len);

/* free associated resources. */
//释放资源
ip_frag_free(fp, dr);
}

/* we are done with that entry, invalidate it. */
//重置key为未使用
ip_frag_key_invalidate(&fp->key);
return mb;
}
Expand Down Expand Up @@ -285,6 +303,7 @@ ip_frag_find(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,

IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, find_num, 1);

//通过key查找hash
if ((pkt = ip_frag_lookup(tbl, key, tms, &free, &stale)) == NULL) {

/*timed-out entry, free and invalidate it*/
Expand Down Expand Up @@ -346,6 +365,7 @@ ip_frag_lookup(struct rte_ip_frag_tbl *tbl,
max_cycles = tbl->max_cycles;
assoc = tbl->bucket_entries;

//如果有最后一个使用的元素,那么比较最后一个元素的key值是否相等。
if (tbl->last != NULL && ip_frag_key_cmp(key, &tbl->last->key) == 0)
return tbl->last;

Expand Down Expand Up @@ -378,11 +398,11 @@ ip_frag_lookup(struct rte_ip_frag_tbl *tbl,
p1, i, assoc,
IPv6_KEY_BYTES(p1[i].key.src_dst), p1[i].key.id, p1[i].start);

if (ip_frag_key_cmp(key, &p1[i].key) == 0)
if (ip_frag_key_cmp(key, &p1[i].key) == 0)//判断是否相等
return p1 + i;
else if (ip_frag_key_is_empty(&p1[i].key))
else if (ip_frag_key_is_empty(&p1[i].key))//判断是否为空
empty = (empty == NULL) ? (p1 + i) : empty;
else if (max_cycles + p1[i].start < tms)
else if (max_cycles + p1[i].start < tms)//判断是否超时
old = (old == NULL) ? (p1 + i) : old;

if (p2->key.key_len == IPV4_KEYLEN)
Expand All @@ -404,11 +424,11 @@ ip_frag_lookup(struct rte_ip_frag_tbl *tbl,
p2, i, assoc,
IPv6_KEY_BYTES(p2[i].key.src_dst), p2[i].key.id, p2[i].start);

if (ip_frag_key_cmp(key, &p2[i].key) == 0)
if (ip_frag_key_cmp(key, &p2[i].key) == 0)//判断是否相等
return p2 + i;
else if (ip_frag_key_is_empty(&p2[i].key))
else if (ip_frag_key_is_empty(&p2[i].key))//判断是否为空
empty = (empty == NULL) ?( p2 + i) : empty;
else if (max_cycles + p2[i].start < tms)
else if (max_cycles + p2[i].start < tms)//判断是否超时
old = (old == NULL) ? (p2 + i) : old;
}

Expand Down
3 changes: 3 additions & 0 deletions dpdk-16.04/dpdk-16.04/lib/librte_ip_frag/rte_ip_frag.h
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,9 @@ rte_ipv4_frag_pkt_is_fragmented(const struct ipv4_hdr * hdr) {
ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK);
ip_flag = (uint16_t)(flag_offset & IPV4_HDR_MF_FLAG);

//判断MF为1,不为0表示后面有分片,所以此报文是分片
//判断数据偏移字段是否为0,不为0,表示此为分片
//因此,如果MF字段为0,并且数据偏移字段为0,那么此报文不是一个分片报文
return ip_flag != 0 || ip_ofs != 0;
}

Expand Down
13 changes: 8 additions & 5 deletions dpdk-16.04/dpdk-16.04/lib/librte_ip_frag/rte_ip_frag_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,18 +74,21 @@ rte_ip_frag_table_create(uint32_t bucket_num, uint32_t bucket_entries,
size_t sz;
uint64_t nb_entries;

//计算条目个数
nb_entries = rte_align32pow2(bucket_num);
nb_entries *= bucket_entries;
nb_entries *= IP_FRAG_HASH_FNUM;

/* check input parameters. */
//检查输入参数:每个篮子的个数是否是2的幂;条目个数是否超出32位表示范围及是否为0,是否小于最大条目个数。
if (rte_is_power_of_2(bucket_entries) == 0 ||
nb_entries > UINT32_MAX || nb_entries == 0 ||
nb_entries < max_entries) {
RTE_LOG(ERR, USER1, "%s: invalid input parameter\n", __func__);
return NULL;
}

//计算分配空间大小,并调用函数rte_zmalloc_socket分配空间
sz = sizeof (*tbl) + nb_entries * sizeof (tbl->pkt[0]);
if ((tbl = rte_zmalloc_socket(__func__, sz, RTE_CACHE_LINE_SIZE,
socket_id)) == NULL) {
Expand All @@ -98,11 +101,11 @@ rte_ip_frag_table_create(uint32_t bucket_num, uint32_t bucket_entries,
RTE_LOG(INFO, USER1, "%s: allocated of %zu bytes at socket %d\n",
__func__, sz, socket_id);

tbl->max_cycles = max_cycles;
tbl->max_entries = max_entries;
tbl->nb_entries = (uint32_t)nb_entries;
tbl->nb_buckets = bucket_num;
tbl->bucket_entries = bucket_entries;
tbl->max_cycles = max_cycles; //老化时间
tbl->max_entries = max_entries; //最大条目个数
tbl->nb_entries = (uint32_t)nb_entries; //分配的条目个数
tbl->nb_buckets = bucket_num; //篮子的个数
tbl->bucket_entries = bucket_entries; //每个篮子的个数
tbl->entry_mask = (tbl->nb_entries - 1) & ~(tbl->bucket_entries - 1);

TAILQ_INIT(&(tbl->lru));
Expand Down
12 changes: 6 additions & 6 deletions dpdk-16.04/dpdk-16.04/lib/librte_ip_frag/rte_ipv4_reassembly.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,18 +127,18 @@ rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
uint16_t flag_offset, ip_ofs, ip_flag;

flag_offset = rte_be_to_cpu_16(ip_hdr->fragment_offset);
ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK);
ip_flag = (uint16_t)(flag_offset & IPV4_HDR_MF_FLAG);
ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK);//偏移
ip_flag = (uint16_t)(flag_offset & IPV4_HDR_MF_FLAG);//MF字段

psd = (unaligned_uint64_t *)&ip_hdr->src_addr;
/* use first 8 bytes only */
key.src_dst[0] = psd[0];
key.id = ip_hdr->packet_id;
key.key_len = IPV4_KEYLEN;

ip_ofs *= IPV4_HDR_OFFSET_UNITS;
ip_ofs *= IPV4_HDR_OFFSET_UNITS; //剩以8表示真实偏移
ip_len = (uint16_t)(rte_be_to_cpu_16(ip_hdr->total_length) -
mb->l3_len);
mb->l3_len);//IP负载的大小

IP_FRAG_LOG(DEBUG, "%s:%d:\n"
"mbuf: %p, tms: %" PRIu64
Expand All @@ -150,7 +150,7 @@ rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries,
tbl->use_entries);

/* try to find/add entry into the fragment's table. */
//通过Cuckoo hash算法得到分片的ip_frag_pkt结构体,或者得到一个空的结构体
if ((fp = ip_frag_find(tbl, dr, &key, tms)) == NULL) {
IP_FRAG_MBUF2DR(dr, mb);
return NULL;
Expand All @@ -166,7 +166,7 @@ rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
fp->total_size, fp->frag_size, fp->last_idx);


/* process the fragmented packet. */
//如果所有分片都已到齐,那么进行重组;如果还没有到齐,那么将新的分片存储到结构体中
mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len, ip_flag);
ip_frag_inuse(tbl, fp);

Expand Down

0 comments on commit 38e3fbc

Please sign in to comment.