DPDK IPv4 Reassembly
0. References
30. IP Fragmentation and Reassembly Library — Data Plane Development Kit 20.02.1 documentation: http://doc.dpdk.org/guides-20.02/prog_guide/ip_fragment_reassembly_lib.html#ip-fragment-table
Source code: DPDK-21.11/lib/ip_frag
1. Reassembly Steps
1. Use the key generated from <src ip>, <dst ip>, <pkt id> to look the packet up in the fragment table.
2. If an entry is found, check whether it has timed out. If it has, free all previously received fragments and remove their information from the entry.
3. If no entry is found for that key, try to create a new one in one of two ways:
- use an empty entry;
- delete a timed-out entry, free the associated mbufs, and store the new key in it.
4. Update the entry with the newly arrived fragment and check whether the packet can now be reassembled (i.e. the entry holds all of its fragments):
- if all fragments have arrived, reassemble the packet, mark the entry as empty, and return the reassembled mbuf pointer to the caller;
- if not, return NULL.
A minimal sketch of creating the fragment table these steps operate on follows.
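The table has to be created before any lookups can happen. In the sketch below, NB_BUCKETS, BUCKET_ENTRIES, FLOW_TTL_MS and the helper name are illustrative assumptions; rte_ip_frag_table_create(), rte_get_tsc_hz(), rte_socket_id() and MS_PER_S are the actual DPDK APIs and macros:

#include <rte_cycles.h>
#include <rte_ip_frag.h>
#include <rte_lcore.h>

#define NB_BUCKETS     4096 /* hash bucket count (illustrative sizing) */
#define BUCKET_ENTRIES 16   /* hash associativity, must be a power of 2 */
#define FLOW_TTL_MS    1000 /* drop unfinished datagrams after 1 second */

static struct rte_ip_frag_tbl *
setup_frag_table(void)
{
	/* the table expects the entry lifetime in TSC cycles */
	uint64_t ttl_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) /
			MS_PER_S * FLOW_TTL_MS;

	return rte_ip_frag_table_create(NB_BUCKETS, BUCKET_ENTRIES,
			NB_BUCKETS * BUCKET_ENTRIES, ttl_cycles,
			rte_socket_id());
}

The TTL is converted to TSC cycles because, as ip_frag_find() in the source analysis below shows, staleness is decided by comparing max_cycles + start against the current timestamp.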
2. Source Code Analysis
1. Related data structures
/* fragmented mbuf */
struct ip_frag {
uint16_t ofs; /* offset into the packet */
uint16_t len; /* length of fragment */
struct rte_mbuf *mb; /* fragment mbuf */
};
/*
* key: <src addr, dst_addr, id> to uniquely identify fragmented datagram.
*/
struct ip_frag_key {
uint64_t src_dst[4];
/* src and dst address, only first 8 bytes used for IPv4 */
RTE_STD_C11
union {
uint64_t id_key_len; /* combined for easy fetch */
__extension__
struct {
uint32_t id; /* packet id */
uint32_t key_len; /* src/dst key length */
};
};
};
/*
* Fragmented packet to reassemble.
* First two entries in the frags[] array are for the last and first fragments.
*/
struct ip_frag_pkt {
RTE_TAILQ_ENTRY(ip_frag_pkt) lru; /* LRU list */
struct ip_frag_key key; /* fragmentation key */
uint64_t start; /* creation timestamp */
uint32_t total_size; /* expected reassembled size */
uint32_t frag_size; /* size of fragments received */
uint32_t last_idx; /* index of next entry to fill */
struct ip_frag frags[IP_MAX_FRAG_NUM]; /* fragments */
} __rte_cache_aligned;
/* fragmentation table statistics */
struct ip_frag_tbl_stat {
uint64_t find_num; /* total # of find/insert attempts. */
uint64_t add_num; /* # of add ops. */
uint64_t del_num; /* # of del ops. */
uint64_t reuse_num; /* # of reuse (del/add) ops. */
uint64_t fail_total; /* total # of add failures. */
uint64_t fail_nospace; /* # of 'no space' add failures. */
} __rte_cache_aligned;
/* fragmentation table */
struct rte_ip_frag_tbl {
uint64_t max_cycles; /* ttl for table entries. */
uint32_t entry_mask; /* hash value mask. */
uint32_t max_entries; /* max entries allowed. */
uint32_t use_entries; /* entries in use. */
uint32_t bucket_entries; /* hash associativity. */
uint32_t nb_entries; /* total size of the table. */
uint32_t nb_buckets; /* num of associativity lines. */
struct ip_frag_pkt *last; /* last used entry. */
struct ip_pkt_list lru; /* LRU list for table entries. */
struct ip_frag_tbl_stat stat; /* statistics counters. */
__extension__ struct ip_frag_pkt pkt[0]; /* hash table. */
};
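Two layout details are worth noting: pkt[0] is a flexible array member holding the actual hash table, and entry_mask maps a hash signature onto the start of a bucket line of bucket_entries slots. A minimal sketch of the indexing idea, modeled on the library's IP_FRAG_TBL_POS() macro (frag_tbl_pos is a hypothetical name; the real ip_frag_lookup() derives two signatures per key and probes both of the resulting bucket lines):

/* locate the bucket line that a hash signature falls into */
static inline struct ip_frag_pkt *
frag_tbl_pos(struct rte_ip_frag_tbl *tbl, uint32_t sig)
{
	/* entry_mask keeps the index inside the table and aligned
	 * to a bucket boundary */
	return tbl->pkt + (sig & tbl->entry_mask);
}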
2. Function calls during reassembly
/*
* Process new mbuf with fragment of IPV4 packet.
* Incoming mbuf should have its l2_len/l3_len fields set up correctly.
* @param tbl
* Table where to lookup/add the fragmented packet.
* @param mb
* Incoming mbuf with IPV4 fragment.
* @param tms
* Fragment arrival timestamp.
* @param ip_hdr
* Pointer to the IPV4 header inside the fragment.
* @return
* Pointer to mbuf for reassembled packet, or NULL if:
* - an error occurred.
* - not all fragments of the packet are collected yet.
*/
struct rte_mbuf *
rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl,
struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms,
struct rte_ipv4_hdr *ip_hdr)
{
...
/* try to find/add entry into the fragment's table. */
if ((fp = ip_frag_find(tbl, dr, &key, tms)) == NULL) {
IP_FRAG_MBUF2DR(dr, mb);
return NULL;
}
...
/* process the fragmented packet. */
mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len, ip_flag);
ip_frag_inuse(tbl, fp);
...
}
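A caller-side sketch of how this is typically driven from the RX path, assuming plain Ethernet framing and a previously created table (handle_ipv4 and the prefetch count of 3 are illustrative; rte_ipv4_frag_pkt_is_fragmented(), rte_ipv4_frag_reassemble_packet() and rte_ip_frag_free_death_row() are the actual library API):

#include <rte_cycles.h>
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_mbuf.h>

/* returns the mbuf (reassembled if this was the final fragment),
 * or NULL while fragments are still outstanding or on error */
static struct rte_mbuf *
handle_ipv4(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
	struct rte_mbuf *m)
{
	struct rte_ipv4_hdr *ip_hdr = rte_pktmbuf_mtod_offset(m,
			struct rte_ipv4_hdr *, sizeof(struct rte_ether_hdr));

	if (!rte_ipv4_frag_pkt_is_fragmented(ip_hdr))
		return m; /* not a fragment, pass through */

	/* the library requires l2_len/l3_len to be set by the caller */
	m->l2_len = sizeof(struct rte_ether_hdr);
	m->l3_len = (ip_hdr->version_ihl & RTE_IPV4_HDR_IHL_MASK) *
			RTE_IPV4_IHL_MULTIPLIER;

	m = rte_ipv4_frag_reassemble_packet(tbl, dr, m, rte_rdtsc(), ip_hdr);

	/* release mbufs queued for deletion during lookup/processing */
	rte_ip_frag_free_death_row(dr, 3);

	return m;
}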
/*
* Find an entry in the table for the corresponding fragment.
* If such entry is not present, then allocate a new one.
* If the entry is stale, then free and reuse it.
*/
struct ip_frag_pkt *
ip_frag_find(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
const struct ip_frag_key *key, uint64_t tms)
{
struct ip_frag_pkt *pkt, *free, *stale, *lru;
uint64_t max_cycles;
/*
* Actually the two lines below are totally redundant.
* They are here just to make gcc 4.6 happy.
*/
free = NULL;
stale = NULL;
max_cycles = tbl->max_cycles;
IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, find_num, 1);
if ((pkt = ip_frag_lookup(tbl, key, tms, &free, &stale)) == NULL) {
/*timed-out entry, free and invalidate it*/
if (stale != NULL) {
ip_frag_tbl_del(tbl, dr, stale);
free = stale;
/*
* we found a free entry, check if we can use it.
* If we run out of free entries in the table, then
* check if we have a timed out entry to delete.
*/
} else if (free != NULL &&
tbl->max_entries <= tbl->use_entries) {
lru = TAILQ_FIRST(&tbl->lru);
if (max_cycles + lru->start < tms) {
ip_frag_tbl_del(tbl, dr, lru);
} else {
free = NULL;
IP_FRAG_TBL_STAT_UPDATE(&tbl->stat,
fail_nospace, 1);
}
}
/* found a free entry to reuse. */
if (free != NULL) {
ip_frag_tbl_add(tbl, free, key, tms);
pkt = free;
}
/*
* we found the flow, but it is already timed out,
* so free associated resources, reposition it in the LRU list,
* and reuse it.
*/
} else if (max_cycles + pkt->start < tms) {
ip_frag_tbl_reuse(tbl, dr, pkt, tms);
}
IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, fail_total, (pkt == NULL));
tbl->last = pkt;
return pkt;
}
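In short, there are three outcomes: the flow is found and still fresh, so the entry is returned as-is; the flow is found but timed out, so ip_frag_tbl_reuse() frees its fragments and recycles the entry for the new arrival; or no entry matches, in which case a free entry is claimed, falling back to evicting the LRU head when the table is full and that head has itself timed out. Only when the table is full and even its oldest entry is still live does the lookup fail, bumping the fail_nospace counter.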
struct rte_mbuf *
ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr,
struct rte_mbuf *mb, uint16_t ofs, uint16_t len, uint16_t more_frags)
{
uint32_t idx;
fp->frag_size += len;
/* this is the first fragment. */
if (ofs == 0) {
idx = (fp->frags[IP_FIRST_FRAG_IDX].mb == NULL) ?
IP_FIRST_FRAG_IDX : UINT32_MAX;
/* this is the last fragment. */
} else if (more_frags == 0) {
fp->total_size = ofs + len;
idx = (fp->frags[IP_LAST_FRAG_IDX].mb == NULL) ?
IP_LAST_FRAG_IDX : UINT32_MAX;
/* this is the intermediate fragment. */
} else if ((idx = fp->last_idx) < RTE_DIM(fp->frags)) {
fp->last_idx++;
}
...
fp->frags[idx].ofs = ofs;
fp->frags[idx].len = len;
fp->frags[idx].mb = mb;
mb = NULL;
/* not all fragments are collected yet. */
if (likely (fp->frag_size < fp->total_size)) {
return mb;
/* if we collected all fragments, then try to reassemble. */
} else if (fp->frag_size == fp->total_size &&
fp->frags[IP_FIRST_FRAG_IDX].mb != NULL) {
if (fp->key.key_len == IPV4_KEYLEN)
mb = ipv4_frag_reassemble(fp);
else
mb = ipv6_frag_reassemble(fp);
}
...
/* we are done with that entry, invalidate it. */
ip_frag_key_invalidate(&fp->key);
return mb;
}
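A concrete example of this bookkeeping: a 4000-byte IPv4 payload sent across a 1500-byte MTU link arrives as three fragments with payload (offset, length) pairs (0, 1480), (1480, 1480) and (2960, 1040), with MF set on the first two. The last fragment sets total_size = 2960 + 1040 = 4000, while frag_size accumulates 1480 + 1480 + 1040 = 4000. Once frag_size equals total_size and the first-fragment slot is filled, ipv4_frag_reassemble() runs, after which ip_frag_key_invalidate() marks the entry free for reuse.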
/*
* Reassemble fragments into one packet.
*/
struct rte_mbuf *
ipv4_frag_reassemble(struct ip_frag_pkt *fp)
{
struct rte_ipv4_hdr *ip_hdr;
struct rte_mbuf *m, *prev;
uint32_t i, n, ofs, first_len;
uint32_t curr_idx = 0;
first_len = fp->frags[IP_FIRST_FRAG_IDX].len;
n = fp->last_idx - 1;
/*start from the last fragment. */
m = fp->frags[IP_LAST_FRAG_IDX].mb;
ofs = fp->frags[IP_LAST_FRAG_IDX].ofs;
curr_idx = IP_LAST_FRAG_IDX;
while (ofs != first_len) {
prev = m;
for (i = n; i != IP_FIRST_FRAG_IDX && ofs != first_len; i--) {
/* previous fragment found. */
if (fp->frags[i].ofs + fp->frags[i].len == ofs) {
RTE_ASSERT(curr_idx != i);
/* adjust start of the last fragment data. */
rte_pktmbuf_adj(m,
(uint16_t)(m->l2_len + m->l3_len));
rte_pktmbuf_chain(fp->frags[i].mb, m);
/* this mbuf should not be accessed directly */
fp->frags[curr_idx].mb = NULL;
curr_idx = i;
/* update our last fragment and offset. */
m = fp->frags[i].mb;
ofs = fp->frags[i].ofs;
}
}
/* error - hole in the packet. */
if (m == prev) {
return NULL;
}
}
/* chain with the first fragment. */
rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len));
rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m);
fp->frags[curr_idx].mb = NULL;
m = fp->frags[IP_FIRST_FRAG_IDX].mb;
fp->frags[IP_FIRST_FRAG_IDX].mb = NULL;
/* update ipv4 header for the reassembled packet */
ip_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *, m->l2_len);
ip_hdr->total_length = rte_cpu_to_be_16((uint16_t)(fp->total_size +
m->l3_len));
ip_hdr->fragment_offset = (uint16_t)(ip_hdr->fragment_offset &
rte_cpu_to_be_16(RTE_IPV4_HDR_DF_FLAG));
ip_hdr->hdr_checksum = 0;
return m;
}
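Note the header fix-up at the end: total_length is recomputed, fragment_offset keeps only the DF bit (MF and the offset bits are cleared), and hdr_checksum is zeroed, so the caller must recompute the checksum before transmission, either via TX checksum offload or in software. A minimal software sketch (fix_ipv4_cksum is a hypothetical helper; rte_ipv4_cksum() is the actual API):

#include <rte_ip.h>
#include <rte_mbuf.h>

/* recompute the IPv4 header checksum of a reassembled packet;
 * the library already zeroed hdr_checksum, as rte_ipv4_cksum()
 * requires */
static void
fix_ipv4_cksum(struct rte_mbuf *m)
{
	struct rte_ipv4_hdr *ip_hdr = rte_pktmbuf_mtod_offset(m,
			struct rte_ipv4_hdr *, m->l2_len);

	ip_hdr->hdr_checksum = rte_ipv4_cksum(ip_hdr);
}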