forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
samples: bpf: large eBPF program in C
sockex2_kern.c is a purposefully large eBPF program in C. llvm compiles ~200 lines of C code into ~300 eBPF instructions. It is similar to __skb_flow_dissect() and demonstrates that complex packet parsing can be done by eBPF. It then uses the (struct flow_keys)->dst IP address (or a hash of the IPv6 dst) to keep stats of the number of packets per IP. User space loads the eBPF program, attaches it to the loopback interface and prints dest_ip->#packets stats every second.

Usage:
$ sudo samples/bpf/sockex2
ip 127.0.0.1 count 19
ip 127.0.0.1 count 178115
ip 127.0.0.1 count 369437
ip 127.0.0.1 count 559841
ip 127.0.0.1 count 750539

Signed-off-by: Alexei Starovoitov <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
- Loading branch information
Showing
3 changed files
with
263 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,215 @@ | ||
#include <uapi/linux/bpf.h> | ||
#include "bpf_helpers.h" | ||
#include <uapi/linux/in.h> | ||
#include <uapi/linux/if.h> | ||
#include <uapi/linux/if_ether.h> | ||
#include <uapi/linux/ip.h> | ||
#include <uapi/linux/ipv6.h> | ||
#include <uapi/linux/if_tunnel.h> | ||
/* Masks applied to the 16-bit iphdr.frag_off value in ip_is_fragment(). */
#define IP_MF		0x2000	/* "more fragments" flag bit */
#define IP_OFFSET	0x1FFF	/* fragment offset bits */
|
||
/*
 * VLAN tag layout.  Only used through offsetof()/sizeof() by
 * flow_dissector() to locate the encapsulated protocol and to step
 * over the tag.
 */
struct vlan_hdr {
	__be16 h_vlan_TCI;			/* tag control information */
	__be16 h_vlan_encapsulated_proto;	/* protocol carried inside the tag */
};
|
||
/*
 * Result of dissecting one packet; filled in by parse_ip(), parse_ipv6()
 * and flow_dissector().
 */
struct flow_keys {
	__be32 src;		/* IPv4 source address, or XOR hash of the IPv6 one */
	__be32 dst;		/* IPv4 destination address, or XOR hash of the IPv6 one */
	union {
		__be32 ports;		/* 32-bit word loaded at the final header offset */
		__be16 port16[2];	/* the same word viewed as two 16-bit halves */
	};
	__u16 thoff;		/* final header offset, truncated to 16 bits */
	__u8 ip_proto;		/* protocol reported by the last parsed IP header */
};
|
||
/*
 * Offset, relative to the start of the transport header, of the 32-bit
 * word that flow_dissector() stores in flow_keys.ports.
 *
 * IPPROTO_AH is the only protocol with a non-zero offset (4).  Every other
 * value -- TCP, UDP, DCCP, ESP, SCTP, UDPLITE and anything unrecognised --
 * yields 0, so the result is never negative.
 */
static inline int proto_ports_offset(__u64 proto)
{
	if (proto == IPPROTO_AH)
		return 4;

	return 0;
}
|
||
/*
 * Non-zero when the IPv4 header at @nhoff has either the "more fragments"
 * flag or a non-zero fragment offset set in its frag_off field.
 *
 * load_half() is declared outside this file; it is used here to fetch the
 * 16-bit frag_off value at the given packet offset.
 */
static inline int ip_is_fragment(struct sk_buff *ctx, __u64 nhoff)
{
	__u64 frag_off_pos = nhoff + offsetof(struct iphdr, frag_off);

	return load_half(ctx, frag_off_pos) & (IP_MF | IP_OFFSET);
}
|
||
/*
 * Fold the 16 bytes starting at packet offset @off (an IPv6 address in the
 * callers) into 32 bits by XOR-ing its four consecutive 32-bit words.
 */
static inline __u32 ipv6_addr_hash(struct sk_buff *ctx, __u64 off)
{
	__u64 folded;

	folded  = load_word(ctx, off);
	folded ^= load_word(ctx, off + 4);
	folded ^= load_word(ctx, off + 8);
	folded ^= load_word(ctx, off + 12);

	return (__u32)folded;
}
|
||
/*
 * Parse the IPv4 header located at packet offset @nhoff.
 *
 * @skb:      packet being inspected
 * @nhoff:    offset of the IPv4 header
 * @ip_proto: out: the header's protocol field, or 0 for a fragment
 * @flow:     out: src/dst are filled in unless the protocol is GRE
 *
 * Returns the offset just past the IPv4 header (header length taken from
 * the low nibble of the first byte, in 32-bit words).
 */
static inline __u64 parse_ip(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
			     struct flow_keys *flow)
{
	__u64 first_byte;

	if (likely(!ip_is_fragment(skb, nhoff)))
		*ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
	else
		*ip_proto = 0;

	/* For GRE the addresses are left untouched here. */
	if (*ip_proto != IPPROTO_GRE) {
		flow->src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
		flow->dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
	}

	/* First byte holds version (high nibble) and header length (low nibble). */
	first_byte = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);

	/* 0x45 is IPv4 with no options: the common 20-byte header. */
	return nhoff + (likely(first_byte == 0x45) ? 20 : (first_byte & 0xF) << 2);
}
|
||
/*
 * Parse the fixed IPv6 header located at packet offset @nhoff.
 *
 * @skb:      packet being inspected
 * @nhoff:    offset of the IPv6 header
 * @ip_proto: out: the header's nexthdr field
 * @flow:     out: src/dst receive the 32-bit XOR hash of each address
 *
 * Returns the offset just past the fixed header; nothing beyond
 * sizeof(struct ipv6hdr) is skipped.
 */
static inline __u64 parse_ipv6(struct sk_buff *skb, __u64 nhoff, __u64 *ip_proto,
			       struct flow_keys *flow)
{
	__u64 nexthdr_pos = nhoff + offsetof(struct ipv6hdr, nexthdr);
	__u64 saddr_pos = nhoff + offsetof(struct ipv6hdr, saddr);
	__u64 daddr_pos = nhoff + offsetof(struct ipv6hdr, daddr);

	*ip_proto = load_byte(skb, nexthdr_pos);
	flow->src = ipv6_addr_hash(skb, saddr_pos);
	flow->dst = ipv6_addr_hash(skb, daddr_pos);

	return nhoff + sizeof(struct ipv6hdr);
}
|
||
static inline bool flow_dissector(struct sk_buff *skb, struct flow_keys *flow) | ||
{ | ||
__u64 nhoff = ETH_HLEN; | ||
__u64 ip_proto; | ||
__u64 proto = load_half(skb, 12); | ||
int poff; | ||
|
||
if (proto == ETH_P_8021AD) { | ||
proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, | ||
h_vlan_encapsulated_proto)); | ||
nhoff += sizeof(struct vlan_hdr); | ||
} | ||
|
||
if (proto == ETH_P_8021Q) { | ||
proto = load_half(skb, nhoff + offsetof(struct vlan_hdr, | ||
h_vlan_encapsulated_proto)); | ||
nhoff += sizeof(struct vlan_hdr); | ||
} | ||
|
||
if (likely(proto == ETH_P_IP)) | ||
nhoff = parse_ip(skb, nhoff, &ip_proto, flow); | ||
else if (proto == ETH_P_IPV6) | ||
nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); | ||
else | ||
return false; | ||
|
||
switch (ip_proto) { | ||
case IPPROTO_GRE: { | ||
struct gre_hdr { | ||
__be16 flags; | ||
__be16 proto; | ||
}; | ||
|
||
__u64 gre_flags = load_half(skb, | ||
nhoff + offsetof(struct gre_hdr, flags)); | ||
__u64 gre_proto = load_half(skb, | ||
nhoff + offsetof(struct gre_hdr, proto)); | ||
|
||
if (gre_flags & (GRE_VERSION|GRE_ROUTING)) | ||
break; | ||
|
||
proto = gre_proto; | ||
nhoff += 4; | ||
if (gre_flags & GRE_CSUM) | ||
nhoff += 4; | ||
if (gre_flags & GRE_KEY) | ||
nhoff += 4; | ||
if (gre_flags & GRE_SEQ) | ||
nhoff += 4; | ||
|
||
if (proto == ETH_P_8021Q) { | ||
proto = load_half(skb, | ||
nhoff + offsetof(struct vlan_hdr, | ||
h_vlan_encapsulated_proto)); | ||
nhoff += sizeof(struct vlan_hdr); | ||
} | ||
|
||
if (proto == ETH_P_IP) | ||
nhoff = parse_ip(skb, nhoff, &ip_proto, flow); | ||
else if (proto == ETH_P_IPV6) | ||
nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); | ||
else | ||
return false; | ||
break; | ||
} | ||
case IPPROTO_IPIP: | ||
nhoff = parse_ip(skb, nhoff, &ip_proto, flow); | ||
break; | ||
case IPPROTO_IPV6: | ||
nhoff = parse_ipv6(skb, nhoff, &ip_proto, flow); | ||
break; | ||
default: | ||
break; | ||
} | ||
|
||
flow->ip_proto = ip_proto; | ||
poff = proto_ports_offset(ip_proto); | ||
if (poff >= 0) { | ||
nhoff += poff; | ||
flow->ports = load_word(skb, nhoff); | ||
} | ||
|
||
flow->thoff = (__u16) nhoff; | ||
|
||
return true; | ||
} | ||
|
||
/*
 * Hash map of packet counters: the key is a 32-bit value (bpf_prog2() uses
 * flow_keys.dst), the value is a long, and at most 1024 entries are kept.
 */
struct bpf_map_def SEC("maps") hash_map = {
	.type		= BPF_MAP_TYPE_HASH,
	.key_size	= sizeof(__be32),
	.value_size	= sizeof(long),
	.max_entries	= 1024,
};
|
||
/*
 * Program entry point: dissect the packet and bump the counter stored in
 * hash_map under the flow's dst value.  Always returns 0.
 */
SEC("socket2")
int bpf_prog2(struct sk_buff *skb)
{
	struct flow_keys flow;
	long *count;
	u32 key;

	if (!flow_dissector(skb, &flow))
		return 0;

	key = flow.dst;
	count = bpf_map_lookup_elem(&hash_map, &key);
	if (!count) {
		/* First packet seen for this key: create the entry with count 1. */
		long first = 1;

		bpf_map_update_elem(&hash_map, &key, &first, BPF_ANY);
		return 0;
	}

	/* Existing entry: increment in place with an atomic add. */
	__sync_fetch_and_add(count, 1);
	return 0;
}
|
||
/* License string, tagged with SEC("license"). */
char _license[] SEC("license") = "GPL";
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#include <stdio.h> | ||
#include <assert.h> | ||
#include <linux/bpf.h> | ||
#include "libbpf.h" | ||
#include "bpf_load.h" | ||
#include <unistd.h> | ||
#include <arpa/inet.h> | ||
|
||
int main(int ac, char **argv) | ||
{ | ||
char filename[256]; | ||
FILE *f; | ||
int i, sock; | ||
|
||
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); | ||
|
||
if (load_bpf_file(filename)) { | ||
printf("%s", bpf_log_buf); | ||
return 1; | ||
} | ||
|
||
sock = open_raw_sock("lo"); | ||
|
||
assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, prog_fd, | ||
sizeof(prog_fd[0])) == 0); | ||
|
||
f = popen("ping -c5 localhost", "r"); | ||
(void) f; | ||
|
||
for (i = 0; i < 5; i++) { | ||
int key = 0, next_key; | ||
long long value; | ||
|
||
while (bpf_get_next_key(map_fd[0], &key, &next_key) == 0) { | ||
bpf_lookup_elem(map_fd[0], &next_key, &value); | ||
printf("ip %s count %lld\n", | ||
inet_ntoa((struct in_addr){htonl(next_key)}), | ||
value); | ||
key = next_key; | ||
} | ||
sleep(1); | ||
} | ||
return 0; | ||
} |