forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Jia He says: ==================== Reduce cache miss for snmp_fold_field On a PowerPC server with a large number of CPUs (160), besides commit a3a7737 ("net: Optimize snmp stat aggregation by walking all the percpu data at once"), I observed several other snmp_fold_field call sites which cause a high cache miss rate. test source code: ================ My simple test case, which reads from the procfs items endlessly: /***********************************************************/ int main(int argc, char **argv) { int i; int fd = -1 ; int rdsize = 0; char buf[LINELEN+1]; buf[LINELEN] = 0; memset(buf,0,LINELEN); if(1 >= argc) { printf("file name empty\n"); return -1; } fd = open(argv[1], O_RDWR, 0644); if(0 > fd){ printf("open error\n"); return -2; } for(i=0;i<0xffffffff;i++) { while(0 < (rdsize = read(fd,buf,LINELEN))){ //nothing here } lseek(fd, 0, SEEK_SET); } close(fd); return 0; } /**********************************************************/ compile and run: ================ gcc test.c -o test perf stat -d -e cache-misses ./test /proc/net/snmp perf stat -d -e cache-misses ./test /proc/net/snmp6 perf stat -d -e cache-misses ./test /proc/net/sctp/snmp perf stat -d -e cache-misses ./test /proc/net/xfrm_stat before the patch set: ==================== Performance counter stats for 'system wide': 355911097 cache-misses [40.08%] 2356829300 L1-dcache-loads [60.04%] 355642645 L1-dcache-load-misses # 15.09% of all L1-dcache hits [60.02%] 346544541 LLC-loads [59.97%] 389763 LLC-load-misses # 0.11% of all LL-cache hits [40.02%] 6.245162638 seconds time elapsed After the patch set: =================== Performance counter stats for 'system wide': 194992476 cache-misses [40.03%] 6718051877 L1-dcache-loads [60.07%] 194871921 L1-dcache-load-misses # 2.90% of all L1-dcache hits [60.11%] 187632232 LLC-loads [60.04%] 464466 LLC-load-misses # 0.25% of all LL-cache hits [39.89%] 6.868422769 seconds time elapsed The cache-miss rate can be reduced from 15% to 2.9% changelog ========= v6: - 
correct v5 v5: - order local variables from longest to shortest line v4: - move memset into one block of if statement in snmp6_seq_show_item - remove the changes in netstat_seq_show, considering that the stack usage would be too large v3: - introduce generic interface (suggested by Marcelo Ricardo Leitner) - use max_t instead of self defined macro (suggested by David Miller) v2: - fix bug in udplite statistics. - snmp_seq_show is split into 2 parts ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
- Loading branch information
Showing
6 changed files
with
129 additions
and
58 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,6 +46,8 @@ | |
#include <net/sock.h> | ||
#include <net/raw.h> | ||
|
||
#define TCPUDP_MIB_MAX max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX) | ||
|
||
/* | ||
* Report socket allocation statistics [mea@utu.fi] | ||
*/ | ||
|
@@ -356,95 +358,117 @@ static void icmp_put(struct seq_file *seq) | |
atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; | ||
|
||
seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); | ||
for (i = 0; icmpmibmap[i].name != NULL; i++) | ||
for (i = 0; icmpmibmap[i].name; i++) | ||
seq_printf(seq, " In%s", icmpmibmap[i].name); | ||
seq_puts(seq, " OutMsgs OutErrors"); | ||
for (i = 0; icmpmibmap[i].name != NULL; i++) | ||
for (i = 0; icmpmibmap[i].name; i++) | ||
seq_printf(seq, " Out%s", icmpmibmap[i].name); | ||
seq_printf(seq, "\nIcmp: %lu %lu %lu", | ||
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS), | ||
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS), | ||
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); | ||
for (i = 0; icmpmibmap[i].name != NULL; i++) | ||
for (i = 0; icmpmibmap[i].name; i++) | ||
seq_printf(seq, " %lu", | ||
atomic_long_read(ptr + icmpmibmap[i].index)); | ||
seq_printf(seq, " %lu %lu", | ||
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), | ||
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); | ||
for (i = 0; icmpmibmap[i].name != NULL; i++) | ||
for (i = 0; icmpmibmap[i].name; i++) | ||
seq_printf(seq, " %lu", | ||
atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); | ||
} | ||
|
||
/* | ||
* Called from the PROCfs module. This outputs /proc/net/snmp. | ||
*/ | ||
static int snmp_seq_show(struct seq_file *seq, void *v) | ||
static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) | ||
{ | ||
int i; | ||
struct net *net = seq->private; | ||
u64 buff64[IPSTATS_MIB_MAX]; | ||
int i; | ||
|
||
seq_puts(seq, "Ip: Forwarding DefaultTTL"); | ||
memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); | ||
|
||
for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) | ||
seq_puts(seq, "Ip: Forwarding DefaultTTL"); | ||
for (i = 0; snmp4_ipstats_list[i].name; i++) | ||
seq_printf(seq, " %s", snmp4_ipstats_list[i].name); | ||
|
||
seq_printf(seq, "\nIp: %d %d", | ||
IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, | ||
net->ipv4.sysctl_ip_default_ttl); | ||
|
||
BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); | ||
for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) | ||
seq_printf(seq, " %llu", | ||
snmp_fold_field64(net->mib.ip_statistics, | ||
snmp4_ipstats_list[i].entry, | ||
offsetof(struct ipstats_mib, syncp))); | ||
snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, | ||
net->mib.ip_statistics, | ||
offsetof(struct ipstats_mib, syncp)); | ||
for (i = 0; snmp4_ipstats_list[i].name; i++) | ||
seq_printf(seq, " %llu", buff64[i]); | ||
|
||
icmp_put(seq); /* RFC 2011 compatibility */ | ||
icmpmsg_put(seq); | ||
return 0; | ||
} | ||
|
||
static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) | ||
{ | ||
unsigned long buff[TCPUDP_MIB_MAX]; | ||
struct net *net = seq->private; | ||
int i; | ||
|
||
memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); | ||
|
||
seq_puts(seq, "\nTcp:"); | ||
for (i = 0; snmp4_tcp_list[i].name != NULL; i++) | ||
for (i = 0; snmp4_tcp_list[i].name; i++) | ||
seq_printf(seq, " %s", snmp4_tcp_list[i].name); | ||
|
||
seq_puts(seq, "\nTcp:"); | ||
for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { | ||
snmp_get_cpu_field_batch(buff, snmp4_tcp_list, | ||
net->mib.tcp_statistics); | ||
for (i = 0; snmp4_tcp_list[i].name; i++) { | ||
/* MaxConn field is signed, RFC 2012 */ | ||
if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) | ||
seq_printf(seq, " %ld", | ||
snmp_fold_field(net->mib.tcp_statistics, | ||
snmp4_tcp_list[i].entry)); | ||
seq_printf(seq, " %ld", buff[i]); | ||
else | ||
seq_printf(seq, " %lu", | ||
snmp_fold_field(net->mib.tcp_statistics, | ||
snmp4_tcp_list[i].entry)); | ||
seq_printf(seq, " %lu", buff[i]); | ||
} | ||
|
||
memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); | ||
|
||
snmp_get_cpu_field_batch(buff, snmp4_udp_list, | ||
net->mib.udp_statistics); | ||
seq_puts(seq, "\nUdp:"); | ||
for (i = 0; snmp4_udp_list[i].name != NULL; i++) | ||
for (i = 0; snmp4_udp_list[i].name; i++) | ||
seq_printf(seq, " %s", snmp4_udp_list[i].name); | ||
|
||
seq_puts(seq, "\nUdp:"); | ||
for (i = 0; snmp4_udp_list[i].name != NULL; i++) | ||
seq_printf(seq, " %lu", | ||
snmp_fold_field(net->mib.udp_statistics, | ||
snmp4_udp_list[i].entry)); | ||
for (i = 0; snmp4_udp_list[i].name; i++) | ||
seq_printf(seq, " %lu", buff[i]); | ||
|
||
memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); | ||
|
||
/* the UDP and UDP-Lite MIBs are the same */ | ||
seq_puts(seq, "\nUdpLite:"); | ||
for (i = 0; snmp4_udp_list[i].name != NULL; i++) | ||
snmp_get_cpu_field_batch(buff, snmp4_udp_list, | ||
net->mib.udplite_statistics); | ||
for (i = 0; snmp4_udp_list[i].name; i++) | ||
seq_printf(seq, " %s", snmp4_udp_list[i].name); | ||
|
||
seq_puts(seq, "\nUdpLite:"); | ||
for (i = 0; snmp4_udp_list[i].name != NULL; i++) | ||
seq_printf(seq, " %lu", | ||
snmp_fold_field(net->mib.udplite_statistics, | ||
snmp4_udp_list[i].entry)); | ||
for (i = 0; snmp4_udp_list[i].name; i++) | ||
seq_printf(seq, " %lu", buff[i]); | ||
|
||
seq_putc(seq, '\n'); | ||
return 0; | ||
} | ||
|
||
static int snmp_seq_show(struct seq_file *seq, void *v) | ||
{ | ||
snmp_seq_show_ipstats(seq, v); | ||
|
||
icmp_put(seq); /* RFC 2011 compatibility */ | ||
icmpmsg_put(seq); | ||
|
||
snmp_seq_show_tcp_udp(seq, v); | ||
|
||
return 0; | ||
} | ||
|
||
static int snmp_seq_open(struct inode *inode, struct file *file) | ||
{ | ||
return single_open_net(inode, file, snmp_seq_show); | ||
|
@@ -469,21 +493,21 @@ static int netstat_seq_show(struct seq_file *seq, void *v) | |
struct net *net = seq->private; | ||
|
||
seq_puts(seq, "TcpExt:"); | ||
for (i = 0; snmp4_net_list[i].name != NULL; i++) | ||
for (i = 0; snmp4_net_list[i].name; i++) | ||
seq_printf(seq, " %s", snmp4_net_list[i].name); | ||
|
||
seq_puts(seq, "\nTcpExt:"); | ||
for (i = 0; snmp4_net_list[i].name != NULL; i++) | ||
for (i = 0; snmp4_net_list[i].name; i++) | ||
seq_printf(seq, " %lu", | ||
snmp_fold_field(net->mib.net_statistics, | ||
snmp4_net_list[i].entry)); | ||
|
||
seq_puts(seq, "\nIpExt:"); | ||
for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) | ||
for (i = 0; snmp4_ipextstats_list[i].name; i++) | ||
seq_printf(seq, " %s", snmp4_ipextstats_list[i].name); | ||
|
||
seq_puts(seq, "\nIpExt:"); | ||
for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) | ||
for (i = 0; snmp4_ipextstats_list[i].name; i++) | ||
seq_printf(seq, " %llu", | ||
snmp_fold_field64(net->mib.ip_statistics, | ||
snmp4_ipextstats_list[i].entry, | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters