From c792eba12cd200e67d08e20a22763ff1970a685b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 27 May 2016 14:38:31 -0600 Subject: [PATCH 01/17] ntb_tool: Fix infinite loop bug when writing spad/peer_spad file If you tried to write two spads in one line, as per the example: root@peer# echo '0 0x01010101 1 0x7f7f7f7f' > $DBG_DIR/peer_spad then the CPU would freeze in an infinite loop. This wasn't immediately obvious but 'pos' was not incrementing the buffer, so after reading the second pair of values, 'pos' would once again be 3 and it would re-read the second pair of values ad infinitum. Signed-off-by: Logan Gunthorpe Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_tool.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c index 6f5dc6ca673d37..209ef7ceb98a8f 100644 --- a/drivers/ntb/test/ntb_tool.c +++ b/drivers/ntb/test/ntb_tool.c @@ -268,7 +268,7 @@ static ssize_t tool_spadfn_write(struct tool_ctx *tc, { int spad_idx; u32 spad_val; - char *buf; + char *buf, *buf_ptr; int pos, n; ssize_t rc; @@ -288,14 +288,15 @@ static ssize_t tool_spadfn_write(struct tool_ctx *tc, } buf[size] = 0; - - n = sscanf(buf, "%d %i%n", &spad_idx, &spad_val, &pos); + buf_ptr = buf; + n = sscanf(buf_ptr, "%d %i%n", &spad_idx, &spad_val, &pos); while (n == 2) { + buf_ptr += pos; rc = spad_write_fn(tc->ntb, spad_idx, spad_val); if (rc) break; - n = sscanf(buf + pos, "%d %i%n", &spad_idx, &spad_val, &pos); + n = sscanf(buf_ptr, "%d %i%n", &spad_idx, &spad_val, &pos); } if (n < 0) From 625f0802e84e47760959a5015fdc704d809fd6c8 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 20 Jun 2016 13:15:08 -0600 Subject: [PATCH 02/17] ntb_tool: BUG: Ensure the buffer size is large enough to return all spads On hardware with 32 scratchpad registers the spad field in ntb tool could chop off the end. The maximum buffer size is increased from 256 to 15 times the number of scratchpads. 
Signed-off-by: Logan Gunthorpe Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_tool.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c index 209ef7ceb98a8f..a0ead31cd79946 100644 --- a/drivers/ntb/test/ntb_tool.c +++ b/drivers/ntb/test/ntb_tool.c @@ -239,7 +239,14 @@ static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf, if (!spad_read_fn) return -EINVAL; - buf_size = min_t(size_t, size, 0x100); + spad_count = ntb_spad_count(tc->ntb); + + /* + * We multiply the number of spads by 15 to get the buffer size + * this is from 3 for the %d, 10 for the largest hex value + * (0x00000000) and 2 for the tab and line feed. + */ + buf_size = min_t(size_t, size, spad_count * 15); buf = kmalloc(buf_size, GFP_KERNEL); if (!buf) @@ -247,7 +254,6 @@ static ssize_t tool_spadfn_read(struct tool_ctx *tc, char __user *ubuf, pos = 0; - spad_count = ntb_spad_count(tc->ntb); for (i = 0; i < spad_count; ++i) { pos += scnprintf(buf + pos, buf_size - pos, "%d\t%#x\n", i, spad_read_fn(tc->ntb, i)); From a754a8fcaf383be3c5fcc6c3c08e36d9f3005988 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 8 Apr 2016 10:49:06 -0700 Subject: [PATCH 03/17] NTB: allocate number transport entries depending on size of ring size Currently we only allocate a fixed default number of descriptors for the tx and rx side. We should dynamically resize it to the number of descriptors that reside in the transport rings. We should know the number of transmit descriptors at initialization. We will allocate the default number of descriptors for receive side and allocate additional ones when we know the actual max entries for receive. 
Signed-off-by: Dave Jiang Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 2ef9d913086460..6db8c8528f266b 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -153,6 +153,7 @@ struct ntb_transport_qp { unsigned int rx_index; unsigned int rx_max_entry; unsigned int rx_max_frame; + unsigned int rx_alloc_entry; dma_cookie_t last_cookie; struct tasklet_struct rxc_db_work; @@ -480,7 +481,9 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, out_offset += snprintf(buf + out_offset, out_count - out_offset, "rx_index - \t%u\n", qp->rx_index); out_offset += snprintf(buf + out_offset, out_count - out_offset, - "rx_max_entry - \t%u\n\n", qp->rx_max_entry); + "rx_max_entry - \t%u\n", qp->rx_max_entry); + out_offset += snprintf(buf + out_offset, out_count - out_offset, + "rx_alloc_entry - \t%u\n\n", qp->rx_alloc_entry); out_offset += snprintf(buf + out_offset, out_count - out_offset, "tx_bytes - \t%llu\n", qp->tx_bytes); @@ -597,9 +600,12 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, { struct ntb_transport_qp *qp = &nt->qp_vec[qp_num]; struct ntb_transport_mw *mw; + struct ntb_dev *ndev = nt->ndev; + struct ntb_queue_entry *entry; unsigned int rx_size, num_qps_mw; unsigned int mw_num, mw_count, qp_count; unsigned int i; + int node; mw_count = nt->mw_count; qp_count = nt->qp_count; @@ -626,6 +632,23 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, qp->rx_max_entry = rx_size / qp->rx_max_frame; qp->rx_index = 0; + /* + * Checking to see if we have more entries than the default. + * We should add additional entries if that is the case so we + * can be in sync with the transport frames. 
+ */ + node = dev_to_node(&ndev->dev); + for (i = qp->rx_alloc_entry; i < qp->rx_max_entry; i++) { + entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); + if (!entry) + return -ENOMEM; + + entry->qp = qp; + ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, + &qp->rx_free_q); + qp->rx_alloc_entry++; + } + qp->remote_rx_info->entry = qp->rx_max_entry - 1; /* setup the hdr offsets with 0's */ @@ -1722,8 +1745,9 @@ ntb_transport_create_queue(void *data, struct device *client_dev, ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q); } + qp->rx_alloc_entry = NTB_QP_DEF_NUM_ENTRIES; - for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) { + for (i = 0; i < qp->tx_max_entry; i++) { entry = kzalloc_node(sizeof(*entry), GFP_ATOMIC, node); if (!entry) goto err2; @@ -1744,6 +1768,7 @@ ntb_transport_create_queue(void *data, struct device *client_dev, while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) kfree(entry); err1: + qp->rx_alloc_entry = 0; while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q))) kfree(entry); if (qp->tx_dma_chan) From 4aae977721f0367809cdc94584b6945073d9fe10 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 3 Jun 2016 14:50:31 -0600 Subject: [PATCH 04/17] ntb_perf: Allow limiting the size of the memory windows On my system, dma_alloc_coherent won't produce memory anywhere near the size of the BAR. So I needed a way to limit this. It's pretty much copied straight from ntb_transport. 
Signed-off-by: Logan Gunthorpe Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 8dfce9c9aad09a..30635c89320cc4 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -83,6 +83,10 @@ MODULE_DESCRIPTION(DRIVER_DESCRIPTION); static struct dentry *perf_debugfs_dir; +static unsigned long max_mw_size; +module_param(max_mw_size, ulong, 0644); +MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows"); + static unsigned int seg_order = 19; /* 512K */ module_param(seg_order, uint, 0644); MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing"); @@ -472,6 +476,10 @@ static void perf_link_work(struct work_struct *work) dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__); size = perf->mw.phys_size; + + if (max_mw_size && size > max_mw_size) + size = max_mw_size; + ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size)); ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size)); ntb_peer_spad_write(ndev, VERSION, PERF_VERSION); From 8b71d285061181f91194114cc7dabce73185eed1 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 3 Jun 2016 14:50:33 -0600 Subject: [PATCH 05/17] ntb_tool: Add memory window debug support We allocate some memory window buffers when the link comes up, then we provide debugfs files to read/write each side of the link. This is useful for debugging the mapping when writing new drivers. 
Signed-off-by: Logan Gunthorpe Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_tool.c | 258 +++++++++++++++++++++++++++++++++++- 1 file changed, 257 insertions(+), 1 deletion(-) diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c index a0ead31cd79946..cba31fd0a07a2e 100644 --- a/drivers/ntb/test/ntb_tool.c +++ b/drivers/ntb/test/ntb_tool.c @@ -89,6 +89,7 @@ #include #include #include +#include #include @@ -105,11 +106,29 @@ MODULE_VERSION(DRIVER_VERSION); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESCRIPTION); +#define MAX_MWS 16 + +static unsigned long mw_size = 16; +module_param(mw_size, ulong, 0644); +MODULE_PARM_DESC(mw_size, "size order [n^2] of the memory window for testing"); + static struct dentry *tool_dbgfs; +struct tool_mw { + resource_size_t size; + u8 __iomem *local; + u8 *peer; + dma_addr_t peer_dma; +}; + struct tool_ctx { struct ntb_dev *ntb; struct dentry *dbgfs; + struct work_struct link_cleanup; + bool link_is_up; + struct delayed_work link_work; + int mw_count; + struct tool_mw mws[MAX_MWS]; }; #define SPAD_FNAME_SIZE 0x10 @@ -124,6 +143,111 @@ struct tool_ctx { .write = __write, \ } +static int tool_setup_mw(struct tool_ctx *tc, int idx) +{ + int rc; + struct tool_mw *mw = &tc->mws[idx]; + phys_addr_t base; + resource_size_t size, align, align_size; + + if (mw->local) + return 0; + + rc = ntb_mw_get_range(tc->ntb, idx, &base, &size, &align, + &align_size); + if (rc) + return rc; + + mw->size = min_t(resource_size_t, 1 << mw_size, size); + mw->size = round_up(mw->size, align); + mw->size = round_up(mw->size, align_size); + + mw->local = ioremap_wc(base, size); + if (mw->local == NULL) + return -EFAULT; + + mw->peer = dma_alloc_coherent(&tc->ntb->pdev->dev, mw->size, + &mw->peer_dma, GFP_KERNEL); + + if (mw->peer == NULL) + return -ENOMEM; + + rc = ntb_mw_set_trans(tc->ntb, idx, mw->peer_dma, mw->size); + if (rc) + return rc; + + return 0; +} + +static void tool_free_mws(struct tool_ctx 
*tc) +{ + int i; + + for (i = 0; i < tc->mw_count; i++) { + if (tc->mws[i].peer) { + ntb_mw_clear_trans(tc->ntb, i); + dma_free_coherent(&tc->ntb->pdev->dev, tc->mws[i].size, + tc->mws[i].peer, + tc->mws[i].peer_dma); + + } + + tc->mws[i].peer = NULL; + tc->mws[i].peer_dma = 0; + + if (tc->mws[i].local) + iounmap(tc->mws[i].local); + + tc->mws[i].local = NULL; + } + + tc->mw_count = 0; +} + +static int tool_setup_mws(struct tool_ctx *tc) +{ + int i; + int rc; + + tc->mw_count = min(ntb_mw_count(tc->ntb), MAX_MWS); + + for (i = 0; i < tc->mw_count; i++) { + rc = tool_setup_mw(tc, i); + if (rc) + goto err_out; + } + + return 0; + +err_out: + tool_free_mws(tc); + return rc; +} + +static void tool_link_work(struct work_struct *work) +{ + int rc; + struct tool_ctx *tc = container_of(work, struct tool_ctx, + link_work.work); + + tool_free_mws(tc); + rc = tool_setup_mws(tc); + if (rc) + dev_err(&tc->ntb->dev, + "Error setting up memory windows: %d\n", rc); + + tc->link_is_up = true; +} + +static void tool_link_cleanup(struct work_struct *work) +{ + struct tool_ctx *tc = container_of(work, struct tool_ctx, + link_cleanup); + + if (!tc->link_is_up) + cancel_delayed_work_sync(&tc->link_work); +} + static void tool_link_event(void *ctx) { struct tool_ctx *tc = ctx; @@ -135,6 +259,11 @@ static void tool_link_event(void *ctx) dev_dbg(&tc->ntb->dev, "link is %s speed %d width %d\n", up ? 
"up" : "down", speed, width); + + if (up) + schedule_delayed_work(&tc->link_work, 2*HZ); + else + schedule_work(&tc->link_cleanup); } static void tool_db_event(void *ctx, int vec) @@ -449,8 +578,112 @@ static TOOL_FOPS_RDWR(tool_peer_spad_fops, tool_peer_spad_read, tool_peer_spad_write); + +static ssize_t tool_mw_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + ssize_t rc; + loff_t pos = *offp; + void *buf; + + if (mw->local == NULL) + return -EIO; + if (pos < 0) + return -EINVAL; + if (pos >= mw->size || !size) + return 0; + if (size > mw->size - pos) + size = mw->size - pos; + + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + memcpy_fromio(buf, mw->local + pos, size); + rc = copy_to_user(ubuf, buf, size); + if (rc == size) { + rc = -EFAULT; + goto err_free; + } + + size -= rc; + *offp = pos + size; + rc = size; + +err_free: + kfree(buf); + + return rc; +} + +static ssize_t tool_mw_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + ssize_t rc; + loff_t pos = *offp; + void *buf; + + if (pos < 0) + return -EINVAL; + if (pos >= mw->size || !size) + return 0; + if (size > mw->size - pos) + size = mw->size - pos; + + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + rc = copy_from_user(buf, ubuf, size); + if (rc == size) { + rc = -EFAULT; + goto err_free; + } + + size -= rc; + *offp = pos + size; + rc = size; + + memcpy_toio(mw->local + pos, buf, size); + +err_free: + kfree(buf); + + return rc; +} + +static TOOL_FOPS_RDWR(tool_mw_fops, + tool_mw_read, + tool_mw_write); + + +static ssize_t tool_peer_mw_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + + return simple_read_from_buffer(ubuf, size, offp, mw->peer, mw->size); +} + +static ssize_t tool_peer_mw_write(struct file *filep, const char __user *ubuf, + size_t 
size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + + return simple_write_to_buffer(mw->peer, mw->size, offp, ubuf, size); +} + +static TOOL_FOPS_RDWR(tool_peer_mw_fops, + tool_peer_mw_read, + tool_peer_mw_write); + static void tool_setup_dbgfs(struct tool_ctx *tc) { + int mw_count; + int i; + /* This modules is useless without dbgfs... */ if (!tool_dbgfs) { tc->dbgfs = NULL; @@ -479,6 +712,20 @@ static void tool_setup_dbgfs(struct tool_ctx *tc) debugfs_create_file("peer_spad", S_IRUSR | S_IWUSR, tc->dbgfs, tc, &tool_peer_spad_fops); + + mw_count = min(ntb_mw_count(tc->ntb), MAX_MWS); + for (i = 0; i < mw_count; i++) { + char buf[30]; + + snprintf(buf, sizeof(buf), "mw%d", i); + debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs, + &tc->mws[i], &tool_mw_fops); + + snprintf(buf, sizeof(buf), "peer_mw%d", i); + debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs, + &tc->mws[i], &tool_peer_mw_fops); + + } } static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) @@ -492,13 +739,15 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) if (ntb_spad_is_unsafe(ntb)) dev_dbg(&ntb->dev, "scratchpad is unsafe\n"); - tc = kmalloc(sizeof(*tc), GFP_KERNEL); + tc = kzalloc(sizeof(*tc), GFP_KERNEL); if (!tc) { rc = -ENOMEM; goto err_tc; } tc->ntb = ntb; + INIT_DELAYED_WORK(&tc->link_work, tool_link_work); + INIT_WORK(&tc->link_cleanup, tool_link_cleanup); tool_setup_dbgfs(tc); @@ -513,6 +762,8 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) err_ctx: debugfs_remove_recursive(tc->dbgfs); + cancel_delayed_work_sync(&tc->link_work); + cancel_work_sync(&tc->link_cleanup); kfree(tc); err_tc: return rc; @@ -522,6 +773,11 @@ static void tool_remove(struct ntb_client *self, struct ntb_dev *ntb) { struct tool_ctx *tc = ntb->ctx; + cancel_delayed_work_sync(&tc->link_work); + cancel_work_sync(&tc->link_cleanup); + + tool_free_mws(tc); + ntb_clear_ctx(ntb); ntb_link_disable(ntb); From 
19645a077120c6417e9dc5ad469c45194cf78a82 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Tue, 7 Jun 2016 11:20:22 -0600 Subject: [PATCH 06/17] ntb_transport: Check the number of spads the hardware supports I'm working on hardware that currently has a limited number of scratchpad registers and ntb_ndev fails with no clue as to why. I feel it is better to fail early and provide a reasonable error message than to fail later on. The same is done to ntb_perf, but it doesn't currently require enough spads to actually fail. I've also removed the unused SPAD_MSG and SPAD_ACK enums so that MAX_SPAD accurately reflects the number of spads used. Signed-off-by: Logan Gunthorpe Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 9 +++++++-- drivers/ntb/test/ntb_perf.c | 8 ++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 6db8c8528f266b..d5c5894f252e04 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -1060,6 +1060,13 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) int node; int rc, i; + mw_count = ntb_mw_count(ndev); + if (ntb_spad_count(ndev) < (NUM_MWS + 1 + mw_count * 2)) { + dev_err(&ndev->dev, "Not enough scratch pad registers for %s", + NTB_TRANSPORT_NAME); + return -EIO; + } + if (ntb_db_is_unsafe(ndev)) dev_dbg(&ndev->dev, "doorbell is unsafe, proceed anyway...\n"); @@ -1075,8 +1082,6 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) nt->ndev = ndev; - mw_count = ntb_mw_count(ndev); - nt->mw_count = mw_count; nt->mw_vec = kzalloc_node(mw_count * sizeof(*nt->mw_vec), diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 30635c89320cc4..4368519da1023f 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -143,8 +143,6 @@ enum { VERSION = 0, MW_SZ_HIGH, MW_SZ_LOW, - SPAD_MSG, - SPAD_ACK, MAX_SPAD }; @@ -696,6 +694,12 @@ 
static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) int node; int rc = 0; + if (ntb_spad_count(ntb) < MAX_SPAD) { + dev_err(&ntb->dev, "Not enough scratch pad registers for %s", + DRIVER_NAME); + return -EIO; + } + node = dev_to_node(&pdev->dev); perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node); From fd2ecd885bab8e456298d0b702806ea736456c62 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 20 Jun 2016 13:15:04 -0600 Subject: [PATCH 07/17] ntb_perf: Schedule based on time not on performance When debugging performance problems, if some issue causes the ntb hardware to be significantly slower than expected, ntb_perf will hang requiring a reboot because it only schedules once every 4GB. Instead, schedule based on jiffies so it will not hang the CPU if the transfer is slow. Signed-off-by: Logan Gunthorpe Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 4368519da1023f..5008ccf600a959 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -273,6 +273,7 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, char __iomem *tmp = dst; u64 perf, diff_us; ktime_t kstart, kstop, kdiff; + unsigned long last_sleep = jiffies; chunks = div64_u64(win_size, buf_size); total_chunks = div64_u64(total, buf_size); @@ -288,8 +289,9 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, } else tmp += buf_size; - /* Probably should schedule every 4GB to prevent soft hang. */ - if (((copied % SZ_4G) == 0) && !use_dma) { + /* Probably should schedule every 5s to prevent soft hang. 
*/ + if (unlikely((jiffies - last_sleep) > 5 * HZ)) { + last_sleep = jiffies; set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); } From da573eaa3a13f60efafcbe25e4f4465cf1a1b40b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 20 Jun 2016 13:15:05 -0600 Subject: [PATCH 08/17] ntb_perf: Improve thread handling to increase robustness This commit accomplishes a few things: 1) Properly prevent multiple sets of threads from running at once using a mutex. Lots of race issues existed with the thread_cleanup. 2) The mutex allows us to ensure that threads are finished before tearing down the device or module. 3) Don't use kthread_stop when the threads can exit by themselves, as this is counter-indicated by the kthread_create documentation. Threads now wait for kthread_stop to occur. 4) Writing to the run file now blocks until the threads are complete. The test can then be safely interrupted by a SIGINT. Also, while I was at it: 5) debugfs_run_write shouldn't return 0 in the early check cases as this could cause debugfs_run_write to loop undesirably. 
Signed-off-by: Logan Gunthorpe Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 124 ++++++++++++++++++++++-------------- 1 file changed, 76 insertions(+), 48 deletions(-) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 5008ccf600a959..db4dc61164ca87 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -58,6 +58,7 @@ #include #include #include +#include #define DRIVER_NAME "ntb_perf" #define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool" @@ -121,6 +122,7 @@ struct pthr_ctx { int dma_prep_err; int src_idx; void *srcs[MAX_SRCS]; + wait_queue_head_t *wq; }; struct perf_ctx { @@ -134,9 +136,11 @@ struct perf_ctx { struct dentry *debugfs_run; struct dentry *debugfs_threads; u8 perf_threads; - bool run; + /* mutex ensures only one set of threads run at once */ + struct mutex run_mutex; struct pthr_ctx pthr_ctx[MAX_THREADS]; atomic_t tsync; + atomic_t tdone; }; enum { @@ -295,12 +299,18 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); } + + if (unlikely(kthread_should_stop())) + break; } if (use_dma) { pr_info("%s: All DMA descriptors submitted\n", current->comm); - while (atomic_read(&pctx->dma_sync) != 0) + while (atomic_read(&pctx->dma_sync) != 0) { + if (kthread_should_stop()) + break; msleep(20); + } } kstop = ktime_get(); @@ -393,7 +403,10 @@ static int ntb_perf_thread(void *data) pctx->srcs[i] = NULL; } - return 0; + atomic_inc(&perf->tdone); + wake_up(pctx->wq); + rc = 0; + goto done; err: for (i = 0; i < MAX_SRCS; i++) { @@ -406,6 +419,16 @@ static int ntb_perf_thread(void *data) pctx->dma_chan = NULL; } +done: + /* Wait until we are told to stop */ + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; + schedule(); + } + __set_current_state(TASK_RUNNING); + return rc; } @@ -553,6 +576,7 @@ static ssize_t debugfs_run_read(struct file *filp, 
char __user *ubuf, struct perf_ctx *perf = filp->private_data; char *buf; ssize_t ret, out_offset; + int running; if (!perf) return 0; @@ -560,7 +584,9 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, buf = kmalloc(64, GFP_KERNEL); if (!buf) return -ENOMEM; - out_offset = snprintf(buf, 64, "%d\n", perf->run); + + running = mutex_is_locked(&perf->run_mutex); + out_offset = snprintf(buf, 64, "%d\n", running); ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset); kfree(buf); @@ -572,7 +598,6 @@ static void threads_cleanup(struct perf_ctx *perf) struct pthr_ctx *pctx; int i; - perf->run = false; for (i = 0; i < MAX_THREADS; i++) { pctx = &perf->pthr_ctx[i]; if (pctx->thread) { @@ -587,65 +612,66 @@ static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf, { struct perf_ctx *perf = filp->private_data; int node, i; + DECLARE_WAIT_QUEUE_HEAD(wq); if (!perf->link_is_up) - return 0; + return -ENOLINK; if (perf->perf_threads == 0) - return 0; + return -EINVAL; - if (atomic_read(&perf->tsync) == 0) - perf->run = false; + if (!mutex_trylock(&perf->run_mutex)) + return -EBUSY; - if (perf->run) - threads_cleanup(perf); - else { - perf->run = true; + if (perf->perf_threads > MAX_THREADS) { + perf->perf_threads = MAX_THREADS; + pr_info("Reset total threads to: %u\n", MAX_THREADS); + } - if (perf->perf_threads > MAX_THREADS) { - perf->perf_threads = MAX_THREADS; - pr_info("Reset total threads to: %u\n", MAX_THREADS); - } + /* no greater than 1M */ + if (seg_order > MAX_SEG_ORDER) { + seg_order = MAX_SEG_ORDER; + pr_info("Fix seg_order to %u\n", seg_order); + } - /* no greater than 1M */ - if (seg_order > MAX_SEG_ORDER) { - seg_order = MAX_SEG_ORDER; - pr_info("Fix seg_order to %u\n", seg_order); - } + if (run_order < seg_order) { + run_order = seg_order; + pr_info("Fix run_order to %u\n", run_order); + } - if (run_order < seg_order) { - run_order = seg_order; - pr_info("Fix run_order to %u\n", run_order); - } + node = 
dev_to_node(&perf->ntb->pdev->dev); + atomic_set(&perf->tdone, 0); - node = dev_to_node(&perf->ntb->pdev->dev); - /* launch kernel thread */ - for (i = 0; i < perf->perf_threads; i++) { - struct pthr_ctx *pctx; - - pctx = &perf->pthr_ctx[i]; - atomic_set(&pctx->dma_sync, 0); - pctx->perf = perf; - pctx->thread = - kthread_create_on_node(ntb_perf_thread, - (void *)pctx, - node, "ntb_perf %d", i); - if (IS_ERR(pctx->thread)) { - pctx->thread = NULL; - goto err; - } else - wake_up_process(pctx->thread); - - if (perf->run == false) - return -ENXIO; - } + /* launch kernel thread */ + for (i = 0; i < perf->perf_threads; i++) { + struct pthr_ctx *pctx; + pctx = &perf->pthr_ctx[i]; + atomic_set(&pctx->dma_sync, 0); + pctx->perf = perf; + pctx->wq = &wq; + pctx->thread = + kthread_create_on_node(ntb_perf_thread, + (void *)pctx, + node, "ntb_perf %d", i); + if (IS_ERR(pctx->thread)) { + pctx->thread = NULL; + goto err; + } else { + wake_up_process(pctx->thread); + } } + wait_event_interruptible(wq, + atomic_read(&perf->tdone) == perf->perf_threads); + + threads_cleanup(perf); + mutex_unlock(&perf->run_mutex); return count; err: threads_cleanup(perf); + mutex_unlock(&perf->run_mutex); return -ENXIO; } @@ -713,7 +739,7 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) perf->ntb = ntb; perf->perf_threads = 1; atomic_set(&perf->tsync, 0); - perf->run = false; + mutex_init(&perf->run_mutex); spin_lock_init(&perf->db_lock); perf_setup_mw(ntb, perf); INIT_DELAYED_WORK(&perf->link_work, perf_link_work); @@ -748,6 +774,8 @@ static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb) dev_dbg(&perf->ntb->dev, "%s called\n", __func__); + mutex_lock(&perf->run_mutex); + cancel_delayed_work_sync(&perf->link_work); cancel_work_sync(&perf->link_cleanup); From 58fd0f3b1539a5bad451a823c9d039ea23387b23 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 20 Jun 2016 13:15:06 -0600 Subject: [PATCH 09/17] ntb_perf: Return results by reading the run file 
Instead of having to watch logs, allow the results to be retrieved by reading back the run file. This file will return "running" when the test is running and nothing if no tests have been run yet. It returns 1 line per thread, and will display an error message if the corresponding thread returns an error. With the above change, the pr_info calls that returned the results are then changed to pr_debug calls. Signed-off-by: Logan Gunthorpe Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 67 ++++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 12 deletions(-) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index db4dc61164ca87..05a870524a67b7 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -123,6 +123,9 @@ struct pthr_ctx { int src_idx; void *srcs[MAX_SRCS]; wait_queue_head_t *wq; + int status; + u64 copied; + u64 diff_us; }; struct perf_ctx { @@ -305,7 +308,7 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, } if (use_dma) { - pr_info("%s: All DMA descriptors submitted\n", current->comm); + pr_debug("%s: All DMA descriptors submitted\n", current->comm); while (atomic_read(&pctx->dma_sync) != 0) { if (kthread_should_stop()) break; @@ -317,13 +320,16 @@ static int perf_move_data(struct pthr_ctx *pctx, char __iomem *dst, char *src, kdiff = ktime_sub(kstop, kstart); diff_us = ktime_to_us(kdiff); - pr_info("%s: copied %llu bytes\n", current->comm, copied); + pr_debug("%s: copied %llu bytes\n", current->comm, copied); - pr_info("%s: lasted %llu usecs\n", current->comm, diff_us); + pr_debug("%s: lasted %llu usecs\n", current->comm, diff_us); perf = div64_u64(copied, diff_us); - pr_info("%s: MBytes/s: %llu\n", current->comm, perf); + pr_debug("%s: MBytes/s: %llu\n", current->comm, perf); + + pctx->copied = copied; + pctx->diff_us = diff_us; return 0; } @@ -345,7 +351,7 @@ static int ntb_perf_thread(void *data) int rc, node, i; struct dma_chan 
*dma_chan = NULL; - pr_info("kthread %s starting...\n", current->comm); + pr_debug("kthread %s starting...\n", current->comm); node = dev_to_node(&pdev->dev); @@ -575,19 +581,44 @@ static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf, { struct perf_ctx *perf = filp->private_data; char *buf; - ssize_t ret, out_offset; - int running; + ssize_t ret, out_off = 0; + struct pthr_ctx *pctx; + int i; + u64 rate; if (!perf) return 0; - buf = kmalloc(64, GFP_KERNEL); + buf = kmalloc(1024, GFP_KERNEL); if (!buf) return -ENOMEM; - running = mutex_is_locked(&perf->run_mutex); - out_offset = snprintf(buf, 64, "%d\n", running); - ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset); + if (mutex_is_locked(&perf->run_mutex)) { + out_off = snprintf(buf, 64, "running\n"); + goto read_from_buf; + } + + for (i = 0; i < MAX_THREADS; i++) { + pctx = &perf->pthr_ctx[i]; + + if (pctx->status == -ENODATA) + break; + + if (pctx->status) { + out_off += snprintf(buf + out_off, 1024 - out_off, + "%d: error %d\n", i, + pctx->status); + continue; + } + + rate = div64_u64(pctx->copied, pctx->diff_us); + out_off += snprintf(buf + out_off, 1024 - out_off, + "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n", + i, pctx->copied, pctx->diff_us, rate); + } + +read_from_buf: + ret = simple_read_from_buffer(ubuf, count, offp, buf, out_off); kfree(buf); return ret; @@ -601,12 +632,20 @@ static void threads_cleanup(struct perf_ctx *perf) for (i = 0; i < MAX_THREADS; i++) { pctx = &perf->pthr_ctx[i]; if (pctx->thread) { - kthread_stop(pctx->thread); + pctx->status = kthread_stop(pctx->thread); pctx->thread = NULL; } } } +static void perf_clear_thread_status(struct perf_ctx *perf) +{ + int i; + + for (i = 0; i < MAX_THREADS; i++) + perf->pthr_ctx[i].status = -ENODATA; +} + static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf, size_t count, loff_t *offp) { @@ -623,6 +662,8 @@ static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf, if 
(!mutex_trylock(&perf->run_mutex)) return -EBUSY; + perf_clear_thread_status(perf); + if (perf->perf_threads > MAX_THREADS) { perf->perf_threads = MAX_THREADS; pr_info("Reset total threads to: %u\n", MAX_THREADS); @@ -757,6 +798,8 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) if (rc) goto err_ctx; + perf_clear_thread_status(perf); + return 0; err_ctx: From 26dc638ae6e32bddfb8d3da0fc93946955c28c78 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 20 Jun 2016 13:15:07 -0600 Subject: [PATCH 10/17] ntb_perf: Wait for link before running test Instead of returning immediately with an error when the link is down, wait for the link to come up (or the user sends a SIGINT). This is to make scripting ntb_perf easier. Signed-off-by: Logan Gunthorpe Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index 05a870524a67b7..f0784e50ceb756 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -135,6 +135,7 @@ struct perf_ctx { bool link_is_up; struct work_struct link_cleanup; struct delayed_work link_work; + wait_queue_head_t link_wq; struct dentry *debugfs_node_dir; struct dentry *debugfs_run; struct dentry *debugfs_threads; @@ -533,6 +534,7 @@ static void perf_link_work(struct work_struct *work) goto out1; perf->link_is_up = true; + wake_up(&perf->link_wq); return; @@ -653,7 +655,7 @@ static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf, int node, i; DECLARE_WAIT_QUEUE_HEAD(wq); - if (!perf->link_is_up) + if (wait_event_interruptible(perf->link_wq, perf->link_is_up)) return -ENOLINK; if (perf->perf_threads == 0) @@ -783,6 +785,7 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) mutex_init(&perf->run_mutex); spin_lock_init(&perf->db_lock); perf_setup_mw(ntb, perf); + init_waitqueue_head(&perf->link_wq); 
INIT_DELAYED_WORK(&perf->link_work, perf_link_work); INIT_WORK(&perf->link_cleanup, perf_link_cleanup); From 717146a2a8cbf6dbcb8fdf4ae7cddd2d6074161c Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 20 Jun 2016 13:15:09 -0600 Subject: [PATCH 11/17] ntb_tool: Postpone memory window initialization for the user In order to make the interface closer to the raw NTB API, this commit changes memory windows so they are not initialized on link up. Instead, the 'peer_trans*' debugfs files are introduced. When read, they return information provided by ntb_mw_get_range. When written, they create a buffer and initialize the memory window. The value written is taken as the requested size of the buffer (which is then rounded for alignment). Writing a value of zero frees the buffer and tears down the memory window translation. The 'peer_mw*' file is only created once the memory window translation is setup by the user. Additionally, it was noticed that the read and write functions for the 'peer_mw*' files should have checked for a NULL pointer. Signed-off-by: Logan Gunthorpe Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_tool.c | 366 ++++++++++++++++++++++-------------- 1 file changed, 228 insertions(+), 138 deletions(-) diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c index cba31fd0a07a2e..1509b4c1f20482 100644 --- a/drivers/ntb/test/ntb_tool.c +++ b/drivers/ntb/test/ntb_tool.c @@ -79,6 +79,13 @@ * root@self# cat $DBG_DIR/spad * * Observe that spad 0 and 1 have the values set by the peer. 
+ * + * # Check the memory window translation info + * cat $DBG_DIR/peer_trans0 + * + * # Setup a 16k memory window buffer + * echo 16384 > $DBG_DIR/peer_trans0 + * */ #include @@ -108,25 +115,22 @@ MODULE_DESCRIPTION(DRIVER_DESCRIPTION); #define MAX_MWS 16 -static unsigned long mw_size = 16; -module_param(mw_size, ulong, 0644); -MODULE_PARM_DESC(mw_size, "size order [n^2] of the memory window for testing"); - static struct dentry *tool_dbgfs; struct tool_mw { + int idx; + struct tool_ctx *tc; + resource_size_t win_size; resource_size_t size; u8 __iomem *local; u8 *peer; dma_addr_t peer_dma; + struct dentry *peer_dbg_file; }; struct tool_ctx { struct ntb_dev *ntb; struct dentry *dbgfs; - struct work_struct link_cleanup; - bool link_is_up; - struct delayed_work link_work; int mw_count; struct tool_mw mws[MAX_MWS]; }; @@ -143,111 +147,6 @@ struct tool_ctx { .write = __write, \ } -static int tool_setup_mw(struct tool_ctx *tc, int idx) -{ - int rc; - struct tool_mw *mw = &tc->mws[idx]; - phys_addr_t base; - resource_size_t size, align, align_size; - - if (mw->local) - return 0; - - rc = ntb_mw_get_range(tc->ntb, idx, &base, &size, &align, - &align_size); - if (rc) - return rc; - - mw->size = min_t(resource_size_t, 1 << mw_size, size); - mw->size = round_up(mw->size, align); - mw->size = round_up(mw->size, align_size); - - mw->local = ioremap_wc(base, size); - if (mw->local == NULL) - return -EFAULT; - - mw->peer = dma_alloc_coherent(&tc->ntb->pdev->dev, mw->size, - &mw->peer_dma, GFP_KERNEL); - - if (mw->peer == NULL) - return -ENOMEM; - - rc = ntb_mw_set_trans(tc->ntb, idx, mw->peer_dma, mw->size); - if (rc) - return rc; - - return 0; -} - -static void tool_free_mws(struct tool_ctx *tc) -{ - int i; - - for (i = 0; i < tc->mw_count; i++) { - if (tc->mws[i].peer) { - ntb_mw_clear_trans(tc->ntb, i); - dma_free_coherent(&tc->ntb->pdev->dev, tc->mws[i].size, - tc->mws[i].peer, - tc->mws[i].peer_dma); - - } - - tc->mws[i].peer = NULL; - tc->mws[i].peer_dma = 0; - - if 
(tc->mws[i].local) - iounmap(tc->mws[i].local); - - tc->mws[i].local = NULL; - } - - tc->mw_count = 0; -} - -static int tool_setup_mws(struct tool_ctx *tc) -{ - int i; - int rc; - - tc->mw_count = min(ntb_mw_count(tc->ntb), MAX_MWS); - - for (i = 0; i < tc->mw_count; i++) { - rc = tool_setup_mw(tc, i); - if (rc) - goto err_out; - } - - return 0; - -err_out: - tool_free_mws(tc); - return rc; -} - -static void tool_link_work(struct work_struct *work) -{ - int rc; - struct tool_ctx *tc = container_of(work, struct tool_ctx, - link_work.work); - - tool_free_mws(tc); - rc = tool_setup_mws(tc); - if (rc) - dev_err(&tc->ntb->dev, - "Error setting up memory windows: %d\n", rc); - - tc->link_is_up = true; -} - -static void tool_link_cleanup(struct work_struct *work) -{ - struct tool_ctx *tc = container_of(work, struct tool_ctx, - link_cleanup); - - if (!tc->link_is_up) - cancel_delayed_work_sync(&tc->link_work); -} - static void tool_link_event(void *ctx) { struct tool_ctx *tc = ctx; @@ -260,10 +159,6 @@ static void tool_link_event(void *ctx) dev_dbg(&tc->ntb->dev, "link is %s speed %d width %d\n", up ? 
"up" : "down", speed, width); - if (up) - schedule_delayed_work(&tc->link_work, 2*HZ); - else - schedule_work(&tc->link_cleanup); } static void tool_db_event(void *ctx, int vec) @@ -591,10 +486,10 @@ static ssize_t tool_mw_read(struct file *filep, char __user *ubuf, return -EIO; if (pos < 0) return -EINVAL; - if (pos >= mw->size || !size) + if (pos >= mw->win_size || !size) return 0; - if (size > mw->size - pos) - size = mw->size - pos; + if (size > mw->win_size - pos) + size = mw->win_size - pos; buf = kmalloc(size, GFP_KERNEL); if (!buf) @@ -627,10 +522,10 @@ static ssize_t tool_mw_write(struct file *filep, const char __user *ubuf, if (pos < 0) return -EINVAL; - if (pos >= mw->size || !size) + if (pos >= mw->win_size || !size) return 0; - if (size > mw->size - pos) - size = mw->size - pos; + if (size > mw->win_size - pos) + size = mw->win_size - pos; buf = kmalloc(size, GFP_KERNEL); if (!buf) @@ -658,20 +553,25 @@ static TOOL_FOPS_RDWR(tool_mw_fops, tool_mw_read, tool_mw_write); - static ssize_t tool_peer_mw_read(struct file *filep, char __user *ubuf, - size_t size, loff_t *offp) + size_t size, loff_t *offp) { struct tool_mw *mw = filep->private_data; + if (!mw->peer) + return -ENXIO; + return simple_read_from_buffer(ubuf, size, offp, mw->peer, mw->size); } static ssize_t tool_peer_mw_write(struct file *filep, const char __user *ubuf, - size_t size, loff_t *offp) + size_t size, loff_t *offp) { struct tool_mw *mw = filep->private_data; + if (!mw->peer) + return -ENXIO; + return simple_write_to_buffer(mw->peer, mw->size, offp, ubuf, size); } @@ -679,9 +579,199 @@ static TOOL_FOPS_RDWR(tool_peer_mw_fops, tool_peer_mw_read, tool_peer_mw_write); +static int tool_setup_mw(struct tool_ctx *tc, int idx, size_t req_size) +{ + int rc; + struct tool_mw *mw = &tc->mws[idx]; + phys_addr_t base; + resource_size_t size, align, align_size; + char buf[16]; + + if (mw->peer) + return 0; + + rc = ntb_mw_get_range(tc->ntb, idx, &base, &size, &align, + &align_size); + if (rc) + 
return rc; + + mw->size = min_t(resource_size_t, req_size, size); + mw->size = round_up(mw->size, align); + mw->size = round_up(mw->size, align_size); + mw->peer = dma_alloc_coherent(&tc->ntb->pdev->dev, mw->size, + &mw->peer_dma, GFP_KERNEL); + + if (!mw->peer) + return -ENOMEM; + + rc = ntb_mw_set_trans(tc->ntb, idx, mw->peer_dma, mw->size); + if (rc) + goto err_free_dma; + + snprintf(buf, sizeof(buf), "peer_mw%d", idx); + mw->peer_dbg_file = debugfs_create_file(buf, S_IRUSR | S_IWUSR, + mw->tc->dbgfs, mw, + &tool_peer_mw_fops); + + return 0; + +err_free_dma: + dma_free_coherent(&tc->ntb->pdev->dev, mw->size, + mw->peer, + mw->peer_dma); + mw->peer = NULL; + mw->peer_dma = 0; + mw->size = 0; + + return rc; +} + +static void tool_free_mw(struct tool_ctx *tc, int idx) +{ + struct tool_mw *mw = &tc->mws[idx]; + + if (mw->peer) { + ntb_mw_clear_trans(tc->ntb, idx); + dma_free_coherent(&tc->ntb->pdev->dev, mw->size, + mw->peer, + mw->peer_dma); + } + + mw->peer = NULL; + mw->peer_dma = 0; + + debugfs_remove(mw->peer_dbg_file); + + mw->peer_dbg_file = NULL; +} + +static ssize_t tool_peer_mw_trans_read(struct file *filep, + char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + + char *buf; + size_t buf_size; + ssize_t ret, off = 0; + + phys_addr_t base; + resource_size_t mw_size; + resource_size_t align; + resource_size_t align_size; + + buf_size = min_t(size_t, size, 512); + + buf = kmalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ntb_mw_get_range(mw->tc->ntb, mw->idx, + &base, &mw_size, &align, &align_size); + + off += scnprintf(buf + off, buf_size - off, + "Peer MW %d Information:\n", mw->idx); + + off += scnprintf(buf + off, buf_size - off, + "Physical Address \t%pa[p]\n", + &base); + + off += scnprintf(buf + off, buf_size - off, + "Window Size \t%lld\n", + (unsigned long long)mw_size); + + off += scnprintf(buf + off, buf_size - off, + "Alignment \t%lld\n", + (unsigned long long)align); + + off += 
scnprintf(buf + off, buf_size - off, + "Size Alignment \t%lld\n", + (unsigned long long)align_size); + + off += scnprintf(buf + off, buf_size - off, + "Ready \t%c\n", + (mw->peer) ? 'Y' : 'N'); + + off += scnprintf(buf + off, buf_size - off, + "Allocated Size \t%zd\n", + (mw->peer) ? (size_t)mw->size : 0); + + ret = simple_read_from_buffer(ubuf, size, offp, buf, off); + kfree(buf); + return ret; +} + +static ssize_t tool_peer_mw_trans_write(struct file *filep, + const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_mw *mw = filep->private_data; + + char buf[32]; + size_t buf_size; + unsigned long long val; + int rc; + + buf_size = min(size, (sizeof(buf) - 1)); + if (copy_from_user(buf, ubuf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + + rc = kstrtoull(buf, 0, &val); + if (rc) + return rc; + + tool_free_mw(mw->tc, mw->idx); + if (val) + rc = tool_setup_mw(mw->tc, mw->idx, val); + + if (rc) + return rc; + + return size; +} + +static TOOL_FOPS_RDWR(tool_peer_mw_trans_fops, + tool_peer_mw_trans_read, + tool_peer_mw_trans_write); + +static int tool_init_mw(struct tool_ctx *tc, int idx) +{ + struct tool_mw *mw = &tc->mws[idx]; + phys_addr_t base; + int rc; + + rc = ntb_mw_get_range(tc->ntb, idx, &base, &mw->win_size, + NULL, NULL); + if (rc) + return rc; + + mw->tc = tc; + mw->idx = idx; + mw->local = ioremap_wc(base, mw->win_size); + if (!mw->local) + return -EFAULT; + + return 0; +} + +static void tool_free_mws(struct tool_ctx *tc) +{ + int i; + + for (i = 0; i < tc->mw_count; i++) { + tool_free_mw(tc, i); + + if (tc->mws[i].local) + iounmap(tc->mws[i].local); + + tc->mws[i].local = NULL; + } +} + static void tool_setup_dbgfs(struct tool_ctx *tc) { - int mw_count; int i; /* This modules is useless without dbgfs... 
*/ @@ -713,18 +803,16 @@ static void tool_setup_dbgfs(struct tool_ctx *tc) debugfs_create_file("peer_spad", S_IRUSR | S_IWUSR, tc->dbgfs, tc, &tool_peer_spad_fops); - mw_count = min(ntb_mw_count(tc->ntb), MAX_MWS); - for (i = 0; i < mw_count; i++) { + for (i = 0; i < tc->mw_count; i++) { char buf[30]; snprintf(buf, sizeof(buf), "mw%d", i); debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs, &tc->mws[i], &tool_mw_fops); - snprintf(buf, sizeof(buf), "peer_mw%d", i); + snprintf(buf, sizeof(buf), "peer_trans%d", i); debugfs_create_file(buf, S_IRUSR | S_IWUSR, tc->dbgfs, - &tc->mws[i], &tool_peer_mw_fops); - + &tc->mws[i], &tool_peer_mw_trans_fops); } } @@ -732,6 +820,7 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) { struct tool_ctx *tc; int rc; + int i; if (ntb_db_is_unsafe(ntb)) dev_dbg(&ntb->dev, "doorbell is unsafe\n"); @@ -746,8 +835,13 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) } tc->ntb = ntb; - INIT_DELAYED_WORK(&tc->link_work, tool_link_work); - INIT_WORK(&tc->link_cleanup, tool_link_cleanup); + + tc->mw_count = min(ntb_mw_count(tc->ntb), MAX_MWS); + for (i = 0; i < tc->mw_count; i++) { + rc = tool_init_mw(tc, i); + if (rc) + goto err_ctx; + } tool_setup_dbgfs(tc); @@ -761,9 +855,8 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) return 0; err_ctx: + tool_free_mws(tc); debugfs_remove_recursive(tc->dbgfs); - cancel_delayed_work_sync(&tc->link_work); - cancel_work_sync(&tc->link_cleanup); kfree(tc); err_tc: return rc; @@ -773,9 +866,6 @@ static void tool_remove(struct ntb_client *self, struct ntb_dev *ntb) { struct tool_ctx *tc = ntb->ctx; - cancel_delayed_work_sync(&tc->link_work); - cancel_work_sync(&tc->link_cleanup); - tool_free_mws(tc); ntb_clear_ctx(ntb); From bfcaa39652bf64294261415e5fa18ef0445a4d74 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 20 Jun 2016 13:15:10 -0600 Subject: [PATCH 12/17] ntb_tool: Add link status and files to debugfs In order to more 
successfully script with ntb_tool it's useful to have a link file to check the link status so that the script doesn't use the other files until the link is up. This commit adds a 'link' file to the debugfs directory which reads boolean (Y or N) depending on the link status. Writing to the file change the link state using ntb_link_enable or ntb_link_disable. A 'link_event' file is also provided so an application can block until the link changes to the desired state. If the user writes a 1, it will block until the link is up. If the user writes a 0, it will block until the link is down. Signed-off-by: Logan Gunthorpe Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_tool.c | 92 +++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/drivers/ntb/test/ntb_tool.c b/drivers/ntb/test/ntb_tool.c index 1509b4c1f20482..61bf2ef87e0ebf 100644 --- a/drivers/ntb/test/ntb_tool.c +++ b/drivers/ntb/test/ntb_tool.c @@ -59,6 +59,12 @@ * * Eg: check if clearing the doorbell mask generates an interrupt. * + * # Check the link status + * root@self# cat $DBG_DIR/link + * + * # Block until the link is up + * root@self# echo Y > $DBG_DIR/link_event + * * # Set the doorbell mask * root@self# echo 's 1' > $DBG_DIR/mask * @@ -131,6 +137,7 @@ struct tool_mw { struct tool_ctx { struct ntb_dev *ntb; struct dentry *dbgfs; + wait_queue_head_t link_wq; int mw_count; struct tool_mw mws[MAX_MWS]; }; @@ -159,6 +166,7 @@ static void tool_link_event(void *ctx) dev_dbg(&tc->ntb->dev, "link is %s speed %d width %d\n", up ? "up" : "down", speed, width); + wake_up(&tc->link_wq); } static void tool_db_event(void *ctx, int vec) @@ -473,6 +481,83 @@ static TOOL_FOPS_RDWR(tool_peer_spad_fops, tool_peer_spad_read, tool_peer_spad_write); +static ssize_t tool_link_read(struct file *filep, char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + char buf[3]; + + buf[0] = ntb_link_is_up(tc->ntb, NULL, NULL) ? 
'Y' : 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + + return simple_read_from_buffer(ubuf, size, offp, buf, 2); +} + +static ssize_t tool_link_write(struct file *filep, const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + char buf[32]; + size_t buf_size; + bool val; + int rc; + + buf_size = min(size, (sizeof(buf) - 1)); + if (copy_from_user(buf, ubuf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + + rc = strtobool(buf, &val); + if (rc) + return rc; + + if (val) + rc = ntb_link_enable(tc->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); + else + rc = ntb_link_disable(tc->ntb); + + if (rc) + return rc; + + return size; +} + +static TOOL_FOPS_RDWR(tool_link_fops, + tool_link_read, + tool_link_write); + +static ssize_t tool_link_event_write(struct file *filep, + const char __user *ubuf, + size_t size, loff_t *offp) +{ + struct tool_ctx *tc = filep->private_data; + char buf[32]; + size_t buf_size; + bool val; + int rc; + + buf_size = min(size, (sizeof(buf) - 1)); + if (copy_from_user(buf, ubuf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + + rc = strtobool(buf, &val); + if (rc) + return rc; + + if (wait_event_interruptible(tc->link_wq, + ntb_link_is_up(tc->ntb, NULL, NULL) == val)) + return -ERESTART; + + return size; +} + +static TOOL_FOPS_RDWR(tool_link_event_fops, + NULL, + tool_link_event_write); static ssize_t tool_mw_read(struct file *filep, char __user *ubuf, size_t size, loff_t *offp) @@ -803,6 +888,12 @@ static void tool_setup_dbgfs(struct tool_ctx *tc) debugfs_create_file("peer_spad", S_IRUSR | S_IWUSR, tc->dbgfs, tc, &tool_peer_spad_fops); + debugfs_create_file("link", S_IRUSR | S_IWUSR, tc->dbgfs, + tc, &tool_link_fops); + + debugfs_create_file("link_event", S_IWUSR, tc->dbgfs, + tc, &tool_link_event_fops); + for (i = 0; i < tc->mw_count; i++) { char buf[30]; @@ -835,6 +926,7 @@ static int tool_probe(struct ntb_client *self, struct ntb_dev *ntb) } tc->ntb = ntb; + init_waitqueue_head(&tc->link_wq); 
tc->mw_count = min(ntb_mw_count(tc->ntb), MAX_MWS); for (i = 0; i < tc->mw_count; i++) { From 20572ee1c577609f38b56b81c760dcb4151f1dbf Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 20 Jun 2016 13:15:11 -0600 Subject: [PATCH 13/17] ntb_pingpong: Add a debugfs file to get the ping count This commit adds a debugfs 'count' file to ntb_pingpong. This is so testing with ntb_pingpong can be automated beyond just checking the logs for pong messages. The count file returns a number which increments every pong. The counter can be cleared by writing a zero. Signed-off-by: Logan Gunthorpe Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_pingpong.c | 62 ++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/drivers/ntb/test/ntb_pingpong.c b/drivers/ntb/test/ntb_pingpong.c index fe160056698126..7d311799fca169 100644 --- a/drivers/ntb/test/ntb_pingpong.c +++ b/drivers/ntb/test/ntb_pingpong.c @@ -61,6 +61,7 @@ #include #include #include +#include #include @@ -96,8 +97,13 @@ struct pp_ctx { spinlock_t db_lock; struct timer_list db_timer; unsigned long db_delay; + struct dentry *debugfs_node_dir; + struct dentry *debugfs_count; + atomic_t count; }; +static struct dentry *pp_debugfs_dir; + static void pp_ping(unsigned long ctx) { struct pp_ctx *pp = (void *)ctx; @@ -171,10 +177,32 @@ static void pp_db_event(void *ctx, int vec) dev_dbg(&pp->ntb->dev, "Pong vec %d bits %#llx\n", vec, db_bits); + atomic_inc(&pp->count); } spin_unlock_irqrestore(&pp->db_lock, irqflags); } +static int pp_debugfs_setup(struct pp_ctx *pp) +{ + struct pci_dev *pdev = pp->ntb->pdev; + + if (!pp_debugfs_dir) + return -ENODEV; + + pp->debugfs_node_dir = debugfs_create_dir(pci_name(pdev), + pp_debugfs_dir); + if (!pp->debugfs_node_dir) + return -ENODEV; + + pp->debugfs_count = debugfs_create_atomic_t("count", S_IRUSR | S_IWUSR, + pp->debugfs_node_dir, + &pp->count); + if (!pp->debugfs_count) + return -ENODEV; + + return 0; +} + static 
const struct ntb_ctx_ops pp_ops = { .link_event = pp_link_event, .db_event = pp_db_event, @@ -210,6 +238,7 @@ static int pp_probe(struct ntb_client *client, pp->ntb = ntb; pp->db_bits = 0; + atomic_set(&pp->count, 0); spin_lock_init(&pp->db_lock); setup_timer(&pp->db_timer, pp_ping, (unsigned long)pp); pp->db_delay = msecs_to_jiffies(delay_ms); @@ -218,6 +247,10 @@ static int pp_probe(struct ntb_client *client, if (rc) goto err_ctx; + rc = pp_debugfs_setup(pp); + if (rc) + goto err_ctx; + ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO); ntb_link_event(ntb); @@ -234,6 +267,8 @@ static void pp_remove(struct ntb_client *client, { struct pp_ctx *pp = ntb->ctx; + debugfs_remove_recursive(pp->debugfs_node_dir); + ntb_clear_ctx(ntb); del_timer_sync(&pp->db_timer); ntb_link_disable(ntb); @@ -247,4 +282,29 @@ static struct ntb_client pp_client = { .remove = pp_remove, }, }; -module_ntb_client(pp_client); + +static int __init pp_init(void) +{ + int rc; + + if (debugfs_initialized()) + pp_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); + + rc = ntb_register_client(&pp_client); + if (rc) + goto err_client; + + return 0; + +err_client: + debugfs_remove_recursive(pp_debugfs_dir); + return rc; +} +module_init(pp_init); + +static void __exit pp_exit(void) +{ + ntb_unregister_client(&pp_client); + debugfs_remove_recursive(pp_debugfs_dir); +} +module_exit(pp_exit); From 35539b54ac339e2b2dee8c74bc9f0b06f11b11cf Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 20 Jun 2016 13:15:13 -0600 Subject: [PATCH 14/17] ntb_perf: clear link_is_up flag when the link goes down. When the link goes down, the link_is_up flag did not return to false. This could have caused some subtle corner case bugs when the link goes up and down quickly. Once that was fixed, there was found to be a race if the link was brought down then immediately up. The link_cleanup work would occasionally be scheduled after the next link up event. 
This would cancel the link_work that was supposed to occur and leave ntb_perf in an unusable state. To fix this we get rid of the link_cleanup work and put the actions directly in the link_down event. Signed-off-by: Logan Gunthorpe Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/test/ntb_perf.c | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c index f0784e50ceb756..6a50f20bf1cde0 100644 --- a/drivers/ntb/test/ntb_perf.c +++ b/drivers/ntb/test/ntb_perf.c @@ -133,7 +133,6 @@ struct perf_ctx { spinlock_t db_lock; struct perf_mw mw; bool link_is_up; - struct work_struct link_cleanup; struct delayed_work link_work; wait_queue_head_t link_wq; struct dentry *debugfs_node_dir; @@ -158,10 +157,16 @@ static void perf_link_event(void *ctx) { struct perf_ctx *perf = ctx; - if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) + if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1) { schedule_delayed_work(&perf->link_work, 2*HZ); - else - schedule_work(&perf->link_cleanup); + } else { + dev_dbg(&perf->ntb->pdev->dev, "link down\n"); + + if (!perf->link_is_up) + cancel_delayed_work_sync(&perf->link_work); + + perf->link_is_up = false; + } } static void perf_db_event(void *ctx, int vec) @@ -547,18 +552,6 @@ static void perf_link_work(struct work_struct *work) msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT)); } -static void perf_link_cleanup(struct work_struct *work) -{ - struct perf_ctx *perf = container_of(work, - struct perf_ctx, - link_cleanup); - - dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__); - - if (!perf->link_is_up) - cancel_delayed_work_sync(&perf->link_work); -} - static int perf_setup_mw(struct ntb_dev *ntb, struct perf_ctx *perf) { struct perf_mw *mw; @@ -787,7 +780,6 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) perf_setup_mw(ntb, perf); init_waitqueue_head(&perf->link_wq); INIT_DELAYED_WORK(&perf->link_work, perf_link_work); - 
INIT_WORK(&perf->link_cleanup, perf_link_cleanup); rc = ntb_set_ctx(ntb, perf, &perf_ops); if (rc) @@ -807,7 +799,6 @@ static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb) err_ctx: cancel_delayed_work_sync(&perf->link_work); - cancel_work_sync(&perf->link_cleanup); kfree(perf); err_perf: return rc; @@ -823,7 +814,6 @@ static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb) mutex_lock(&perf->run_mutex); cancel_delayed_work_sync(&perf->link_work); - cancel_work_sync(&perf->link_cleanup); ntb_clear_ctx(ntb); ntb_link_disable(ntb); From a9c59ef77458b27221e71506cdf1bd31a06afb19 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Mon, 20 Jun 2016 13:15:12 -0600 Subject: [PATCH 15/17] ntb_test: Add a selftest script for the NTB subsystem This script automates testing doorbells, scratchpads and memory windows for an NTB device. It can be run locally, with the NTB looped back to the same host or use SSH to remotely control the second host. In the single host case, the script just needs to be passed two arguments: a PCI ID for each side of the link. In the two host case the -r option must be used to specify the remote hostname (which must be SSH accessible and should probably have ssh-keys exchanged). A sample run looks like this: $ sudo ./ntb_test.sh 0000:03:00.1 0000:83:00.1 -p 29 Starting ntb_tool tests... Running link tests on: 0000:03:00.1 / 0000:83:00.1 Passed Running link tests on: 0000:83:00.1 / 0000:03:00.1 Passed Running db tests on: 0000:03:00.1 / 0000:83:00.1 Passed Running db tests on: 0000:83:00.1 / 0000:03:00.1 Passed Running spad tests on: 0000:03:00.1 / 0000:83:00.1 Passed Running spad tests on: 0000:83:00.1 / 0000:03:00.1 Passed Running mw0 tests on: 0000:03:00.1 / 0000:83:00.1 Passed Running mw0 tests on: 0000:83:00.1 / 0000:03:00.1 Passed Running mw1 tests on: 0000:03:00.1 / 0000:83:00.1 Passed Running mw1 tests on: 0000:83:00.1 / 0000:03:00.1 Passed Starting ntb_pingpong tests... 
Running ping pong tests on: 0000:03:00.1 / 0000:83:00.1 Passed Starting ntb_perf tests... Running local perf test without DMA 0: copied 536870912 bytes in 164453 usecs, 3264 MBytes/s Passed Running remote perf test without DMA 0: copied 536870912 bytes in 164453 usecs, 3264 MBytes/s Passed Signed-off-by: Logan Gunthorpe Acked-by: Shuah Khan Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- MAINTAINERS | 1 + tools/testing/selftests/ntb/ntb_test.sh | 422 ++++++++++++++++++++++++ 2 files changed, 423 insertions(+) create mode 100755 tools/testing/selftests/ntb/ntb_test.sh diff --git a/MAINTAINERS b/MAINTAINERS index 8c20323d127744..46130feca8a823 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8131,6 +8131,7 @@ F: drivers/ntb/ F: drivers/net/ntb_netdev.c F: include/linux/ntb.h F: include/linux/ntb_transport.h +F: tools/testing/selftests/ntb/ NTB INTEL DRIVER M: Jon Mason diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh new file mode 100755 index 00000000000000..a676d3eefefbdd --- /dev/null +++ b/tools/testing/selftests/ntb/ntb_test.sh @@ -0,0 +1,422 @@ +#!/bin/bash +# Copyright (c) 2016 Microsemi. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# Author: Logan Gunthorpe + +REMOTE_HOST= +LIST_DEVS=FALSE + +DEBUGFS=${DEBUGFS-/sys/kernel/debug} + +PERF_RUN_ORDER=32 +MAX_MW_SIZE=0 +RUN_DMA_TESTS= +DONT_CLEANUP= +MW_SIZE=65536 + +function show_help() +{ + echo "Usage: $0 [OPTIONS] LOCAL_DEV REMOTE_DEV" + echo "Run tests on a pair of NTB endpoints." 
+ echo + echo "If the NTB device loops back to the same host then," + echo "just specifying the two PCI ids on the command line is" + echo "sufficient. Otherwise, if the NTB link spans two hosts" + echo "use the -r option to specify the hostname for the remote" + echo "device. SSH will then be used to test the remote side." + echo "An SSH key between the root users of the host would then" + echo "be highly recommended." + echo + echo "Options:" + echo " -C don't cleanup ntb modules on exit" + echo " -d run dma tests" + echo " -h show this help message" + echo " -l list available local and remote PCI ids" + echo " -r REMOTE_HOST specify the remote's hostname to connect" + echo " to for the test (using ssh)" + echo " -p NUM ntb_perf run order (default: $PERF_RUN_ORDER)" + echo " -w max_mw_size maxmium memory window size" + echo +} + +function parse_args() +{ + OPTIND=0 + while getopts "Cdhlm:r:p:w:" opt; do + case "$opt" in + C) DONT_CLEANUP=1 ;; + d) RUN_DMA_TESTS=1 ;; + h) show_help; exit 0 ;; + l) LIST_DEVS=TRUE ;; + m) MW_SIZE=${OPTARG} ;; + r) REMOTE_HOST=${OPTARG} ;; + p) PERF_RUN_ORDER=${OPTARG} ;; + w) MAX_MW_SIZE=${OPTARG} ;; + \?) 
+ echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + esac + done +} + +parse_args "$@" +shift $((OPTIND-1)) +LOCAL_DEV=$1 +shift +parse_args "$@" +shift $((OPTIND-1)) +REMOTE_DEV=$1 +shift +parse_args "$@" + +set -e + +function _modprobe() +{ + modprobe "$@" +} + +function split_remote() +{ + VPATH=$1 + REMOTE= + + if [[ "$VPATH" == *":/"* ]]; then + REMOTE=${VPATH%%:*} + VPATH=${VPATH#*:} + fi +} + +function read_file() +{ + split_remote $1 + if [[ "$REMOTE" != "" ]]; then + ssh "$REMOTE" cat "$VPATH" + else + cat "$VPATH" + fi +} + +function write_file() +{ + split_remote $2 + VALUE=$1 + + if [[ "$REMOTE" != "" ]]; then + ssh "$REMOTE" "echo \"$VALUE\" > \"$VPATH\"" + else + echo "$VALUE" > "$VPATH" + fi +} + +function link_test() +{ + LOC=$1 + REM=$2 + EXP=0 + + echo "Running link tests on: $(basename $LOC) / $(basename $REM)" + + if ! write_file "N" "$LOC/link" 2> /dev/null; then + echo " Unsupported" + return + fi + + write_file "N" "$LOC/link_event" + + if [[ $(read_file "$REM/link") != "N" ]]; then + echo "Expected remote link to be down in $REM/link" >&2 + exit -1 + fi + + write_file "Y" "$LOC/link" + write_file "Y" "$LOC/link_event" + + echo " Passed" +} + +function doorbell_test() +{ + LOC=$1 + REM=$2 + EXP=0 + + echo "Running db tests on: $(basename $LOC) / $(basename $REM)" + + write_file "c 0xFFFFFFFF" "$REM/db" + + for ((i=1; i <= 8; i++)); do + let DB=$(read_file "$REM/db") || true + if [[ "$DB" != "$EXP" ]]; then + echo "Doorbell doesn't match expected value $EXP " \ + "in $REM/db" >&2 + exit -1 + fi + + let "MASK=1 << ($i-1)" || true + let "EXP=$EXP | $MASK" || true + write_file "s $MASK" "$LOC/peer_db" + done + + echo " Passed" +} + +function read_spad() +{ + VPATH=$1 + IDX=$2 + + ROW=($(read_file "$VPATH" | grep -e "^$IDX")) + let VAL=${ROW[1]} || true + echo $VAL +} + +function scratchpad_test() +{ + LOC=$1 + REM=$2 + CNT=$(read_file "$LOC/spad" | wc -l) + + echo "Running spad tests on: $(basename $LOC) / $(basename $REM)" + + for ((i = 0; i < 
$CNT; i++)); do + VAL=$RANDOM + write_file "$i $VAL" "$LOC/peer_spad" + RVAL=$(read_spad "$REM/spad" $i) + + if [[ "$VAL" != "$RVAL" ]]; then + echo "Scratchpad doesn't match expected value $VAL " \ + "in $REM/spad, got $RVAL" >&2 + exit -1 + fi + + done + + echo " Passed" +} + +function write_mw() +{ + split_remote $1 + + if [[ "$REMOTE" != "" ]]; then + ssh "$REMOTE" \ + dd if=/dev/urandom "of=$VPATH" 2> /dev/null || true + else + dd if=/dev/urandom "of=$VPATH" 2> /dev/null || true + fi +} + +function mw_test() +{ + IDX=$1 + LOC=$2 + REM=$3 + + echo "Running $IDX tests on: $(basename $LOC) / $(basename $REM)" + + write_mw "$LOC/$IDX" + + split_remote "$LOC/$IDX" + if [[ "$REMOTE" == "" ]]; then + A=$VPATH + else + A=/tmp/ntb_test.$$.A + ssh "$REMOTE" cat "$VPATH" > "$A" + fi + + split_remote "$REM/peer_$IDX" + if [[ "$REMOTE" == "" ]]; then + B=$VPATH + else + B=/tmp/ntb_test.$$.B + ssh "$REMOTE" cat "$VPATH" > "$B" + fi + + cmp -n $MW_SIZE "$A" "$B" + if [[ $? != 0 ]]; then + echo "Memory window $MW did not match!" >&2 + fi + + if [[ "$A" == /tmp/* ]]; then + rm "$A" + fi + + if [[ "$B" == /tmp/* ]]; then + rm "$B" + fi + + echo " Passed" +} + +function pingpong_test() +{ + LOC=$1 + REM=$2 + + echo "Running ping pong tests on: $(basename $LOC) / $(basename $REM)" + + LOC_START=$(read_file $LOC/count) + REM_START=$(read_file $REM/count) + + sleep 7 + + LOC_END=$(read_file $LOC/count) + REM_END=$(read_file $REM/count) + + if [[ $LOC_START == $LOC_END ]] || [[ $REM_START == $REM_END ]]; then + echo "Ping pong counter not incrementing!"
>&2 + exit 1 + fi + + echo " Passed" +} + +function perf_test() +{ + USE_DMA=$1 + + if [[ $USE_DMA == "1" ]]; then + WITH="with" + else + WITH="without" + fi + + _modprobe ntb_perf run_order=$PERF_RUN_ORDER \ + max_mw_size=$MAX_MW_SIZE use_dma=$USE_DMA + + echo "Running local perf test $WITH DMA" + write_file "" $LOCAL_PERF/run + echo -n " " + read_file $LOCAL_PERF/run + echo " Passed" + + echo "Running remote perf test $WITH DMA" + write_file "" $REMOTE_PERF/run + echo -n " " + read_file $REMOTE_PERF/run + echo " Passed" + + _modprobe -r ntb_perf +} + +function ntb_tool_tests() +{ + LOCAL_TOOL=$DEBUGFS/ntb_tool/$LOCAL_DEV + REMOTE_TOOL=$REMOTE_HOST:$DEBUGFS/ntb_tool/$REMOTE_DEV + + echo "Starting ntb_tool tests..." + + _modprobe ntb_tool + + write_file Y $LOCAL_TOOL/link_event + write_file Y $REMOTE_TOOL/link_event + + link_test $LOCAL_TOOL $REMOTE_TOOL + link_test $REMOTE_TOOL $LOCAL_TOOL + + for PEER_TRANS in $(ls $LOCAL_TOOL/peer_trans*); do + PT=$(basename $PEER_TRANS) + write_file $MW_SIZE $LOCAL_TOOL/$PT + write_file $MW_SIZE $REMOTE_TOOL/$PT + done + + doorbell_test $LOCAL_TOOL $REMOTE_TOOL + doorbell_test $REMOTE_TOOL $LOCAL_TOOL + scratchpad_test $LOCAL_TOOL $REMOTE_TOOL + scratchpad_test $REMOTE_TOOL $LOCAL_TOOL + + for MW in $(ls $LOCAL_TOOL/mw*); do + MW=$(basename $MW) + + mw_test $MW $LOCAL_TOOL $REMOTE_TOOL + mw_test $MW $REMOTE_TOOL $LOCAL_TOOL + done + + _modprobe -r ntb_tool +} + +function ntb_pingpong_tests() +{ + LOCAL_PP=$DEBUGFS/ntb_pingpong/$LOCAL_DEV + REMOTE_PP=$REMOTE_HOST:$DEBUGFS/ntb_pingpong/$REMOTE_DEV + + echo "Starting ntb_pingpong tests..." + + _modprobe ntb_pingpong + + pingpong_test $LOCAL_PP $REMOTE_PP + + _modprobe -r ntb_pingpong +} + +function ntb_perf_tests() +{ + LOCAL_PERF=$DEBUGFS/ntb_perf/$LOCAL_DEV + REMOTE_PERF=$REMOTE_HOST:$DEBUGFS/ntb_perf/$REMOTE_DEV + + echo "Starting ntb_perf tests..."
+
+	perf_test 0
+
+	if [[ $RUN_DMA_TESTS ]]; then
+		perf_test 1
+	fi
+}
+
+# Unload every NTB test module, ignoring failures: errexit is switched
+# off for the duration and stderr is discarded, then errexit is switched
+# back on for the rest of the script.
+function cleanup()
+{
+	set +e
+	_modprobe -r ntb_tool 2> /dev/null
+	_modprobe -r ntb_perf 2> /dev/null
+	_modprobe -r ntb_pingpong 2> /dev/null
+	_modprobe -r ntb_transport 2> /dev/null
+	set -e
+}
+
+cleanup
+
+# Install the exit trap unless DONT_CLEANUP is set. This must be
+# $DONT_CLEANUP: "$$" expands to the shell's PID, so "$$DONT_CLEANUP" is
+# always a non-empty string and the trap would never be installed.
+if ! [[ $DONT_CLEANUP ]]; then
+	trap cleanup EXIT
+fi
+
+if [ "$(id -u)" != "0" ]; then
+	echo "This script must be run as root" 1>&2
+	exit 1
+fi
+
+if [[ "$LIST_DEVS" == TRUE ]]; then
+	echo "Local Devices:"
+	ls -1 /sys/bus/ntb/devices
+	echo
+
+	if [[ "$REMOTE_HOST" != "" ]]; then
+		echo "Remote Devices:"
+		ssh $REMOTE_HOST ls -1 /sys/bus/ntb/devices
+	fi
+
+	exit 0
+fi
+
+# Both device names are required to run the tests.
+if [[ "$LOCAL_DEV" == "" ]] || [[ "$REMOTE_DEV" == "" ]]; then
+	show_help
+	exit 1
+fi
+
+ntb_tool_tests
+echo
+ntb_pingpong_tests
+echo
+ntb_perf_tests
+echo
From 4089527388808ca380af908fdbdd3d723cc9e751 Mon Sep 17 00:00:00 2001
From: Allen Hubbe
Date: Fri, 22 Jul 2016 09:38:22 -0400
Subject: [PATCH 16/17] NTB: ntb_hw_intel: show BAR size in debugfs info

It will be useful to know the hardware configured BAR size to diagnose
issues with NTB memory windows.
Signed-off-by: Allen Hubbe Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 39 ++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 40d04ef5da9e86..5efd03705e5cee 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -551,13 +551,15 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, size_t count, loff_t *offp) { struct intel_ntb_dev *ndev; + struct pci_dev *pdev; void __iomem *mmio; char *buf; size_t buf_size; ssize_t ret, off; - union { u64 v64; u32 v32; u16 v16; } u; + union { u64 v64; u32 v32; u16 v16; u8 v8; } u; ndev = filp->private_data; + pdev = ndev_pdev(ndev); mmio = ndev->self_mmio; buf_size = min(count, 0x800ul); @@ -631,6 +633,41 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, off += scnprintf(buf + off, buf_size - off, "Doorbell Bell -\t\t%#llx\n", u.v64); + off += scnprintf(buf + off, buf_size - off, + "\nNTB Window Size:\n"); + + pci_read_config_byte(pdev, XEON_PBAR23SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "PBAR23SZ %hhu\n", u.v8); + if (!ndev->bar4_split) { + pci_read_config_byte(pdev, XEON_PBAR45SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "PBAR45SZ %hhu\n", u.v8); + } else { + pci_read_config_byte(pdev, XEON_PBAR4SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "PBAR4SZ %hhu\n", u.v8); + pci_read_config_byte(pdev, XEON_PBAR5SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "PBAR5SZ %hhu\n", u.v8); + } + + pci_read_config_byte(pdev, XEON_SBAR23SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "SBAR23SZ %hhu\n", u.v8); + if (!ndev->bar4_split) { + pci_read_config_byte(pdev, XEON_SBAR45SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "SBAR45SZ %hhu\n", u.v8); + } else { + 
pci_read_config_byte(pdev, XEON_SBAR4SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "SBAR4SZ %hhu\n", u.v8); + pci_read_config_byte(pdev, XEON_SBAR5SZ_OFFSET, &u.v8); + off += scnprintf(buf + off, buf_size - off, + "SBAR5SZ %hhu\n", u.v8); + } + off += scnprintf(buf + off, buf_size - off, "\nNTB Incoming XLAT:\n"); From 95f1464f695055c72de6044d7c8a2a7a1e0c7ea2 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Fri, 22 Jul 2016 09:38:23 -0400 Subject: [PATCH 17/17] NTB: ntb_hw_intel: use local variable pdev Clean up duplicated expression by replacing it with the equivalent local variable pdev. Signed-off-by: Allen Hubbe Acked-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 5efd03705e5cee..0d5c29ae51def6 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -706,7 +706,7 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, "LMT45 -\t\t\t%#018llx\n", u.v64); } - if (pdev_is_xeon(ndev->ntb.pdev)) { + if (pdev_is_xeon(pdev)) { if (ntb_topo_is_b2b(ndev->ntb.topo)) { off += scnprintf(buf + off, buf_size - off, "\nNTB Outgoing B2B XLAT:\n"); @@ -787,22 +787,22 @@ static ssize_t ndev_debugfs_read(struct file *filp, char __user *ubuf, off += scnprintf(buf + off, buf_size - off, "\nXEON NTB Hardware Errors:\n"); - if (!pci_read_config_word(ndev->ntb.pdev, + if (!pci_read_config_word(pdev, XEON_DEVSTS_OFFSET, &u.v16)) off += scnprintf(buf + off, buf_size - off, "DEVSTS -\t\t%#06x\n", u.v16); - if (!pci_read_config_word(ndev->ntb.pdev, + if (!pci_read_config_word(pdev, XEON_LINK_STATUS_OFFSET, &u.v16)) off += scnprintf(buf + off, buf_size - off, "LNKSTS -\t\t%#06x\n", u.v16); - if (!pci_read_config_dword(ndev->ntb.pdev, + if (!pci_read_config_dword(pdev, XEON_UNCERRSTS_OFFSET, &u.v32)) off += scnprintf(buf + off, 
buf_size - off, "UNCERRSTS -\t\t%#06x\n", u.v32); - if (!pci_read_config_dword(ndev->ntb.pdev, + if (!pci_read_config_dword(pdev, XEON_CORERRSTS_OFFSET, &u.v32)) off += scnprintf(buf + off, buf_size - off, "CORERRSTS -\t\t%#06x\n", u.v32);