Skip to content

Commit

Permalink
habanalabs: add uapi to retrieve device utilization
Browse files Browse the repository at this point in the history
Users and sysadmins usually want to know what is the device utilization as
a level 0 indication if they are efficiently using the device.

Add a new opcode to the INFO IOCTL that will return the device utilization
over the last period of 100-1000ms. The return value is 0-100,
representing as percentage the total utilization rate.

Signed-off-by: Oded Gabbay <[email protected]>
Reviewed-by: Omer Shpigelman <[email protected]>
  • Loading branch information
ogabbay committed Sep 5, 2019
1 parent 413cf57 commit 75b3cb2
Show file tree
Hide file tree
Showing 7 changed files with 222 additions and 23 deletions.
20 changes: 16 additions & 4 deletions drivers/misc/habanalabs/command_submission.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,23 @@ static void cs_do_release(struct kref *ref)

/* We also need to update CI for internal queues */
if (cs->submitted) {
int cs_cnt = atomic_dec_return(&hdev->cs_active_cnt);
hdev->asic_funcs->hw_queues_lock(hdev);

WARN_ONCE((cs_cnt < 0),
"hl%d: error in CS active cnt %d\n",
hdev->id, cs_cnt);
hdev->cs_active_cnt--;
if (!hdev->cs_active_cnt) {
struct hl_device_idle_busy_ts *ts;

ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
ts->busy_to_idle_ts = ktime_get();

if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
hdev->idle_busy_ts_idx = 0;
} else if (hdev->cs_active_cnt < 0) {
dev_crit(hdev->dev, "CS active cnt %d is negative\n",
hdev->cs_active_cnt);
}

hdev->asic_funcs->hw_queues_unlock(hdev);

hl_int_hw_queue_update_ci(cs);

Expand Down
116 changes: 115 additions & 1 deletion drivers/misc/habanalabs/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,14 @@ static int device_early_init(struct hl_device *hdev)
goto free_eq_wq;
}

hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
sizeof(struct hl_device_idle_busy_ts),
(GFP_KERNEL | __GFP_ZERO));
if (!hdev->idle_busy_ts_arr) {
rc = -ENOMEM;
goto free_chip_info;
}

hl_cb_mgr_init(&hdev->kernel_cb_mgr);

mutex_init(&hdev->send_cpu_message_lock);
Expand All @@ -303,10 +311,11 @@ static int device_early_init(struct hl_device *hdev)
INIT_LIST_HEAD(&hdev->fpriv_list);
mutex_init(&hdev->fpriv_list_lock);
atomic_set(&hdev->in_reset, 0);
atomic_set(&hdev->cs_active_cnt, 0);

return 0;

free_chip_info:
kfree(hdev->hl_chip_info);
free_eq_wq:
destroy_workqueue(hdev->eq_wq);
free_cq_wq:
Expand Down Expand Up @@ -336,6 +345,7 @@ static void device_early_fini(struct hl_device *hdev)

hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);

kfree(hdev->idle_busy_ts_arr);
kfree(hdev->hl_chip_info);

destroy_workqueue(hdev->eq_wq);
Expand Down Expand Up @@ -451,6 +461,102 @@ static void device_late_fini(struct hl_device *hdev)
hdev->late_init_done = false;
}

uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms)
{
struct hl_device_idle_busy_ts *ts;
ktime_t zero_ktime, curr = ktime_get();
u32 overlap_cnt = 0, last_index = hdev->idle_busy_ts_idx;
s64 period_us, last_start_us, last_end_us, last_busy_time_us,
total_busy_time_us = 0, total_busy_time_ms;

zero_ktime = ktime_set(0, 0);
period_us = period_ms * USEC_PER_MSEC;
ts = &hdev->idle_busy_ts_arr[last_index];

/* check case that device is currently in idle */
if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime) &&
!ktime_compare(ts->idle_to_busy_ts, zero_ktime)) {

last_index--;
/* Handle case idle_busy_ts_idx was 0 */
if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;

ts = &hdev->idle_busy_ts_arr[last_index];
}

while (overlap_cnt < HL_IDLE_BUSY_TS_ARR_SIZE) {
/* Check if we are in last sample case. i.e. if the sample
* begun before the sampling period. This could be a real
* sample or 0 so need to handle both cases
*/
last_start_us = ktime_to_us(
ktime_sub(curr, ts->idle_to_busy_ts));

if (last_start_us > period_us) {

/* First check two cases:
* 1. If the device is currently busy
* 2. If the device was idle during the whole sampling
* period
*/

if (!ktime_compare(ts->busy_to_idle_ts, zero_ktime)) {
/* Check if the device is currently busy */
if (ktime_compare(ts->idle_to_busy_ts,
zero_ktime))
return 100;

/* We either didn't have any activity or we
* reached an entry which is 0. Either way,
* exit and return what was accumulated so far
*/
break;
}

/* If sample has finished, check it is relevant */
last_end_us = ktime_to_us(
ktime_sub(curr, ts->busy_to_idle_ts));

if (last_end_us > period_us)
break;

/* It is relevant so add it but with adjustment */
last_busy_time_us = ktime_to_us(
ktime_sub(ts->busy_to_idle_ts,
ts->idle_to_busy_ts));
total_busy_time_us += last_busy_time_us -
(last_start_us - period_us);
break;
}

/* Check if the sample is finished or still open */
if (ktime_compare(ts->busy_to_idle_ts, zero_ktime))
last_busy_time_us = ktime_to_us(
ktime_sub(ts->busy_to_idle_ts,
ts->idle_to_busy_ts));
else
last_busy_time_us = ktime_to_us(
ktime_sub(curr, ts->idle_to_busy_ts));

total_busy_time_us += last_busy_time_us;

last_index--;
/* Handle case idle_busy_ts_idx was 0 */
if (last_index > HL_IDLE_BUSY_TS_ARR_SIZE)
last_index = HL_IDLE_BUSY_TS_ARR_SIZE - 1;

ts = &hdev->idle_busy_ts_arr[last_index];

overlap_cnt++;
}

total_busy_time_ms = DIV_ROUND_UP_ULL(total_busy_time_us,
USEC_PER_MSEC);

return DIV_ROUND_UP_ULL(total_busy_time_ms * 100, period_ms);
}

/*
* hl_device_set_frequency - set the frequency of the device
*
Expand Down Expand Up @@ -808,6 +914,14 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
hl_cq_reset(hdev, &hdev->completion_queue[i]);

hdev->idle_busy_ts_idx = 0;
hdev->idle_busy_ts_arr[0].busy_to_idle_ts = ktime_set(0, 0);
hdev->idle_busy_ts_arr[0].idle_to_busy_ts = ktime_set(0, 0);

if (hdev->cs_active_cnt)
dev_crit(hdev->dev, "CS active cnt %d is not 0 during reset\n",
hdev->cs_active_cnt);

mutex_lock(&hdev->fpriv_list_lock);

/* Make sure the context switch phase will run again */
Expand Down
23 changes: 21 additions & 2 deletions drivers/misc/habanalabs/habanalabs.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
/* MUST BE POWER OF 2 and larger than 1 */
#define HL_MAX_PENDING_CS 64

#define HL_IDLE_BUSY_TS_ARR_SIZE 4096

/* Memory */
#define MEM_HASH_TABLE_BITS 7 /* 1 << 7 buckets */

Expand Down Expand Up @@ -1156,6 +1158,16 @@ struct hl_device_reset_work {
struct hl_device *hdev;
};

/**
* struct hl_device_idle_busy_ts - used for calculating device utilization rate.
* @idle_to_busy_ts: timestamp where device changed from idle to busy.
* @busy_to_idle_ts: timestamp where device changed from busy to idle.
*/
struct hl_device_idle_busy_ts {
ktime_t idle_to_busy_ts;
ktime_t busy_to_idle_ts;
};

/**
* struct hl_device - habanalabs device structure.
* @pdev: pointer to PCI device, can be NULL in case of simulator device.
Expand Down Expand Up @@ -1203,19 +1215,22 @@ struct hl_device_reset_work {
* when a user opens the device
* @fpriv_list_lock: protects the fpriv_list
* @compute_ctx: current compute context executing.
* @idle_busy_ts_arr: array to hold time stamps of transitions from idle to busy
* and vice-versa
* @dram_used_mem: current DRAM memory consumption.
* @timeout_jiffies: device CS timeout value.
* @max_power: the max power of the device, as configured by the sysadmin. This
* value is saved so in case of hard-reset, KMD will restore this
* value and update the F/W after the re-initialization
* @in_reset: is device in reset flow.
* @curr_pll_profile: current PLL profile.
* @cs_active_cnt: number of active command submissions on this device (active
* means already in H/W queues)
* @curr_pll_profile: current PLL profile.
* @major: habanalabs KMD major.
* @high_pll: high PLL profile frequency.
* @soft_reset_cnt: number of soft reset since KMD loading.
* @hard_reset_cnt: number of hard reset since KMD loading.
* @idle_busy_ts_idx: index of current entry in idle_busy_ts_arr
* @id: device minor.
* @id_control: minor of the control device
* @disabled: is device disabled.
Expand Down Expand Up @@ -1285,16 +1300,19 @@ struct hl_device {

struct hl_ctx *compute_ctx;

struct hl_device_idle_busy_ts *idle_busy_ts_arr;

atomic64_t dram_used_mem;
u64 timeout_jiffies;
u64 max_power;
atomic_t in_reset;
atomic_t cs_active_cnt;
enum hl_pll_frequency curr_pll_profile;
int cs_active_cnt;
u32 major;
u32 high_pll;
u32 soft_reset_cnt;
u32 hard_reset_cnt;
u32 idle_busy_ts_idx;
u16 id;
u16 id_control;
u8 disabled;
Expand Down Expand Up @@ -1457,6 +1475,7 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
void hl_hpriv_get(struct hl_fpriv *hpriv);
void hl_hpriv_put(struct hl_fpriv *hpriv);
int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq);
uint32_t hl_device_utilization(struct hl_device *hdev, uint32_t period_ms);

int hl_build_hwmon_channel_info(struct hl_device *hdev,
struct armcp_sensor *sensors_arr);
Expand Down
27 changes: 27 additions & 0 deletions drivers/misc/habanalabs/habanalabs_ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,29 @@ static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args)
return rc;
}

static int device_utilization(struct hl_device *hdev, struct hl_info_args *args)
{
struct hl_info_device_utilization device_util = {0};
u32 max_size = args->return_size;
void __user *out = (void __user *) (uintptr_t) args->return_pointer;

if ((!max_size) || (!out))
return -EINVAL;

if ((args->period_ms < 100) || (args->period_ms > 1000) ||
(args->period_ms % 100)) {
dev_err(hdev->dev,
"period %u must be between 100 - 1000 and must be divisible by 100\n",
args->period_ms);
return -EINVAL;
}

device_util.utilization = hl_device_utilization(hdev, args->period_ms);

return copy_to_user(out, &device_util,
min((size_t) max_size, sizeof(device_util))) ? -EFAULT : 0;
}

static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev)
{
Expand Down Expand Up @@ -239,6 +262,10 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
rc = hw_idle(hdev, args);
break;

case HL_INFO_DEVICE_UTILIZATION:
rc = device_utilization(hdev, args);
break;

default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -ENOTTY;
Expand Down
8 changes: 7 additions & 1 deletion drivers/misc/habanalabs/hw_queue.c
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,13 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
spin_unlock(&hdev->hw_queues_mirror_lock);
}

atomic_inc(&hdev->cs_active_cnt);
if (!hdev->cs_active_cnt++) {
struct hl_device_idle_busy_ts *ts;

ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx];
ts->busy_to_idle_ts = ktime_set(0, 0);
ts->idle_to_busy_ts = ktime_get();
}

list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
if (job->ext_queue)
Expand Down
2 changes: 2 additions & 0 deletions drivers/misc/habanalabs/include/goya/goya.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,6 @@

#define TPC_MAX_NUM 8

#define MME_MAX_NUM 1

#endif /* GOYA_H */
49 changes: 34 additions & 15 deletions include/uapi/misc/habanalabs.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,22 +77,29 @@ enum hl_device_status {

/* Opcode for management ioctl
*
* HW_IP_INFO - Receive information about different IP blocks in the
* device.
* HL_INFO_HW_EVENTS - Receive an array describing how many times each event
* occurred since the last hard reset.
* HL_INFO_DRAM_USAGE - Retrieve the dram usage inside the device and of the
* specific context. This is relevant only for GOYA device.
* HL_INFO_HW_IDLE - Retrieve information about the idle status of each
* internal engine.
* HW_IP_INFO - Receive information about different IP blocks in the
* device.
* HL_INFO_HW_EVENTS - Receive an array describing how many times each event
* occurred since the last hard reset.
* HL_INFO_DRAM_USAGE - Retrieve the dram usage inside the device and of the
* specific context. This is relevant only for devices
* where the dram is managed by the kernel driver
* HL_INFO_HW_IDLE - Retrieve information about the idle status of each
* internal engine.
* HL_INFO_DEVICE_STATUS - Retrieve the device's status. This opcode doesn't
* require an open context.
* HL_INFO_DEVICE_UTILIZATION - Retrieve the total utilization of the device
* over the last period specified by the user.
* The period can be between 100ms to 1s, in
* resolution of 100ms. The return value is a
* percentage of the utilization rate.
*/
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1
#define HL_INFO_DRAM_USAGE 2
#define HL_INFO_HW_IDLE 3
#define HL_INFO_DEVICE_STATUS 4
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1
#define HL_INFO_DRAM_USAGE 2
#define HL_INFO_HW_IDLE 3
#define HL_INFO_DEVICE_STATUS 4
#define HL_INFO_DEVICE_UTILIZATION 6

#define HL_INFO_VERSION_MAX_LEN 128

Expand Down Expand Up @@ -134,6 +141,11 @@ struct hl_info_device_status {
__u32 pad;
};

struct hl_info_device_utilization {
__u32 utilization;
__u32 pad;
};

struct hl_info_args {
/* Location of relevant struct in userspace */
__u64 return_pointer;
Expand All @@ -149,8 +161,15 @@ struct hl_info_args {
/* HL_INFO_* */
__u32 op;

/* Context ID - Currently not in use */
__u32 ctx_id;
union {
/* Context ID - Currently not in use */
__u32 ctx_id;
/* Period value for utilization rate (100ms - 1000ms, in 100ms
* resolution.
*/
__u32 period_ms;
};

__u32 pad;
};

Expand Down

0 comments on commit 75b3cb2

Please sign in to comment.