Skip to content

Commit

Permalink
habanalabs: rename soft reset to compute reset
Browse files Browse the repository at this point in the history
Doing compute reset can be the traditional inference soft reset
that is supported only in Goya.

Or it can be the new reset upon device release, which is supported
in Gaudi2 and above.

Therefore, wherever suitable, use the terminology of compute reset
instead of soft reset.

Signed-off-by: Oded Gabbay <[email protected]>
  • Loading branch information
ogabbay committed Jul 12, 2022
1 parent e3b20f3 commit 0b0ae02
Show file tree
Hide file tree
Showing 8 changed files with 30 additions and 30 deletions.
28 changes: 14 additions & 14 deletions drivers/misc/habanalabs/common/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
enum hl_device_status status;

if (hdev->reset_info.in_reset) {
if (hdev->reset_info.is_in_soft_reset)
if (hdev->reset_info.in_compute_reset)
status = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE;
else
status = HL_DEVICE_STATUS_IN_RESET;
Expand Down Expand Up @@ -1306,7 +1306,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
skip_wq_flush = !!(flags & HL_DRV_RESET_DEV_RELEASE);
delay_reset = !!(flags & HL_DRV_RESET_DELAY);

if (!hard_reset && !hdev->asic_prop.supports_soft_reset) {
if (!hard_reset && !hdev->asic_prop.supports_compute_reset) {
hard_instead_soft = true;
hard_reset = true;
}
Expand All @@ -1329,7 +1329,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
}

if (hard_instead_soft)
dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
dev_dbg(hdev->dev, "Doing hard-reset instead of compute reset\n");

do_reset:
/* Re-entry of reset thread */
Expand All @@ -1345,17 +1345,17 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
/* Block future CS/VM/JOB completion operations */
spin_lock(&hdev->reset_info.lock);
if (hdev->reset_info.in_reset) {
/* We only allow scheduling of a hard reset during soft reset */
if (hard_reset && hdev->reset_info.is_in_soft_reset)
/* We only allow scheduling of a hard reset during compute reset */
if (hard_reset && hdev->reset_info.in_compute_reset)
hdev->reset_info.hard_reset_schedule_flags = flags;
spin_unlock(&hdev->reset_info.lock);
return 0;
}

/* This still allows the completion of some KDMA ops
* Update this before in_reset because is_in_soft_reset implies we are in reset
* Update this before in_reset because in_compute_reset implies we are in reset
*/
hdev->reset_info.is_in_soft_reset = !hard_reset;
hdev->reset_info.in_compute_reset = !hard_reset;

hdev->reset_info.in_reset = 1;

Expand Down Expand Up @@ -1562,7 +1562,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
dev_err(hdev->dev,
"Failed late init in reset after device release\n");
else
dev_err(hdev->dev, "Failed late init after soft reset\n");
dev_err(hdev->dev, "Failed late init after compute reset\n");
goto out_err;
}
}
Expand All @@ -1574,7 +1574,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
}

spin_lock(&hdev->reset_info.lock);
hdev->reset_info.is_in_soft_reset = 0;
hdev->reset_info.in_compute_reset = 0;

/* Schedule hard reset only if requested and if not already in hard reset.
* We keep 'in_reset' enabled, so no other reset can go in during the hard
Expand Down Expand Up @@ -1604,11 +1604,11 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
*/
hdev->asic_funcs->enable_events_from_fw(hdev);
} else if (!reset_upon_device_release) {
hdev->reset_info.soft_reset_cnt++;
hdev->reset_info.compute_reset_cnt++;
}

if (schedule_hard_reset) {
dev_info(hdev->dev, "Performing hard reset scheduled during soft reset\n");
dev_info(hdev->dev, "Performing hard reset scheduled during compute reset\n");
flags = hdev->reset_info.hard_reset_schedule_flags;
hdev->reset_info.hard_reset_schedule_flags = 0;
hdev->disabled = true;
Expand All @@ -1623,7 +1623,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
hdev->disabled = true;

spin_lock(&hdev->reset_info.lock);
hdev->reset_info.is_in_soft_reset = 0;
hdev->reset_info.in_compute_reset = 0;

if (hard_reset) {
dev_err(hdev->dev, "Failed to reset! Device is NOT usable\n");
Expand All @@ -1637,8 +1637,8 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
goto again;
} else {
spin_unlock(&hdev->reset_info.lock);
dev_err(hdev->dev, "Failed to do soft-reset\n");
hdev->reset_info.soft_reset_cnt++;
dev_err(hdev->dev, "Failed to do compute reset\n");
hdev->reset_info.compute_reset_cnt++;
flags |= HL_DRV_RESET_HARD;
hard_reset = true;
goto again;
Expand Down
4 changes: 2 additions & 2 deletions drivers/misc/habanalabs/common/firmware_if.c
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
mutex_lock(&hdev->send_cpu_message_lock);

/* CPU-CP messages can be sent during soft-reset */
if (hdev->disabled && !hdev->reset_info.is_in_soft_reset) {
if (hdev->disabled && !hdev->reset_info.in_compute_reset) {
rc = 0;
goto out;
}
Expand Down Expand Up @@ -314,7 +314,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
/* If FW performed reset just before sending it a packet, we will get a timeout.
* This is expected behavior, hence no need for error message.
*/
if (!hl_device_operational(hdev, NULL) && !hdev->reset_info.is_in_soft_reset)
if (!hl_device_operational(hdev, NULL) && !hdev->reset_info.in_compute_reset)
dev_dbg(hdev->dev, "Device CPU packet timeout (0x%x) due to FW reset\n",
tmp);
else
Expand Down
16 changes: 8 additions & 8 deletions drivers/misc/habanalabs/common/habanalabs.h
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ struct hl_hints_range {
* false otherwise.
* @use_get_power_for_reset_history: To support backward compatibility for Goya
* and Gaudi
* @supports_soft_reset: is soft reset supported.
* @supports_compute_reset: is a reset which is not a hard-reset supported by this asic.
* @allow_inference_soft_reset: true if the ASIC supports soft reset that is
* initiated by user or TDR. This is only true
* in inference ASICs, as there is no real-world
Expand Down Expand Up @@ -760,7 +760,7 @@ struct asic_fixed_properties {
u8 dynamic_fw_load;
u8 gic_interrupts_enable;
u8 use_get_power_for_reset_history;
u8 supports_soft_reset;
u8 supports_compute_reset;
u8 allow_inference_soft_reset;
u8 configurable_stop_on_err;
u8 set_max_power_on_device_init;
Expand Down Expand Up @@ -2960,12 +2960,12 @@ struct last_error_session_info {
/**
* struct hl_reset_info - holds current device reset information.
* @lock: lock to protect critical reset flows.
* @soft_reset_cnt: number of soft reset since the driver was loaded.
* @hard_reset_cnt: number of hard reset since the driver was loaded.
* @hard_reset_schedule_flags: hard reset is scheduled to after current soft reset,
* @compute_reset_cnt: number of compute resets since the driver was loaded.
* @hard_reset_cnt: number of hard resets since the driver was loaded.
* @hard_reset_schedule_flags: hard reset is scheduled to run after the current compute reset,
* here we hold the hard reset flags.
* @in_reset: is device in reset flow.
* @is_in_soft_reset: Device is currently in soft reset process.
* @in_compute_reset: Device is currently in reset but not in hard-reset.
* @needs_reset: true if reset_on_lockup is false and device should be reset
* due to lockup.
* @hard_reset_pending: is there a hard reset work pending.
Expand All @@ -2980,11 +2980,11 @@ struct last_error_session_info {
*/
struct hl_reset_info {
spinlock_t lock;
u32 soft_reset_cnt;
u32 compute_reset_cnt;
u32 hard_reset_cnt;
u32 hard_reset_schedule_flags;
u8 in_reset;
u8 is_in_soft_reset;
u8 in_compute_reset;
u8 needs_reset;
u8 hard_reset_pending;

Expand Down
2 changes: 1 addition & 1 deletion drivers/misc/habanalabs/common/habanalabs_ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args)
return -EINVAL;

reset_count.hard_reset_cnt = hdev->reset_info.hard_reset_cnt;
reset_count.soft_reset_cnt = hdev->reset_info.soft_reset_cnt;
reset_count.soft_reset_cnt = hdev->reset_info.compute_reset_cnt;

return copy_to_user(out, &reset_count,
min((size_t) max_size, sizeof(reset_count))) ? -EFAULT : 0;
Expand Down
2 changes: 1 addition & 1 deletion drivers/misc/habanalabs/common/irq.c
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
*/
dma_rmb();

if (hdev->disabled && !hdev->reset_info.is_in_soft_reset) {
if (hdev->disabled && !hdev->reset_info.in_compute_reset) {
dev_warn(hdev->dev, "Device disabled but received an EQ event\n");
goto skip_irq;
}
Expand Down
2 changes: 1 addition & 1 deletion drivers/misc/habanalabs/common/sysfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ static ssize_t soft_reset_cnt_show(struct device *dev,
{
struct hl_device *hdev = dev_get_drvdata(dev);

return sprintf(buf, "%d\n", hdev->reset_info.soft_reset_cnt);
return sprintf(buf, "%d\n", hdev->reset_info.compute_reset_cnt);
}

static ssize_t hard_reset_cnt_show(struct device *dev,
Expand Down
4 changes: 2 additions & 2 deletions drivers/misc/habanalabs/gaudi2/gaudi2.c
Original file line number Diff line number Diff line change
Expand Up @@ -2392,7 +2392,7 @@ static int gaudi2_cpucp_info_get(struct hl_device *hdev)
/* No point of asking this information again when not doing hard reset, as the device
* CPU hasn't been reset
*/
if (hdev->reset_info.is_in_soft_reset)
if (hdev->reset_info.in_compute_reset)
return 0;

rc = hl_fw_cpucp_handshake(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_DEV_STS1, mmCPU_BOOT_ERR0,
Expand Down Expand Up @@ -3014,7 +3014,7 @@ static int gaudi2_sw_init(struct hl_device *hdev)
hdev->supports_cb_mapping = true;
hdev->supports_wait_for_multi_cs = false;

prop->supports_soft_reset = true;
prop->supports_compute_reset = true;

hdev->asic_funcs->set_pci_memory_regions(hdev);

Expand Down
2 changes: 1 addition & 1 deletion drivers/misc/habanalabs/goya/goya.c
Original file line number Diff line number Diff line change
Expand Up @@ -1038,7 +1038,7 @@ static int goya_sw_init(struct hl_device *hdev)

spin_lock_init(&goya->hw_queues_lock);
hdev->supports_coresight = true;
hdev->asic_prop.supports_soft_reset = true;
hdev->asic_prop.supports_compute_reset = true;
hdev->asic_prop.allow_inference_soft_reset = true;
hdev->supports_wait_for_multi_cs = false;

Expand Down

0 comments on commit 0b0ae02

Please sign in to comment.