Skip to content

Commit

Permalink
habanalabs: remove compute context pointer
Browse files Browse the repository at this point in the history
It was an error to save the compute context's pointer in the device
structure, as it allowed the pointer to be used without proper reference counting.

Change the variable to a flag that only indicates whether there is
an active compute context. Code that needs the pointer will now
be forced to use proper internal APIs to get the pointer.

Signed-off-by: Oded Gabbay <[email protected]>
  • Loading branch information
ogabbay committed Dec 26, 2021
1 parent 4337b50 commit 5b90e59
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 14 deletions.
2 changes: 1 addition & 1 deletion drivers/misc/habanalabs/common/context.c
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
hpriv->ctx = ctx;

/* TODO: remove the following line for multiple process support */
hdev->compute_ctx = ctx;
hdev->is_compute_ctx_active = true;

return 0;

Expand Down
10 changes: 5 additions & 5 deletions drivers/misc/habanalabs/common/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,12 @@ static void hpriv_release(struct kref *ref)
|| hdev->reset_upon_device_release)
hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);

/* Now we can mark the compute_ctx as empty. Even if a reset is running in a different
/* Now we can mark the compute context as not active. Even if a reset is running in a different
* thread, we don't care because the in_reset is marked so if a user will try to open
* the device it will fail on that, even if compute_ctx is NULL.
* the device it will fail on that, even if is_compute_ctx_active is false.
*/
mutex_lock(&hdev->fpriv_list_lock);
hdev->compute_ctx = NULL;
hdev->is_compute_ctx_active = false;
mutex_unlock(&hdev->fpriv_list_lock);

kfree(hpriv);
Expand Down Expand Up @@ -1150,7 +1150,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
goto out_err;
}

hdev->compute_ctx = NULL;
hdev->is_compute_ctx_active = false;

rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
if (rc) {
Expand Down Expand Up @@ -1403,7 +1403,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
goto mmu_fini;
}

hdev->compute_ctx = NULL;
hdev->is_compute_ctx_active = false;

hdev->asic_funcs->state_dump_init(hdev);

Expand Down
5 changes: 2 additions & 3 deletions drivers/misc/habanalabs/common/habanalabs.h
Original file line number Diff line number Diff line change
Expand Up @@ -2503,7 +2503,6 @@ struct last_error_session_info {
* @fpriv_list: list of file private data structures. Each structure is created
* when a user opens the device
* @fpriv_list_lock: protects the fpriv_list
* @compute_ctx: current compute context executing.
* @aggregated_cs_counters: aggregated cs counters among all contexts
* @mmu_priv: device-specific MMU data.
* @mmu_func: device-related MMU functions.
Expand Down Expand Up @@ -2601,6 +2600,7 @@ struct last_error_session_info {
* cases where Linux was not loaded to device CPU
* @supports_wait_for_multi_cs: true if wait for multi CS is supported
* @is_in_soft_reset: Device is currently in soft reset process.
* @is_compute_ctx_active: Whether there is an active compute context executing.
*/
struct hl_device {
struct pci_dev *pdev;
Expand Down Expand Up @@ -2656,8 +2656,6 @@ struct hl_device {
struct list_head fpriv_list;
struct mutex fpriv_list_lock;

struct hl_ctx *compute_ctx;

struct hl_cs_counters_atomic aggregated_cs_counters;

struct hl_mmu_priv mmu_priv;
Expand Down Expand Up @@ -2730,6 +2728,7 @@ struct hl_device {
u8 supports_wait_for_multi_cs;
u8 stream_master_qid_arr_size;
u8 is_in_soft_reset;
u8 is_compute_ctx_active;

/* Parameters for bring-up */
u64 nic_ports_mask;
Expand Down
2 changes: 1 addition & 1 deletion drivers/misc/habanalabs/common/habanalabs_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
goto out_err;
}

if (hdev->compute_ctx) {
if (hdev->is_compute_ctx_active) {
dev_dbg_ratelimited(hdev->dev,
"Can't open %s because another user is working on it\n",
dev_name(hdev->dev));
Expand Down
4 changes: 2 additions & 2 deletions drivers/misc/habanalabs/goya/goya.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0

/*
* Copyright 2016-2019 HabanaLabs, Ltd.
* Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*/

Expand Down Expand Up @@ -827,7 +827,7 @@ static void goya_set_freq_to_low_job(struct work_struct *work)

mutex_lock(&hdev->fpriv_list_lock);

if (!hdev->compute_ctx)
if (!hdev->is_compute_ctx_active)
goya_set_frequency(hdev, PLL_LOW);

mutex_unlock(&hdev->fpriv_list_lock);
Expand Down
4 changes: 2 additions & 2 deletions drivers/misc/habanalabs/goya/goya_hwmgr.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0

/*
* Copyright 2016-2019 HabanaLabs, Ltd.
* Copyright 2016-2021 HabanaLabs, Ltd.
* All Rights Reserved.
*/

Expand Down Expand Up @@ -258,7 +258,7 @@ static ssize_t pm_mng_profile_store(struct device *dev,

mutex_lock(&hdev->fpriv_list_lock);

if (hdev->compute_ctx) {
if (hdev->is_compute_ctx_active) {
dev_err(hdev->dev,
"Can't change PM profile while compute context is opened on the device\n");
count = -EPERM;
Expand Down

0 comments on commit 5b90e59

Please sign in to comment.