[2/3] venus: Rework recovery mechanism

Message ID 20200730114632.6717-3-stanimir.varbanov@linaro.org
State New
Headers show
Series
  • Venus - recovery from firmware crash
Related show

Commit Message

Stanimir Varbanov July 30, 2020, 11:46 a.m.
After power domains and clock restructuring the recovery for
sdm845 and v4 did not work properly. Fix that by reworking the
recovery function and the sequence.

Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>

---
 drivers/media/platform/qcom/venus/core.c      | 24 ++++++++++---------
 drivers/media/platform/qcom/venus/hfi_venus.c | 11 ---------
 2 files changed, 13 insertions(+), 22 deletions(-)

-- 
2.17.1

Comments

Fritz Koenig Aug. 7, 2020, 9:29 p.m. | #1
On Thu, Jul 30, 2020 at 4:47 AM Stanimir Varbanov
<stanimir.varbanov@linaro.org> wrote:
>

> After power domains and clock restructuring the recovery for

> sdm845 and v4 did not work properly. Fix that by reworking the

> recovery function and the sequence.

>

> Signed-off-by: Stanimir Varbanov <stanimir.varbanov@linaro.org>

> ---

>  drivers/media/platform/qcom/venus/core.c      | 24 ++++++++++---------

>  drivers/media/platform/qcom/venus/hfi_venus.c | 11 ---------

>  2 files changed, 13 insertions(+), 22 deletions(-)

>

> diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c

> index 203c6538044f..46f6e34d435a 100644

> --- a/drivers/media/platform/qcom/venus/core.c

> +++ b/drivers/media/platform/qcom/venus/core.c

> @@ -6,6 +6,7 @@

>  #include <linux/init.h>

>  #include <linux/interconnect.h>

>  #include <linux/ioctl.h>

> +#include <linux/delay.h>

>  #include <linux/list.h>

>  #include <linux/module.h>

>  #include <linux/of_device.h>

> @@ -40,13 +41,7 @@ static void venus_event_notify(struct venus_core *core, u32 event)

>         mutex_unlock(&core->lock);

>

>         disable_irq_nosync(core->irq);

> -

> -       /*

> -        * Delay recovery to ensure venus has completed any pending cache

> -        * operations. Without this sleep, we see device reset when firmware is

> -        * unloaded after a system error.

> -        */

> -       schedule_delayed_work(&core->work, msecs_to_jiffies(100));

> +       schedule_delayed_work(&core->work, msecs_to_jiffies(10));

>  }

>

>  static const struct hfi_core_ops venus_core_ops = {

> @@ -59,23 +54,30 @@ static void venus_sys_error_handler(struct work_struct *work)

>                         container_of(work, struct venus_core, work.work);

>         int ret = 0;

>

> -       dev_warn(core->dev, "system error has occurred, starting recovery!\n");

> -

>         pm_runtime_get_sync(core->dev);

>

>         hfi_core_deinit(core, true);

> -       hfi_destroy(core);

> +

> +       dev_warn(core->dev, "system error has occurred, starting recovery!\n");

> +

>         mutex_lock(&core->lock);

> +

> +       while (pm_runtime_active(core->dev_dec) || pm_runtime_active(core->dev_enc))

> +               msleep(10);

> +

>         venus_shutdown(core);

>

>         pm_runtime_put_sync(core->dev);

>

> +       while (core->pmdomains[0] && pm_runtime_active(core->pmdomains[0]))

> +               usleep_range(1000, 1500);

> +

> +       hfi_destroy(core);

>         ret |= hfi_create(core, &venus_core_ops);

>

>         pm_runtime_get_sync(core->dev);

>

>         ret |= venus_boot(core);

> -

>         ret |= hfi_core_resume(core, true);

>

>         enable_irq(core->irq);

> diff --git a/drivers/media/platform/qcom/venus/hfi_venus.c b/drivers/media/platform/qcom/venus/hfi_venus.c

> index 0d8855014ab3..3392fd177d22 100644

> --- a/drivers/media/platform/qcom/venus/hfi_venus.c

> +++ b/drivers/media/platform/qcom/venus/hfi_venus.c

> @@ -986,13 +986,6 @@ static void venus_process_msg_sys_error(struct venus_hfi_device *hdev,

>

>         venus_set_state(hdev, VENUS_STATE_DEINIT);

>

> -       /*

> -        * Once SYS_ERROR received from HW, it is safe to halt the AXI.

> -        * With SYS_ERROR, Venus FW may have crashed and HW might be

> -        * active and causing unnecessary transactions. Hence it is

> -        * safe to stop all AXI transactions from venus subsystem.

> -        */

> -       venus_halt_axi(hdev);

>         venus_sfr_print(hdev);

>  }

>

> @@ -1009,10 +1002,6 @@ static irqreturn_t venus_isr_thread(struct venus_core *core)

>         res = hdev->core->res;

>         pkt = hdev->pkt_buf;

>

> -       if (hdev->irq_status & WRAPPER_INTR_STATUS_A2HWD_MASK) {

> -               venus_sfr_print(hdev);

> -               hfi_process_watchdog_timeout(core);

> -       }

>

>         while (!venus_iface_msgq_read(hdev, pkt)) {

>                 msg_ret = hfi_process_msg_packet(core, pkt);

> --

> 2.17.1

>

Reviewed-by: Fritz Koenig <frkoenig@chromium.org>

Patch

diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c
index 203c6538044f..46f6e34d435a 100644
--- a/drivers/media/platform/qcom/venus/core.c
+++ b/drivers/media/platform/qcom/venus/core.c
@@ -6,6 +6,7 @@ 
 #include <linux/init.h>
 #include <linux/interconnect.h>
 #include <linux/ioctl.h>
+#include <linux/delay.h>
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
@@ -40,13 +41,7 @@  static void venus_event_notify(struct venus_core *core, u32 event)
 	mutex_unlock(&core->lock);
 
 	disable_irq_nosync(core->irq);
-
-	/*
-	 * Delay recovery to ensure venus has completed any pending cache
-	 * operations. Without this sleep, we see device reset when firmware is
-	 * unloaded after a system error.
-	 */
-	schedule_delayed_work(&core->work, msecs_to_jiffies(100));
+	schedule_delayed_work(&core->work, msecs_to_jiffies(10));
 }
 
 static const struct hfi_core_ops venus_core_ops = {
@@ -59,23 +54,30 @@  static void venus_sys_error_handler(struct work_struct *work)
 			container_of(work, struct venus_core, work.work);
 	int ret = 0;
 
-	dev_warn(core->dev, "system error has occurred, starting recovery!\n");
-
 	pm_runtime_get_sync(core->dev);
 
 	hfi_core_deinit(core, true);
-	hfi_destroy(core);
+
+	dev_warn(core->dev, "system error has occurred, starting recovery!\n");
+
 	mutex_lock(&core->lock);
+
+	while (pm_runtime_active(core->dev_dec) || pm_runtime_active(core->dev_enc))
+		msleep(10);
+
 	venus_shutdown(core);
 
 	pm_runtime_put_sync(core->dev);
 
+	while (core->pmdomains[0] && pm_runtime_active(core->pmdomains[0]))
+		usleep_range(1000, 1500);
+
+	hfi_destroy(core);
 	ret |= hfi_create(core, &venus_core_ops);
 
 	pm_runtime_get_sync(core->dev);
 
 	ret |= venus_boot(core);
-
 	ret |= hfi_core_resume(core, true);
 
 	enable_irq(core->irq);
diff --git a/drivers/media/platform/qcom/venus/hfi_venus.c b/drivers/media/platform/qcom/venus/hfi_venus.c
index 0d8855014ab3..3392fd177d22 100644
--- a/drivers/media/platform/qcom/venus/hfi_venus.c
+++ b/drivers/media/platform/qcom/venus/hfi_venus.c
@@ -986,13 +986,6 @@  static void venus_process_msg_sys_error(struct venus_hfi_device *hdev,
 
 	venus_set_state(hdev, VENUS_STATE_DEINIT);
 
-	/*
-	 * Once SYS_ERROR received from HW, it is safe to halt the AXI.
-	 * With SYS_ERROR, Venus FW may have crashed and HW might be
-	 * active and causing unnecessary transactions. Hence it is
-	 * safe to stop all AXI transactions from venus subsystem.
-	 */
-	venus_halt_axi(hdev);
 	venus_sfr_print(hdev);
 }
 
@@ -1009,10 +1002,6 @@  static irqreturn_t venus_isr_thread(struct venus_core *core)
 	res = hdev->core->res;
 	pkt = hdev->pkt_buf;
 
-	if (hdev->irq_status & WRAPPER_INTR_STATUS_A2HWD_MASK) {
-		venus_sfr_print(hdev);
-		hfi_process_watchdog_timeout(core);
-	}
 
 	while (!venus_iface_msgq_read(hdev, pkt)) {
 		msg_ret = hfi_process_msg_packet(core, pkt);