gxp: [Copybara Auto Merge] Merge branch 'zuma' into 'android14-gs-pixel-5.15'

gxp: update wrong comment of rescue func gxp: introduce and adopt gxp_mcu_firmware_rescue Bug: 279990832 gxp: retry waiting for PG state Bug: 279990832 (repeat) gxp: retry handshake after resetting MCU Bug: 279990832 (repeat) gxp: introduce gxp_mcu_firmware_{start,end} Bug: 279990832 (repeat) gxp: send SET_DEVICE_PROPERTIES KCI in handshake Bug: 279990832 (repeat) gxp: wait_for_pg_state_locked doesn't try power cycle Bug: 279990832 (repeat) gxp: gxp_lpm_wait_state_* always returns true in unit tests Bug: 279990832 (repeat) gxp: introduce gxp_mcu_reset Bug: 279990832 (repeat) gxp: support LPM regs related to MCU reset Bug: 279990832 (repeat) gxp: log when receive thermal throttling request gcip: check block power state before fetching resp Bug: 283659596 gcip: Set .is_block_off to gcip_mailbox Bug: 283659596 (repeat) GCIP_MAIN_REV_ID: bcabccd58d3a03b78853deb760bbd5f5ac0ed0b2 gcip: Add .is_block_off to gcip_{kci,mailbox} Bug: 283659596 (repeat) GCIP_HEADER_REV_ID: 8f682a85d7239c985f2c3effee3b7297173ce6f0 gcip: Add gcip_pm_put_async GCIP_MAIN_REV_ID: 3f14e23fb16cd54fe2d6d9a4a0a9e44f2059400d gcip: Add gcip_pm_put_async GCIP_HEADER_REV_ID: e734285a3850340570a6470cb6a3c217ad5bc3e4 gxp: use gcip_pm_get_if_powered in debug dump Bug: 285089498 gxp: fix deadlock from gcip_pm_put in debug dump Bug: 284803350 gxp: print error code on sscd failure. 
Bug: 281935052 gcip: include: keep track of whether a gdomain is for the default domain GCIP_HEADER_REV_ID: f3365229f906227435d0a92c23bb4fb5afd12bee gcip: iommu: legacy mode do not double map for default domain gcip: keep track of whether a gdomain is for the default domain GCIP_MAIN_REV_ID: 95f0529ba13a8ff7cc25df9b6d5469cd37f4c9c3 gcip: Call iova_domain_init_rcaches on kernel >= 5.18 Bug: 284584586 GCIP_MAIN_REV_ID: 08b837f0e99cdd98ea1a092635b74f7c904aa867 gxp: don't reset tpu_client_id when releasing block wakelock Bug: 283992379 gxp: unittests: change WARN_ON to dev_warn in gxp_unmap_buffer Bug: 283723527 gxp: fix reference count in gxp_uci_send_command Bug: 281459732 GitOrigin-RevId: 9ed8110741935a13c930d42269b9d7f995163ddf (cherry picked from https://partner-android-review.googlesource.com/q/commit:064bc298d3d9e5a9850f5d599caa76c70a08bd0c) Merged-In: I1a590947ec6c92c21000e076a31967d76601d824 Change-Id: I1a590947ec6c92c21000e076a31967d76601d824
author: Aurora zuma automerger <aurora-zuma-automerger@google.com> 2023-06-07 09:19:49 +0000
committer: Cherrypicker Worker QA <android-build-cherrypicker-worker@system.gserviceaccount.com> 2023-06-09 22:24:16 +0000
commit: 155374974bb32a98994b492c6ec495018707c9f5 (patch)
tree: 4f0e507f3ebaa56c9218b2e62d887a603131229c
parent: ba9d2b042ab2e39d9edc327f44712cd28e960979 (diff)
download: zuma-155374974bb32a98994b492c6ec495018707c9f5.tar.gz
21 files changed, 525 insertions, 176 deletions
diff --git a/callisto/lpm.h b/callisto/lpm.h
index 81115d9..6782d9f 100644
--- a/callisto/lpm.h
+++ b/callisto/lpm.h
@@ -86,6 +86,7 @@ enum lpm_psm_base {
 #define PSM_BREAK_ADDR_OFFSET 0x694
 #define PSM_GPIN_LO_RD_OFFSET 0x6A0
 #define PSM_GPIN_HI_RD_OFFSET 0x6A4
+#define PSM_GPOUT_LO_WRT_OFFSET 0x6A8
 #define PSM_GPOUT_LO_RD_OFFSET 0x6B0
 #define PSM_GPOUT_HI_RD_OFFSET 0x6B4
 #define PSM_DEBUG_STATUS_OFFSET 0x6B8
@@ -94,8 +95,7 @@ static inline u32 gxp_lpm_psm_get_status_offset(enum gxp_lpm_psm psm)
 {
 	if (psm >= LPM_NUM_PSMS)
 		return 0;
-	return GXP_LPM_PSM_0_BASE + (GXP_LPM_PSM_SIZE * psm) +
-	       PSM_STATUS_OFFSET;
+	return GXP_LPM_PSM_0_BASE + (GXP_LPM_PSM_SIZE * psm) + PSM_STATUS_OFFSET;
 }
 
 static inline u32 gxp_lpm_psm_get_start_offset(enum gxp_lpm_psm psm)
@@ -117,8 +117,36 @@ static inline u32 gxp_lpm_psm_get_state_offset(enum gxp_lpm_psm psm, uint state)
 	if (psm >= LPM_NUM_PSMS || state > 3)
 		return 0;
 
-	return GXP_LPM_PSM_0_BASE + (GXP_LPM_PSM_SIZE * psm) +
-	       LPM_REG_ENABLE_STATE_0 + (PSM_STATE_TABLE_SZ * state);
+	return GXP_LPM_PSM_0_BASE + (GXP_LPM_PSM_SIZE * psm) + LPM_REG_ENABLE_STATE_0 +
+	       (PSM_STATE_TABLE_SZ * state);
+}
+
+static inline u32 gxp_lpm_psm_get_debug_cfg_offset(enum gxp_lpm_psm psm)
+{
+	if (psm >= LPM_NUM_PSMS)
+		return 0;
+	return GXP_LPM_PSM_0_BASE + (GXP_LPM_PSM_SIZE * psm) + PSM_DEBUG_CFG_OFFSET;
+}
+
+static inline u32 gxp_lpm_psm_get_gpin_lo_rd_offset(enum gxp_lpm_psm psm)
+{
+	if (psm >= LPM_NUM_PSMS)
+		return 0;
+	return GXP_LPM_PSM_0_BASE + (GXP_LPM_PSM_SIZE * psm) + PSM_GPIN_LO_RD_OFFSET;
+}
+
+static inline u32 gxp_lpm_psm_get_gpout_lo_wrt_offset(enum gxp_lpm_psm psm)
+{
+	if (psm >= LPM_NUM_PSMS)
+		return 0;
+	return GXP_LPM_PSM_0_BASE + (GXP_LPM_PSM_SIZE * psm) + PSM_GPOUT_LO_WRT_OFFSET;
+}
+
+static inline u32 gxp_lpm_psm_get_gpout_lo_rd_offset(enum gxp_lpm_psm psm)
+{
+	if (psm >= LPM_NUM_PSMS)
+		return 0;
+	return GXP_LPM_PSM_0_BASE + (GXP_LPM_PSM_SIZE * psm) + PSM_GPOUT_LO_RD_OFFSET;
 }
 
 #endif /* __CALLISTO_LPM_H__ */
diff --git a/gcip-kernel-driver/drivers/gcip/gcip-iommu.c b/gcip-kernel-driver/drivers/gcip/gcip-iommu.c
index 75509cd..50170c6 100644
--- a/gcip-kernel-driver/drivers/gcip/gcip-iommu.c
+++ b/gcip-kernel-driver/drivers/gcip/gcip-iommu.c
@@ -85,7 +85,11 @@ static int iovad_initialize_domain(struct gcip_iommu_domain *domain)
 	init_iova_domain(&domain->iova_space.iovad, dpool->granule,
 			 max_t(unsigned long, 1, dpool->base_daddr >> ilog2(dpool->granule)));
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 18, 0)
+	return iova_domain_init_rcaches(&domain->iova_space.iovad);
+#else
 	return 0;
+#endif
 }
 
 static void iovad_finalize_domain(struct gcip_iommu_domain *domain)
@@ -204,12 +208,14 @@ static ssize_t dma_iommu_map_sg(struct gcip_iommu_domain *domain, struct scatter
 	if (!nents_mapped)
 		return 0;
 
-	iova = sg_dma_address(sgl);
+	if (!domain->default_domain) {
+		iova = sg_dma_address(sgl);
 
-	ret = (ssize_t)iommu_map_sg(domain->domain, iova, sgl, nents, prot);
-	if (ret <= 0) {
-		dma_unmap_sg_attrs(domain->dev, sgl, nents, dir, attrs);
-		return 0;
+		ret = (ssize_t)iommu_map_sg(domain->domain, iova, sgl, nents, prot);
+		if (ret <= 0) {
+			dma_unmap_sg_attrs(domain->dev, sgl, nents, dir, attrs);
+			return 0;
+		}
 	}
 
 	return nents_mapped;
@@ -222,11 +228,13 @@ static void dma_iommu_unmap_sg(struct gcip_iommu_domain *domain, struct scatterl
 	size_t size = 0;
 	int i;
 
-	for_each_sg (sgl, sg, nents, i)
-		size += sg_dma_len(sg);
+	if (!domain->default_domain) {
+		for_each_sg (sgl, sg, nents, i)
+			size += sg_dma_len(sg);
 
-	if (!iommu_unmap(domain->domain, sg_dma_address(sgl), size))
-		dev_warn(domain->dev, "Failed to unmap sg");
+		if (!iommu_unmap(domain->domain, sg_dma_address(sgl), size))
+			dev_warn(domain->dev, "Failed to unmap sg");
+	}
 	dma_unmap_sg_attrs(domain->dev, sgl, nents, dir, attrs);
 }
 
@@ -443,6 +451,7 @@ struct gcip_iommu_domain *gcip_iommu_get_domain_for_dev(struct device *dev)
 
 	gdomain->dev = dev;
 	gdomain->legacy_mode = true;
+	gdomain->default_domain = true;
 
 	return gdomain;
 }
diff --git a/gcip-kernel-driver/drivers/gcip/gcip-kci.c b/gcip-kernel-driver/drivers/gcip/gcip-kci.c
index 417b078..dd8174c 100644
--- a/gcip-kernel-driver/drivers/gcip/gcip-kci.c
+++ b/gcip-kernel-driver/drivers/gcip/gcip-kci.c
@@ -242,6 +242,13 @@ static bool gcip_kci_before_handle_resp(struct gcip_mailbox *mailbox, const void
 	return true;
 }
 
+static inline bool gcip_kci_is_block_off(struct gcip_mailbox *mailbox)
+{
+	struct gcip_kci *kci = gcip_mailbox_get_data(mailbox);
+
+	return kci->ops->is_block_off ? kci->ops->is_block_off(kci) : false;
+}
+
 static const struct gcip_mailbox_ops gcip_mailbox_ops = {
 	.get_cmd_queue_head = gcip_kci_get_cmd_queue_head,
 	.get_cmd_queue_tail = gcip_kci_get_cmd_queue_tail,
@@ -265,6 +272,7 @@ static const struct gcip_mailbox_ops gcip_mailbox_ops = {
 	.after_enqueue_cmd = gcip_kci_after_enqueue_cmd,
 	.after_fetch_resps = gcip_kci_after_fetch_resps,
 	.before_handle_resp = gcip_kci_before_handle_resp,
+	.is_block_off = gcip_kci_is_block_off,
 };
 
 /*
@@ -357,7 +365,7 @@ static int gcip_reverse_kci_remove_resp(struct gcip_reverse_kci *rkci,
 	 * Prevents the compiler from discarding and reloading its cached value additionally forces
 	 * the CPU to order against subsequent memory references.
 	 * Shamelessly stolen from:
-	 * https://www.kernel.org/doc/html/latest/core-api/circular-buffers.html
+	 * [REDACTED]
 	 */
 	head = smp_load_acquire(&rkci->head);
 	tail = rkci->tail;
diff --git a/gcip-kernel-driver/drivers/gcip/gcip-mailbox.c b/gcip-kernel-driver/drivers/gcip/gcip-mailbox.c
index 4571aa9..afa67c8 100644
--- a/gcip-kernel-driver/drivers/gcip/gcip-mailbox.c
+++ b/gcip-kernel-driver/drivers/gcip/gcip-mailbox.c
@@ -48,6 +48,8 @@
 #define RELEASE_WAIT_LIST_LOCK(irqrestore, flags)                                                  \
 	mailbox->ops->release_wait_list_lock(mailbox, irqrestore, flags)
 
+#define IS_BLOCK_OFF() (mailbox->ops->is_block_off ? mailbox->ops->is_block_off(mailbox) : false)
+
 struct gcip_mailbox_wait_list_elem {
 	struct list_head list;
 	struct gcip_mailbox_async_resp *async_resp;
@@ -331,8 +333,8 @@ static void *gcip_mailbox_fetch_responses(struct gcip_mailbox *mailbox, u32 *tot
 	void *prev_ptr = NULL; /* Temporary pointer to realloc ret. */
 	bool atomic = false;
 
-	/* Someone is working on consuming - we can leave early. */
-	if (!ACQUIRE_RESP_QUEUE_LOCK(true, &atomic))
+	/* The block is off or someone is working on consuming - we can leave early. */
+	if (IS_BLOCK_OFF() || !ACQUIRE_RESP_QUEUE_LOCK(true, &atomic))
 		goto out;
 
 	head = GET_RESP_QUEUE_HEAD();
@@ -396,7 +398,7 @@ static int gcip_mailbox_fetch_one_response(struct gcip_mailbox *mailbox, void *r
 	u32 tail;
 	bool atomic;
 
-	if (!ACQUIRE_RESP_QUEUE_LOCK(true, &atomic))
+	if (IS_BLOCK_OFF() || !ACQUIRE_RESP_QUEUE_LOCK(true, &atomic))
 		return 0;
 
 	head = GET_RESP_QUEUE_HEAD();
diff --git a/gcip-kernel-driver/drivers/gcip/gcip-pm.c b/gcip-kernel-driver/drivers/gcip/gcip-pm.c
index b9907a1..50bc74b 100644
--- a/gcip-kernel-driver/drivers/gcip/gcip-pm.c
+++ b/gcip-kernel-driver/drivers/gcip/gcip-pm.c
@@ -52,6 +52,14 @@ static void gcip_pm_async_power_down_work(struct work_struct *work)
 	mutex_unlock(&pm->lock);
 }
 
+/* Worker for async gcip_pm_put(). */
+static void gcip_pm_async_put_work(struct work_struct *work)
+{
+	struct gcip_pm *pm = container_of(work, struct gcip_pm, put_async_work);
+
+	gcip_pm_put(pm);
+}
+
 struct gcip_pm *gcip_pm_create(const struct gcip_pm_args *args)
 {
 	struct gcip_pm *pm;
@@ -73,6 +81,7 @@ struct gcip_pm *gcip_pm_create(const struct gcip_pm_args *args)
 
 	mutex_init(&pm->lock);
 	INIT_DELAYED_WORK(&pm->power_down_work, gcip_pm_async_power_down_work);
+	INIT_WORK(&pm->put_async_work, gcip_pm_async_put_work);
 
 	if (pm->after_create) {
 		ret = pm->after_create(pm->data);
@@ -186,6 +195,16 @@ unlock:
 	mutex_unlock(&pm->lock);
 }
 
+void gcip_pm_put_async(struct gcip_pm *pm)
+{
+	schedule_work(&pm->put_async_work);
+}
+
+void gcip_pm_flush_put_work(struct gcip_pm *pm)
+{
+	flush_work(&pm->put_async_work);
+}
+
 int gcip_pm_get_count(struct gcip_pm *pm)
 {
 	if (!pm)
diff --git a/gcip-kernel-driver/include/gcip/gcip-iommu.h b/gcip-kernel-driver/include/gcip/gcip-iommu.h
index 1797f94..34f6efa 100644
--- a/gcip-kernel-driver/include/gcip/gcip-iommu.h
+++ b/gcip-kernel-driver/include/gcip/gcip-iommu.h
@@ -104,6 +104,7 @@ struct gcip_iommu_domain {
 	struct gcip_iommu_domain_pool *domain_pool;
 	struct iommu_domain *domain;
 	bool legacy_mode;
+	bool default_domain;
 	union {
 		struct iova_domain iovad;
 		struct gcip_mem_pool mem_pool;
@@ -238,13 +239,13 @@ static inline bool gcip_iommu_domain_is_legacy_mode(struct gcip_iommu_domain *do
  *               00 = DMA_BIDIRECTIONAL (host/device can write buffer)
  *               01 = DMA_TO_DEVICE     (host can write buffer)
  *               10 = DMA_FROM_DEVICE   (device can write buffer)
- *               (See https://docs.kernel.org/core-api/dma-api-howto.html#dma-direction)
+ *               (See [REDACTED])
  *   [2:2]   - Coherent Mapping:
  *               0 = Create non-coherent mappings of the buffer.
  *               1 = Create coherent mappings of the buffer.
  *   [12:3]  - DMA_ATTR:
  *               Not used in the non-legacy mode.
- *               (See https://www.kernel.org/doc/Documentation/core-api/dma-attributes.rst)
+ *               (See [REDACTED])
  *   [63:13] - RESERVED
  *               Set RESERVED bits to 0 to ensure backwards compatibility.
  *
diff --git a/gcip-kernel-driver/include/gcip/gcip-kci.h b/gcip-kernel-driver/include/gcip/gcip-kci.h
index 1cfc82e..74670de 100644
--- a/gcip-kernel-driver/include/gcip/gcip-kci.h
+++ b/gcip-kernel-driver/include/gcip/gcip-kci.h
@@ -233,6 +233,11 @@ struct gcip_kci_ops {
 	 * Context: normal.
 	 */
 	int (*update_usage)(struct gcip_kci *kci);
+	/*
+	 * Checks if the block is off.
+	 * Context: in_interrupt().
+	 */
+	bool (*is_block_off)(struct gcip_kci *kci);
 };
 
 struct gcip_kci {
diff --git a/gcip-kernel-driver/include/gcip/gcip-mailbox.h b/gcip-kernel-driver/include/gcip/gcip-mailbox.h
index af48ba6..b16c15f 100644
--- a/gcip-kernel-driver/include/gcip/gcip-mailbox.h
+++ b/gcip-kernel-driver/include/gcip/gcip-mailbox.h
@@ -344,6 +344,11 @@ struct gcip_mailbox_ops {
 	 * Context: normal and in_interrupt().
 	 */
 	void (*release_awaiter_data)(void *data);
+	/*
+	 * Checks if the block is off.
+	 * Context: in_interrupt()
+	 */
+	bool (*is_block_off)(struct gcip_mailbox *mailbox);
 };
 
 struct gcip_mailbox {
diff --git a/gcip-kernel-driver/include/gcip/gcip-pm.h b/gcip-kernel-driver/include/gcip/gcip-pm.h
index 1e6ce05..7e3a3e4 100644
--- a/gcip-kernel-driver/include/gcip/gcip-pm.h
+++ b/gcip-kernel-driver/include/gcip/gcip-pm.h
@@ -23,6 +23,8 @@ struct gcip_pm {
 	int count;
 	/* Flag indicating a deferred power down is pending. Protected by @lock */
 	bool power_down_pending;
+	/* The worker to asynchronously call gcip_pm_put(). */
+	struct work_struct put_async_work;
 
 	/* Callbacks. See struct gcip_pm_args. */
 	void *data;
@@ -99,6 +101,12 @@ int gcip_pm_get(struct gcip_pm *pm);
  */
 void gcip_pm_put(struct gcip_pm *pm);
 
+/* Schedules an asynchronous job to execute gcip_pm_put(). */
+void gcip_pm_put_async(struct gcip_pm *pm);
+
+/* Flushes the pending pm_put work if any. */
+void gcip_pm_flush_put_work(struct gcip_pm *pm);
+
 /* Gets the power up counter. Note that this is checked without PM lock. */
 int gcip_pm_get_count(struct gcip_pm *pm);
 
diff --git a/gxp-common-platform.c b/gxp-common-platform.c
index 916603a..13bfbc2 100644
--- a/gxp-common-platform.c
+++ b/gxp-common-platform.c
@@ -311,7 +311,10 @@ static int gxp_unmap_buffer(struct gxp_client *client,
 		goto out_put;
 	}
 
-	WARN_ON(map->host_address != ibuf.host_address);
+	if (map->host_address != ibuf.host_address)
+		dev_warn(
+			gxp->dev,
+			"The host address of the unmap request is different from the original one\n");
 
 	gxp_vd_mapping_remove(client->vd, map);
 	gxp_mapping_iova_log(client, map,
@@ -2281,6 +2284,11 @@ static int gxp_common_platform_remove(struct platform_device *pdev)
 {
 	struct gxp_dev *gxp = platform_get_drvdata(pdev);
 
+	/*
+	 * Flushing the pending put work may power off the BLK, so do it first,
+	 * before releasing any resource.
+	 */
+	gcip_pm_flush_put_work(gxp->power_mgr->pm);
 	gxp_device_remove(gxp);
 	if (gxp->before_remove)
 		gxp->before_remove(gxp);
diff --git a/gxp-debug-dump.c b/gxp-debug-dump.c
index 188aaf2..7491e7c 100644
--- a/gxp-debug-dump.c
+++ b/gxp-debug-dump.c
@@ -294,11 +294,17 @@ static int gxp_get_common_dump(struct gxp_dev *gxp)
 		&common_dump->common_dump_data;
 	int ret;
 
-	/* Power on BLK_AUR to read the common registers */
-	ret = gcip_pm_get(gxp->power_mgr->pm);
+	/*
+	 * Keep BLK_AUR on to read the common registers. If BLK_AUR is off or
+	 * another thread is doing power operations, i.e. holding the pm lock,
+	 * give up reading the registers. In the former case the register values
+	 * are already lost since BLK_AUR is off; in the latter case this prevents
+	 * any possible deadlock.
+	 */
+	ret = gcip_pm_get_if_powered(gxp->power_mgr->pm, /*blocking=*/false);
 	if (ret) {
-		dev_err(gxp->dev,
-			"Failed to acquire wakelock for getting common dump\n");
+		dev_err(gxp->dev, "Failed to acquire wakelock for getting common dump, ret:%d\n",
+			ret);
 		return ret;
 	}
 	gxp_pm_update_requested_power_states(gxp, off_states, uud_states);
@@ -309,7 +315,13 @@ static int gxp_get_common_dump(struct gxp_dev *gxp)
 	gxp_get_lpm_registers(gxp, &common_seg_header[GXP_LPM_REGISTERS_IDX],
 			      &common_dump_data->lpm_regs);
 
-	gcip_pm_put(gxp->power_mgr->pm);
+	/*
+	 * Calling gcip_pm_put() here might power MCU down and handle RKCI to form
+	 * a lock dependency cycle.
+	 * To avoid this, call it asynchronously.
+	 */
+	gcip_pm_put_async(gxp->power_mgr->pm);
+
 	gxp_pm_update_requested_power_states(gxp, uud_states, off_states);
 
 	dev_dbg(gxp->dev, "Segment Header for Common Segment\n");
@@ -326,6 +338,7 @@ static int gxp_get_common_dump(struct gxp_dev *gxp)
 static void gxp_send_to_sscd(struct gxp_dev *gxp, void *segs, int seg_cnt,
 			     const char *info)
 {
+	int ret;
 	struct gxp_debug_dump_manager *mgr = gxp->debug_dump_mgr;
 	struct sscd_platform_data *pdata =
 		(struct sscd_platform_data *)mgr->sscd_pdata;
@@ -335,9 +348,10 @@ static void gxp_send_to_sscd(struct gxp_dev *gxp, void *segs, int seg_cnt,
 		return;
 	}
 
-	if (pdata->sscd_report(gxp->debug_dump_mgr->sscd_dev, segs, seg_cnt,
-			       SSCD_FLAGS_ELFARM64HDR, info)) {
-		dev_err(gxp->dev, "Unable to send the report to SSCD daemon\n");
+	ret = pdata->sscd_report(gxp->debug_dump_mgr->sscd_dev, segs, seg_cnt,
+				 SSCD_FLAGS_ELFARM64HDR, info);
+	if (ret) {
+		dev_err(gxp->dev, "Unable to send the report to SSCD daemon (ret=%d)\n", ret);
 		return;
 	}
 }
@@ -819,7 +833,7 @@ int gxp_debug_dump_process_dump_mcu_mode(struct gxp_dev *gxp, uint core_list,
 	lockdep_assert_held(&crashed_vd->debug_dump_lock);
 
 	if (crashed_vd->state != GXP_VD_UNAVAILABLE) {
-		dev_dbg(gxp->dev, "Invalid vd state=%u for processing dumps.\n",
+		dev_err(gxp->dev, "Invalid vd state=%u for processing dumps.\n",
 			crashed_vd->state);
 		return -EINVAL;
 	}
diff --git a/gxp-lpm.c b/gxp-lpm.c
index 4897f48..fcf5bf5 100644
--- a/gxp-lpm.c
+++ b/gxp-lpm.c
@@ -17,20 +17,23 @@
 #include "gxp-internal.h"
 #include "gxp-lpm.h"
 
-#define gxp_lpm_wait_until(lpm_state, condition)                               \
-	do {                                                                   \
-		int i = 100000;                                                \
-		while (i) {                                                    \
-			lpm_state =                                            \
-				lpm_read_32_psm(gxp, psm, PSM_REG_STATUS_OFFSET) & \
-				PSM_CURR_STATE_MASK;                           \
-			if (condition)                                         \
-				break;                                         \
-			udelay(2 * GXP_TIME_DELAY_FACTOR);                     \
-			i--;                                                   \
-		}                                                              \
-		return i != 0;                                                 \
+#if IS_GXP_TEST
+#define gxp_lpm_wait_until(ret, ...) ((ret) = true)
+#else
+#define gxp_lpm_wait_until(ret, lpm_state, condition)                                  \
+	do {                                                                           \
+		int i = 100000;                                                        \
+		while (i) {                                                            \
+			lpm_state = lpm_read_32_psm(gxp, psm, PSM_REG_STATUS_OFFSET) & \
+				    PSM_CURR_STATE_MASK;                               \
+			if (condition)                                                 \
+				break;                                                 \
+			udelay(2 * GXP_TIME_DELAY_FACTOR);                             \
+			i--;                                                           \
+		}                                                                      \
+		ret = (i != 0);                                                        \
 	} while (0)
+#endif
 
 void gxp_lpm_enable_state(struct gxp_dev *gxp, enum gxp_lpm_psm psm, uint state)
 {
@@ -241,14 +244,20 @@ void gxp_lpm_down(struct gxp_dev *gxp, uint core)
 
 bool gxp_lpm_wait_state_ne(struct gxp_dev *gxp, enum gxp_lpm_psm psm, uint state)
 {
-	uint lpm_state;
+	__maybe_unused uint lpm_state;
+	bool ret;
 
-	gxp_lpm_wait_until(lpm_state, lpm_state != state);
+	gxp_lpm_wait_until(ret, lpm_state, lpm_state != state);
+
+	return ret;
 }
 
 bool gxp_lpm_wait_state_eq(struct gxp_dev *gxp, enum gxp_lpm_psm psm, uint state)
 {
-	uint lpm_state;
+	__maybe_unused uint lpm_state;
+	bool ret;
+
+	gxp_lpm_wait_until(ret, lpm_state, lpm_state == state);
 
-	gxp_lpm_wait_until(lpm_state, lpm_state == state);
+	return ret;
 }
diff --git a/gxp-lpm.h b/gxp-lpm.h
index 5af1c89..3c3f8b0 100644
--- a/gxp-lpm.h
+++ b/gxp-lpm.h
@@ -28,6 +28,10 @@ enum psm_reg_offset {
 	PSM_REG_START_OFFSET,
 	PSM_REG_STATUS_OFFSET,
 	PSM_REG_CFG_OFFSET,
+	PSM_REG_DEBUG_CFG_OFFSET,
+	PSM_REG_GPIN_LO_RD_OFFSET,
+	PSM_REG_GPOUT_LO_WRT_OFFSET,
+	PSM_REG_GPOUT_LO_RD_OFFSET,
 };
 
 #define LPM_INSTRUCTION_OFFSET 0x00000944
@@ -134,6 +138,14 @@ static u32 get_reg_offset(struct gxp_dev *gxp, enum psm_reg_offset reg_offset, e
 		return gxp_lpm_psm_get_status_offset(psm);
 	case PSM_REG_CFG_OFFSET:
 		return gxp_lpm_psm_get_cfg_offset(psm);
+	case PSM_REG_DEBUG_CFG_OFFSET:
+		return gxp_lpm_psm_get_debug_cfg_offset(psm);
+	case PSM_REG_GPIN_LO_RD_OFFSET:
+		return gxp_lpm_psm_get_gpin_lo_rd_offset(psm);
+	case PSM_REG_GPOUT_LO_WRT_OFFSET:
+		return gxp_lpm_psm_get_gpout_lo_wrt_offset(psm);
+	case PSM_REG_GPOUT_LO_RD_OFFSET:
+		return gxp_lpm_psm_get_gpout_lo_rd_offset(psm);
 	}
 
 	return 0;
diff --git a/gxp-mcu-firmware.c b/gxp-mcu-firmware.c
index b95bab9..c543405 100644
--- a/gxp-mcu-firmware.c
+++ b/gxp-mcu-firmware.c
@@ -50,6 +50,9 @@
 /* Value of Magic field in the common header "DSPF' as a 32-bit LE int */
 #define GXP_FW_MAGIC 0x46505344
 
+/* The number of times trying to rescue MCU. */
+#define MCU_RESCUE_TRY 3
+
 /*
  * Programs instruction remap CSRs.
  */
@@ -80,39 +83,107 @@ static bool is_signed_firmware(const struct firmware *fw,
 	return true;
 }
 
+static int gxp_mcu_firmware_handshake(struct gxp_mcu_firmware *mcu_fw)
+{
+	struct gxp_dev *gxp = mcu_fw->gxp;
+	struct gxp_mcu *mcu = container_of(mcu_fw, struct gxp_mcu, fw);
+	enum gcip_fw_flavor fw_flavor;
+	int ret;
+
+	dev_dbg(gxp->dev, "Detecting MCU firmware info...");
+	mcu_fw->fw_info.fw_build_time = 0;
+	mcu_fw->fw_info.fw_flavor = GCIP_FW_FLAVOR_UNKNOWN;
+	mcu_fw->fw_info.fw_changelist = 0;
+	fw_flavor = gxp_kci_fw_info(&mcu->kci, &mcu_fw->fw_info);
+	if (fw_flavor < 0) {
+		dev_err(gxp->dev, "MCU firmware handshake failed: %d",
+			fw_flavor);
+		mcu_fw->fw_info.fw_flavor = GCIP_FW_FLAVOR_UNKNOWN;
+		mcu_fw->fw_info.fw_changelist = 0;
+		mcu_fw->fw_info.fw_build_time = 0;
+		return fw_flavor;
+	}
+
+	dev_info(gxp->dev, "loaded %s MCU firmware (%u)",
+		 gcip_fw_flavor_str(fw_flavor), mcu_fw->fw_info.fw_changelist);
+
+	gxp_bpm_stop(gxp, GXP_MCU_CORE_ID);
+	dev_notice(gxp->dev, "MCU Instruction read transactions: 0x%x\n",
+		   gxp_bpm_read_counter(gxp, GXP_MCU_CORE_ID, INST_BPM_OFFSET));
+
+	ret = gxp_mcu_telemetry_kci(mcu);
+	if (ret)
+		dev_warn(gxp->dev, "telemetry KCI error: %d", ret);
+
+	ret = gcip_thermal_restore_on_powering(gxp->thermal);
+	if (ret)
+		dev_warn(gxp->dev, "thermal restore error: %d", ret);
+
+	ret = gxp_kci_set_device_properties(&mcu->kci, &gxp->device_prop);
+	if (ret)
+		dev_warn(gxp->dev, "Failed to pass device_prop to fw: %d\n", ret);
+
+	return 0;
+}
+
 /*
- * Waits for the MCU LPM transition to the PG state. If it fails, it will reboot the whole block.
+ * Waits for the MCU LPM transition to the PG state.
  *
  * Must be called with holding @mcu_fw->lock.
  *
  * @ring_doorbell: If the situation is that the MCU cannot execute the transition by itself such
  *                 as HW watchdog timeout, it must be passed as true to trigger the doorbell and
  *                 let the MCU do that forcefully.
+ *
+ * Returns true if MCU successfully transited to PG state, otherwise false.
  */
-static int wait_for_pg_state_locked(struct gxp_dev *gxp, bool ring_doorbell)
+static bool wait_for_pg_state_locked(struct gxp_dev *gxp, bool ring_doorbell)
 {
 	struct gxp_mcu *mcu = &to_mcu_dev(gxp)->mcu;
 	struct gxp_mcu_firmware *mcu_fw = gxp_mcu_firmware_of(gxp);
-	int ret = 0;
+	int try = MCU_RESCUE_TRY, ret;
 
 	lockdep_assert_held(&mcu_fw->lock);
 
-	if (ring_doorbell) {
-		gxp_mailbox_set_control(mcu->kci.mbx, GXP_MBOX_CONTROL_MAGIC_POWER_DOWN);
-		gxp_doorbell_enable_for_core(gxp, CORE_WAKEUP_DOORBELL(GXP_MCU_CORE_ID),
-					     GXP_MCU_CORE_ID);
-		gxp_doorbell_set(gxp, CORE_WAKEUP_DOORBELL(GXP_MCU_CORE_ID));
-	}
+	do {
+		if (ring_doorbell) {
+			gxp_mailbox_set_control(mcu->kci.mbx, GXP_MBOX_CONTROL_MAGIC_POWER_DOWN);
+			gxp_doorbell_enable_for_core(gxp, CORE_WAKEUP_DOORBELL(GXP_MCU_CORE_ID),
+						     GXP_MCU_CORE_ID);
+			gxp_doorbell_set(gxp, CORE_WAKEUP_DOORBELL(GXP_MCU_CORE_ID));
+		}
 
-	if (!gxp_lpm_wait_state_eq(gxp, CORE_TO_PSM(GXP_MCU_CORE_ID), LPM_PG_STATE)) {
-		dev_warn(
-			gxp->dev,
-			"MCU PSM transition to PS3 fails, current state: %u. Falling back to power cycle AUR block.\n",
-			gxp_lpm_get_state(gxp, CORE_TO_PSM(GXP_MCU_CORE_ID)));
-		ret = gxp_pm_blk_reboot(gxp, 5000);
-	}
+		if (gxp_lpm_wait_state_eq(gxp, CORE_TO_PSM(GXP_MCU_CORE_ID), LPM_PG_STATE))
+			return true;
 
-	return ret;
+		dev_warn(gxp->dev, "MCU PSM transition to PS3 fails, current state: %u, try: %d",
+			 gxp_lpm_get_state(gxp, CORE_TO_PSM(GXP_MCU_CORE_ID)), try);
+
+		/*
+		 * If PG transition fails, MCU will not fall into WFI after the reset below.
+		 * Therefore, we must ring doorbell to let it fall into WFI from the next try.
+		 */
+		ring_doorbell = true;
+
+		ret = gxp_mcu_reset(gxp, true);
+		if (ret) {
+			dev_err(gxp->dev, "Failed to reset MCU after PG transition fails (ret=%d)",
+				ret);
+			continue;
+		}
+
+		/*
+		 * We should give enough time to MCU to register doorbell handler. We hope MCU
+		 * successfully registers the handler after the reset even if the handshake fails.
+		 */
+		ret = gxp_mcu_firmware_handshake(mcu_fw);
+		if (ret)
+			dev_err(gxp->dev,
+				"Failed to handshake with MCU after PG transition fails (ret=%d)",
+				ret);
+	} while (--try > 0);
+
+	return false;
 }
 
 int gxp_mcu_firmware_load(struct gxp_dev *gxp, char *fw_name,
@@ -218,43 +289,129 @@ void gxp_mcu_firmware_unload(struct gxp_dev *gxp, const struct firmware *fw)
 	mutex_unlock(&mcu_fw->lock);
 }
 
-static int gxp_mcu_firmware_handshake(struct gxp_mcu_firmware *mcu_fw)
+/*
+ * Boots up the MCU and, in the non-secure mode, programs the instruction remap CSRs.
+ * It sends `START` command to GSA in the secure mode.
+ */
+static int gxp_mcu_firmware_start(struct gxp_mcu_firmware *mcu_fw)
 {
 	struct gxp_dev *gxp = mcu_fw->gxp;
-	struct gxp_mcu *mcu = container_of(mcu_fw, struct gxp_mcu, fw);
-	enum gcip_fw_flavor fw_flavor;
-	int ret;
+	int ret, state;
 
-	dev_dbg(gxp->dev, "Detecting MCU firmware info...");
-	mcu_fw->fw_info.fw_build_time = 0;
-	mcu_fw->fw_info.fw_flavor = GCIP_FW_FLAVOR_UNKNOWN;
-	mcu_fw->fw_info.fw_changelist = 0;
-	fw_flavor = gxp_kci_fw_info(&mcu->kci, &mcu_fw->fw_info);
-	if (fw_flavor < 0) {
-		dev_err(gxp->dev, "MCU firmware handshake failed: %d",
-			fw_flavor);
-		mcu_fw->fw_info.fw_flavor = GCIP_FW_FLAVOR_UNKNOWN;
-		mcu_fw->fw_info.fw_changelist = 0;
-		mcu_fw->fw_info.fw_build_time = 0;
-		return fw_flavor;
+	gxp_bpm_configure(gxp, GXP_MCU_CORE_ID, INST_BPM_OFFSET,
+			  BPM_EVENT_READ_XFER);
+
+	ret = gxp_lpm_up(gxp, GXP_MCU_CORE_ID);
+	if (ret)
+		return ret;
+
+	if (mcu_fw->is_secure) {
+		state = gsa_send_dsp_cmd(gxp->gsa_dev, GSA_DSP_START);
+		if (state != GSA_DSP_STATE_RUNNING) {
+			gxp_lpm_down(gxp, GXP_MCU_CORE_ID);
+			return -EIO;
+		}
+	} else {
+		program_iremap_csr(gxp, &mcu_fw->image_buf);
+		/* Raise wakeup doorbell */
+		dev_dbg(gxp->dev, "Raising doorbell %d interrupt\n",
+			CORE_WAKEUP_DOORBELL(GXP_MCU_CORE_ID));
+		gxp_doorbell_enable_for_core(
+			gxp, CORE_WAKEUP_DOORBELL(GXP_MCU_CORE_ID),
+			GXP_MCU_CORE_ID);
+		gxp_doorbell_set(gxp, CORE_WAKEUP_DOORBELL(GXP_MCU_CORE_ID));
 	}
 
-	dev_info(gxp->dev, "loaded %s MCU firmware (%u)",
-		 gcip_fw_flavor_str(fw_flavor), mcu_fw->fw_info.fw_changelist);
+	return 0;
+}
 
-	gxp_bpm_stop(gxp, GXP_MCU_CORE_ID);
-	dev_notice(gxp->dev, "MCU Instruction read transactions: 0x%x\n",
-		   gxp_bpm_read_counter(gxp, GXP_MCU_CORE_ID, INST_BPM_OFFSET));
+/*
+ * Shuts down the MCU.
+ * It sends `SHUTDOWN` command to GSA in the secure mode.
+ *
+ * Note that this function doesn't call `gxp_lpm_down`.
+ *
+ * 1. When MCU normally powered off after SHUTDOWN KCI.
+ *    : It is already in PG state and we don't need to call that.
+ *
+ * 2. When we are going to shutdown MCU which is in abnormal state even after trying to rescue it.
+ *    : We can't decide the state of MCU PSM or the AUR_BLOCK and accessing LPM CSRs might not be
+ *      a good idea.
+ */
+static void gxp_mcu_firmware_shutdown(struct gxp_mcu_firmware *mcu_fw)
+{
+	struct gxp_dev *gxp = mcu_fw->gxp;
 
-	ret = gxp_mcu_telemetry_kci(mcu);
-	if (ret)
-		dev_warn(gxp->dev, "telemetry KCI error: %d", ret);
+	if (mcu_fw->is_secure)
+		gsa_send_dsp_cmd(gxp->gsa_dev, GSA_DSP_SHUTDOWN);
+}
 
-	ret = gcip_thermal_restore_on_powering(gxp->thermal);
-	if (ret)
-		dev_warn(gxp->dev, "thermal restore error: %d", ret);
+/*
+ * Rescues the MCU which is not working properly. After the rescue, the MCU must be in PS0 state
+ * with an expectation of working normally. Basically, this function resets the MCU, power cycles
+ * the block and handshakes with the MCU.
+ *
+ * Must be called with holding @mcu_fw->lock and @pm->lock.
+ *
+ * Returns 0 if it successfully rescued the MCU and completed the handshake with it.
+ */
+static int gxp_mcu_firmware_rescue(struct gxp_dev *gxp)
+{
+	struct gxp_mcu_firmware *mcu_fw = gxp_mcu_firmware_of(gxp);
+	int try = MCU_RESCUE_TRY, ret = 0;
 
-	return 0;
+	gcip_pm_lockdep_assert_held(gxp->power_mgr->pm);
+	lockdep_assert_held(&mcu_fw->lock);
+
+	do {
+		dev_warn(gxp->dev, "Try to rescue MCU (try=%d)", try);
+
+		/*
+		 * TODO(b/286179665): Currently, this function must not be called when MCU is in
+		 * PS0 state because GSA shutdown will be NO-OP and powering block down will cause
+		 * a kernel panic eventually. We need to ask the architecture team for sharing how
+		 * to forcefully transit MCU to PS3 state with us.
+		 */
+		if (!wait_for_pg_state_locked(gxp, true)) {
+			dev_err(gxp->dev,
+				"Cannot proceed MCU rescue because it is not in PG state");
+			ret = -EAGAIN;
+			continue;
+		}
+
+		/* Try power cycle after resetting the MCU and still holding the reset bits. */
+		ret = gxp_mcu_reset(gxp, false);
+		if (ret) {
+			dev_err(gxp->dev, "Failed to reset MCU (ret=%d)", ret);
+			continue;
+		}
+
+		gxp_mcu_firmware_shutdown(mcu_fw);
+
+		ret = gxp_pm_blk_reboot(gxp, 5000);
+		if (ret) {
+			dev_err(gxp->dev, "Failed to power cycle AUR block, (ret=%d)", ret);
+			continue;
+		}
+
+		/* Try booting MCU up again and handshaking with it. */
+		ret = gxp_mcu_firmware_start(mcu_fw);
+		if (ret) {
+			dev_err(gxp->dev, "Failed to boot MCU up, (ret=%d)", ret);
+			continue;
+		}
+
+		ret = gxp_mcu_firmware_handshake(mcu_fw);
+		if (ret) {
+			dev_err(gxp->dev, "Failed to handshake with MCU even after rescue (ret=%d)",
+				ret);
+			continue;
+		}
+
+		dev_info(gxp->dev, "Succeeded in rescuing MCU");
+	} while (ret && --try > 0);
+
+	return ret;
 }
 
 static void gxp_mcu_firmware_stop_locked(struct gxp_mcu_firmware *mcu_fw)
@@ -275,12 +432,16 @@ static void gxp_mcu_firmware_stop_locked(struct gxp_mcu_firmware *mcu_fw)
 		dev_warn(gxp->dev, "KCI shutdown failed: %d", ret);
 
 	/*
-	 * If shutdown KCI fails, we can suspect MCU has some issues. In that case, it would be
-	 * good to ring the doorbell and make MCU transit to PG state by force.
+	 * Waits for the MCU to transit to PG state. If the KCI shutdown failed above (ret != 0),
+	 * it will ring the doorbell while waiting.
 	 */
-	ret = wait_for_pg_state_locked(gxp, ret);
-	if (ret)
-		dev_err(gxp->dev, "Failed to transit MCU to PG state after KCI shutdown: %d", ret);
+	if (!wait_for_pg_state_locked(gxp, /*ring_doorbell=*/ret)) {
+		dev_err(gxp->dev, "Failed to transit MCU to PG state after KCI shutdown");
+		/*
+		 * TODO(b/286179665): Call rescue function and ring doorbell to transit MCU to PG
+		 * from here.
+		 */
+	}
 
 	/* To test the case of the MCU FW sending FW_CRASH RKCI in the middle. */
 	TEST_FLUSH_KCI_WORKERS(mcu->kci);
@@ -292,67 +453,7 @@ static void gxp_mcu_firmware_stop_locked(struct gxp_mcu_firmware *mcu_fw)
 	 */
 	gxp_kci_reinit(&mcu->kci);
 
-	if (mcu_fw->is_secure)
-		gsa_send_dsp_cmd(gxp->gsa_dev, GSA_DSP_SHUTDOWN);
-}
-
-static int gxp_mcu_firmware_power_up(struct gxp_mcu_firmware *mcu_fw)
-{
-	struct gxp_dev *gxp = mcu_fw->gxp;
-	int ret;
-	int state;
-	bool pg_state = false;
-
-	gxp_bpm_configure(gxp, GXP_MCU_CORE_ID, INST_BPM_OFFSET,
-			  BPM_EVENT_READ_XFER);
-
-	ret = gxp_lpm_up(gxp, GXP_MCU_CORE_ID);
-	if (ret)
-		return ret;
-
-	if (mcu_fw->is_secure) {
-		state = gsa_send_dsp_cmd(gxp->gsa_dev, GSA_DSP_START);
-		if (state != GSA_DSP_STATE_RUNNING) {
-			ret = -EIO;
-			goto err_lpm_down;
-		}
-	} else {
-		program_iremap_csr(gxp, &mcu_fw->image_buf);
-		/* Raise wakeup doorbell */
-		dev_dbg(gxp->dev, "Raising doorbell %d interrupt\n",
-			CORE_WAKEUP_DOORBELL(GXP_MCU_CORE_ID));
-		gxp_doorbell_enable_for_core(
-			gxp, CORE_WAKEUP_DOORBELL(GXP_MCU_CORE_ID),
-			GXP_MCU_CORE_ID);
-		gxp_doorbell_set(gxp, CORE_WAKEUP_DOORBELL(GXP_MCU_CORE_ID));
-	}
-
-	ret = gxp_mcu_firmware_handshake(mcu_fw);
-	if (ret) {
-		/* MCU seems to have some problems. Wait for it being transit to PG state. */
-		if (wait_for_pg_state_locked(gxp, true))
-			dev_err(gxp->dev,
-				"Failed to transit MCU LPM to PG while handling handshake failure");
-		/*
-		 * Set it as true even though it fails to wait for the PG state above just in case
-		 * the BLK is not powered on properly. Because accessing LPM CSRs in `gxp_lpm_down`
-		 * in that case might cause the kernel panic.
-		 */
-		pg_state = true;
-		goto err_mcu_shutdown;
-	}
-
-	dev_info(gxp->dev, "MCU firmware run succeeded");
-
-	return ret;
-
-err_mcu_shutdown:
-	if (mcu_fw->is_secure)
-		gsa_send_dsp_cmd(gxp->gsa_dev, GSA_DSP_SHUTDOWN);
-err_lpm_down:
-	if (!pg_state)
-		gxp_lpm_down(gxp, GXP_MCU_CORE_ID);
-	return ret;
+	gxp_mcu_firmware_shutdown(mcu_fw);
 }
 
 /*
@@ -361,21 +462,39 @@ err_lpm_down:
 static int gxp_mcu_firmware_run_locked(struct gxp_mcu_firmware *mcu_fw)
 {
 	struct gxp_dev *gxp = mcu_fw->gxp;
-	struct gxp_mcu *mcu = container_of(mcu_fw, struct gxp_mcu, fw);
 	int ret;
 
 	lockdep_assert_held(&mcu_fw->lock);
 
-	ret = gxp_mcu_firmware_power_up(mcu_fw);
+	ret = gxp_mcu_firmware_start(mcu_fw);
 	if (ret)
 		return ret;
 
-	ret = gxp_kci_set_device_properties(&mcu->kci, &gxp->device_prop);
-	if (ret)
-		dev_warn(gxp->dev, "Failed to pass device_prop to fw: %d\n",
-			 ret);
+	ret = gxp_mcu_firmware_handshake(mcu_fw);
+	if (ret) {
+		dev_warn(gxp->dev, "Retry MCU firmware handshake with resetting MCU");
+		if (!gxp_mcu_reset(gxp, true))
+			ret = gxp_mcu_firmware_handshake(mcu_fw);
+	}
+
+	/*
+	 * We don't need to handshake again if it successfully rescues MCU because it will try
+	 * handshake internally.
+	 */
+	if (ret) {
+		ret = gxp_mcu_firmware_rescue(gxp);
+		if (ret) {
+			dev_err(gxp->dev, "Failed to run MCU even after trying to rescue it: %d",
+				ret);
+			gxp_mcu_firmware_shutdown(mcu_fw);
+			return ret;
+		}
+	}
 
 	mcu_fw->status = GCIP_FW_VALID;
+
+	dev_info(gxp->dev, "MCU firmware run succeeded");
+
 	return 0;
 }
 
@@ -603,7 +722,7 @@ void gxp_mcu_firmware_crash_handler(struct gxp_dev *gxp,
 	 *   -> holds @gxp->client_list_lock -> hold @client->semaphore -> holds @gxp->vd_semaphore
 	 *
 	 * Also, in the case of starting MCU FW, the locking order will be:
-	 *   gcip_pm_get -> holds @pm->lock -> gxp_mcu_firmware_start -> holds @mcu_fw->lock
+	 *   gcip_pm_get -> holds @pm->lock -> gxp_mcu_firmware_run -> holds @mcu_fw->lock
 	 *
 	 * To prevent a deadlock issue, we have to follow the same locking order from here.
 	 */
@@ -667,10 +786,10 @@ void gxp_mcu_firmware_crash_handler(struct gxp_dev *gxp,
 		}
 	}
 
-	/* Turn off and on the MCU PSM and restart the MCU firmware. */
-	ret = wait_for_pg_state_locked(gxp, crash_type == GCIP_FW_CRASH_HW_WDG_TIMEOUT);
-	if (ret) {
-		dev_err(gxp->dev, "Failed to transit MCU LPM state to PG (ret=%d)", ret);
+	/* Waits for the MCU transiting to PG state and restart the MCU firmware. */
+	if (!wait_for_pg_state_locked(gxp, crash_type == GCIP_FW_CRASH_HW_WDG_TIMEOUT)) {
+		dev_err(gxp->dev, "Failed to transit MCU LPM state to PG");
+		/* TODO(b/286179665): Call rescue function from here. */
 		goto out;
 	}
 
diff --git a/gxp-mcu-platform.c b/gxp-mcu-platform.c
index 467525f..47bc318 100644
--- a/gxp-mcu-platform.c
+++ b/gxp-mcu-platform.c
@@ -92,12 +92,16 @@ static int gxp_mcu_link_offload_vmbox(struct gxp_dev *gxp,
 
 	ret = gxp_kci_link_unlink_offload_vmbox(
 		kci, vd->client_id, offload_client_id, offload_chip_type, true);
-	if (ret)
+	if (ret) {
 		dev_err(gxp->dev,
 			"Failed to link offload VMBox for client %d, offload client %u, offload chip type %d: %d",
 			vd->client_id, offload_client_id, offload_chip_type,
 			ret);
+		goto out;
+	}
 
+	vd->tpu_linked = true;
+out:
 	return ret;
 }
 
@@ -212,6 +216,7 @@ static void gxp_mcu_before_unmap_tpu_mbx_queue(struct gxp_dev *gxp, struct gxp_c
 	struct gxp_virtual_device *vd = client->vd;
 
 	gxp_vd_unlink_offload_vmbox(gxp, vd, vd->tpu_client_id, GCIP_KCI_OFFLOAD_CHIP_TYPE_TPU);
+	vd->tpu_client_id = -1;
 }
 
 #endif /* HAS_TPU_EXT */
diff --git a/gxp-mcu.c b/gxp-mcu.c
index 4fda8d8..597e479 100644
--- a/gxp-mcu.c
+++ b/gxp-mcu.c
@@ -5,16 +5,25 @@
  * Copyright (C) 2022 Google LLC
  */
 
+#include <linux/delay.h>
 #include <linux/sizes.h>
 
 #include <gcip/gcip-mem-pool.h>
 
 #include "gxp-config.h"
 #include "gxp-internal.h"
+#include "gxp-lpm.h"
 #include "gxp-mcu-firmware.h"
 #include "gxp-mcu.h"
 #include "gxp-uci.h"
 
+/* Setting bit 15 and 16 of GPOUT_LO_WRT register to 0 will hold MCU reset. */
+#define GPOUT_LO_MCU_RESET (3u << 15)
+#define GPOUT_LO_MCU_PSTATE (1u << 2)
+#define GPOUT_LO_MCU_PREG (1u << 3)
+#define GPIN_LO_MCU_PACCEPT (1u << 2)
+#define GPIN_LO_MCU_PDENY (1u << 3)
+
 /* Allocates the MCU <-> cores shared buffer region. */
 static int gxp_alloc_shared_buffer(struct gxp_dev *gxp, struct gxp_mcu *mcu)
 {
@@ -153,3 +162,80 @@ void gxp_mcu_exit(struct gxp_mcu *mcu)
 	gxp_mcu_mem_pools_exit(mcu);
 	gxp_mcu_firmware_exit(&mcu->fw);
 }
+
+int gxp_mcu_reset(struct gxp_dev *gxp, bool release_reset)
+{
+	u32 gpout_lo_rd, gpin_lo_rd, orig;
+	int i, ret = 0;
+
+	/* 1. Read gpout_lo_rd register. */
+	orig = gpout_lo_rd =
+		lpm_read_32_psm(gxp, CORE_TO_PSM(GXP_MCU_CORE_ID), PSM_REG_GPOUT_LO_RD_OFFSET);
+
+	/* 2. Clear bits 15 and 16 of the value read in step 1. */
+	gpout_lo_rd &= ~GPOUT_LO_MCU_RESET;
+
+	/* 3. Set psm in debug mode with debug_cfg.en=1 and debug_cfg.gpout_override=1. */
+	lpm_write_32_psm(gxp, CORE_TO_PSM(GXP_MCU_CORE_ID), PSM_REG_DEBUG_CFG_OFFSET, 0b11);
+
+	/* 4. Write the modified value from step 2 to gpout_lo_wrt register. */
+	lpm_write_32_psm(gxp, CORE_TO_PSM(GXP_MCU_CORE_ID), PSM_REG_GPOUT_LO_WRT_OFFSET,
+			 gpout_lo_rd);
+
+	/*
+	 * 5. Wait for MCU being reset.
+	 *
+	 * Basically, to verify the MCU reset, we should poll bit 0 of MCU_RESET_STATUS register
+	 * (CORERESET_N) to become 0.
+	 *
+	 * However, as we cannot access the register for security reasons, there is no way to
+	 * poll it. Based on the experiment, resetting MCU was already done when the step 4 above
+	 * is finished which took under 5 us. Therefore, waiting 1~2 ms as a margin should be
+	 * enough.
+	 */
+	usleep_range(1000, 2000);
+
+	if (!release_reset)
+		return 0;
+
+	/*
+	 * 6. Set bits [3:2]={1,0} of the value and write it to gpout_lo_wrt register to let MCU
+	 * transit to RUN state.
+	 */
+	gpout_lo_rd = (gpout_lo_rd | GPOUT_LO_MCU_PREG) & ~GPOUT_LO_MCU_PSTATE;
+	lpm_write_32_psm(gxp, CORE_TO_PSM(GXP_MCU_CORE_ID), PSM_REG_GPOUT_LO_WRT_OFFSET,
+			 gpout_lo_rd);
+
+	/* 7. Set bits 15 and 16 of gpout_lo_wrt register to '1' to release reset. */
+	gpout_lo_rd |= GPOUT_LO_MCU_RESET;
+	lpm_write_32_psm(gxp, CORE_TO_PSM(GXP_MCU_CORE_ID), PSM_REG_GPOUT_LO_WRT_OFFSET,
+			 gpout_lo_rd);
+
+	/* 8. Poll gpin_lo_rd until either bit 2 (paccept) or bit 3 (pdeny) becomes non-zero. */
+	for (i = 10000; i > 0; i--) {
+		gpin_lo_rd = lpm_read_32_psm(gxp, CORE_TO_PSM(GXP_MCU_CORE_ID),
+					     PSM_REG_GPIN_LO_RD_OFFSET);
+		if (gpin_lo_rd & (GPIN_LO_MCU_PACCEPT | GPIN_LO_MCU_PDENY))
+			break;
+		udelay(GXP_TIME_DELAY_FACTOR);
+	}
+
+	if (!i) {
+		dev_warn(gxp->dev, "MCU is not responding to the power control");
+		ret = -ETIMEDOUT;
+	} else if (gpin_lo_rd & GPIN_LO_MCU_PDENY) {
+		dev_warn(gxp->dev, "MCU denied the power control for reset");
+		ret = -EAGAIN;
+	}
+
+	/* 9. Write gpout_lo_wrt the same as gpout_lo_rd of step 1. */
+	lpm_write_32_psm(gxp, CORE_TO_PSM(GXP_MCU_CORE_ID), PSM_REG_GPOUT_LO_WRT_OFFSET, orig);
+
+	/*
+	 * 10. Move PSM back to func mode with gpout override disabled (debug_cfg.en=0 and
+	 * debug_cfg.gpout_override=0).
+	 */
+	lpm_write_32_psm(gxp, CORE_TO_PSM(GXP_MCU_CORE_ID), PSM_REG_DEBUG_CFG_OFFSET, 0);
+
+	return ret;
+}
diff --git a/gxp-mcu.h b/gxp-mcu.h
index 3612af9..855b3e7 100644
--- a/gxp-mcu.h
+++ b/gxp-mcu.h
@@ -39,6 +39,15 @@ int gxp_mcu_init(struct gxp_dev *gxp, struct gxp_mcu *mcu);
 /* cleans up resources in @mcu */
 void gxp_mcu_exit(struct gxp_mcu *mcu);
 /*
+ * Forcefully resets MCU without LPM transition.
+ * @gxp: The GXP device to reset MCU.
+ * @release_reset: If true, it will release reset bits and let MCU transit to RUN state. Set it as
+ *                 false only when the block power cycle is needed without running MCU.
+ *
+ * Returns 0 on success, a negative errno on failure.
+ */
+int gxp_mcu_reset(struct gxp_dev *gxp, bool release_reset);
+/*
  * A wrapper function to allocate memory from @mcu->remap_data_pool.
  *
  * Returns 0 on success, a negative errno otherwise.
diff --git a/gxp-thermal.c b/gxp-thermal.c
index 671d140..4ba743b 100644
--- a/gxp-thermal.c
+++ b/gxp-thermal.c
@@ -33,6 +33,7 @@ static int gxp_thermal_set_rate(void *data, unsigned long rate)
 	struct gxp_dev *gxp = data;
 	int ret = 0;
 
+	dev_warn(gxp->dev, "Received thermal throttling requests %lu", rate);
 	if (!gxp_is_direct_mode(gxp)) {
 #if GXP_HAS_MCU
 		struct gxp_mcu *mcu = gxp_mcu_of(gxp);
diff --git a/gxp-uci.c b/gxp-uci.c
index ed2eee5..1baf09e 100644
--- a/gxp-uci.c
+++ b/gxp-uci.c
@@ -515,6 +515,7 @@ int gxp_uci_send_command(struct gxp_uci *uci, struct gxp_virtual_device *vd,
 err_free_resp:
 	if (async_resp->eventfd)
 		gxp_eventfd_put(async_resp->eventfd);
+	gxp_vd_put(vd);
 	kfree(async_resp);
 err_release_credit:
 	gxp_vd_release_credit(vd);
diff --git a/gxp-vd.c b/gxp-vd.c
index b23a759..7755d56 100644
--- a/gxp-vd.c
+++ b/gxp-vd.c
@@ -1542,9 +1542,7 @@ void gxp_vd_release_vmbox(struct gxp_dev *gxp, struct gxp_virtual_device *vd)
 	if (vd->client_id < 0 || vd->mcu_crashed)
 		goto out;
 
-	if (vd->tpu_client_id >= 0)
-		gxp_vd_unlink_offload_vmbox(gxp, vd, vd->tpu_client_id,
-					    GCIP_KCI_OFFLOAD_CHIP_TYPE_TPU);
+	gxp_vd_unlink_offload_vmbox(gxp, vd, vd->tpu_client_id, GCIP_KCI_OFFLOAD_CHIP_TYPE_TPU);
 
 	ret = gxp_kci_release_vmbox(kci, vd->client_id);
 	if (!ret)
@@ -1570,8 +1568,8 @@ void gxp_vd_unlink_offload_vmbox(struct gxp_dev *gxp, struct gxp_virtual_device
 	struct gxp_kci *kci = &(gxp_mcu_of(gxp)->kci);
 	int ret;
 
-	if (vd->client_id < 0 || vd->tpu_client_id < 0 || vd->mcu_crashed)
-		goto out;
+	if (vd->client_id < 0 || vd->tpu_client_id < 0 || !vd->tpu_linked || vd->mcu_crashed)
+		return;
 
 	ret = gxp_kci_link_unlink_offload_vmbox(kci, vd->client_id, offload_client_id,
 						offload_chip_type, false);
@@ -1579,7 +1577,7 @@ void gxp_vd_unlink_offload_vmbox(struct gxp_dev *gxp, struct gxp_virtual_device
 		dev_err(gxp->dev,
 			"Failed to unlink offload VMBox for client %d, offload client %u, offload chip type %d: %d",
 			vd->client_id, offload_client_id, offload_chip_type, ret);
-out:
-	vd->tpu_client_id = -1;
+
+	vd->tpu_linked = false;
 }
 #endif /* GXP_HAS_MCU */
diff --git a/gxp-vd.h b/gxp-vd.h
index 396626c..f9f9bad 100644
--- a/gxp-vd.h
+++ b/gxp-vd.h
@@ -116,6 +116,8 @@ struct gxp_virtual_device {
 	 * This ID will be fetched from the TPU kernel driver.
 	 */
 	int tpu_client_id;
+	/* Whether DSP KD sent `link_offload_vmbox` KCI successfully to MCU FW or not. */
+	bool tpu_linked;
 	/*
 	 * Protects credit. Use a spin lock because the critical section of
 	 * using @credit is pretty small.
author	Aurora zuma automerger <aurora-zuma-automerger@google.com>	2023-06-07 09:19:49 +0000
committer	Cherrypicker Worker QA <android-build-cherrypicker-worker@system.gserviceaccount.com>	2023-06-09 22:24:16 +0000
commit	155374974bb32a98994b492c6ec495018707c9f5 (patch)
tree	4f0e507f3ebaa56c9218b2e62d887a603131229c
parent	ba9d2b042ab2e39d9edc327f44712cd28e960979 (diff)
download	zuma-155374974bb32a98994b492c6ec495018707c9f5.tar.gz