summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Whi copybara merger <whitechapel-automerger@google.com> 2021-12-07 23:55:24 +0000
committer: Nrithya Kanakasabapathy <nrithya@google.com> 2021-12-14 00:59:20 +0000
commit: b61360624793a3b4898bf74cbc053c2422826366 (patch)
tree: c37c9d93e7a60d08d33998eb91bd4bc55a4296ef
parent: db8e16af324fae8e3587c62220c2dec2d787312b (diff)
download: janeiro-b61360624793a3b4898bf74cbc053c2422826366.tar.gz
[Copybara Auto Merge] Merge branch 'pro' into android13-gs-pixel-5.10
edgetpu: mcp notify MCP-wide thermal shutdown via kworker
Bug: 207807085
Bug: 174552882
edgetpu: all mobile chips support debug dump
Bug: 207459857
edgetpu: mobile: share debug dump handlers
Bug: 207459857
GitOrigin-RevId: b0177acc91ac4fc014be4d5a4d98253641dc91fa
Change-Id: Iafa408831e81ab80ac4e476a39e81d57838857a5
-rw-r--r-- drivers/edgetpu/edgetpu-mcp.h 7
-rw-r--r-- drivers/edgetpu/edgetpu-mobile-platform.h 3
-rw-r--r-- drivers/edgetpu/janeiro-debug-dump.c 13
-rw-r--r-- drivers/edgetpu/janeiro-device.c 2
-rw-r--r-- drivers/edgetpu/janeiro/config.h 7
-rw-r--r-- drivers/edgetpu/mobile-debug-dump.c 244
-rw-r--r-- drivers/edgetpu/mobile-debug-dump.h 26
7 files changed, 255 insertions, 47 deletions
diff --git a/drivers/edgetpu/edgetpu-mcp.h b/drivers/edgetpu/edgetpu-mcp.h
index 4530d79..4d762c4 100644
--- a/drivers/edgetpu/edgetpu-mcp.h
+++ b/drivers/edgetpu/edgetpu-mcp.h
@@ -9,7 +9,9 @@
#include <linux/init.h>
#include <linux/mutex.h>
+#include <linux/spinlock.h>
#include <linux/types.h>
+#include <linux/workqueue.h>
#include "edgetpu-config.h"
#include "edgetpu-internal.h"
@@ -38,6 +40,11 @@ struct edgetpu_mcp {
* One should check with !IS_ERR_OR_NULL(etdevs[i]) before accessing.
*/
struct edgetpu_dev **etdevs;
+
+ /* MCP-wide fatal errors pending runtime notification */
+ uint errors_pending_mask;
+ spinlock_t errors_pending_lock;
+ struct work_struct errors_pending_work; /* for notify via kworker */
};
#ifdef EDGETPU_HAS_MCP
diff --git a/drivers/edgetpu/edgetpu-mobile-platform.h b/drivers/edgetpu/edgetpu-mobile-platform.h
index 65184ae..9d41571 100644
--- a/drivers/edgetpu/edgetpu-mobile-platform.h
+++ b/drivers/edgetpu/edgetpu-mobile-platform.h
@@ -22,6 +22,7 @@
#include "edgetpu-config.h"
#include "edgetpu-internal.h"
+#include "mobile-debug-dump.h"
#define to_mobile_dev(etdev) container_of(etdev, struct edgetpu_mobile_platform_dev, edgetpu_dev)
@@ -93,6 +94,8 @@ struct edgetpu_mobile_platform_dev {
#if IS_ENABLED(CONFIG_GOOGLE_BCL)
struct bcl_device *bcl_dev;
#endif
+ /* subsystem coredump info struct */
+ struct mobile_sscd_info sscd_info;
/* Protects TZ Mailbox client pointer */
struct mutex tz_mailbox_lock;
/* TZ mailbox client */
diff --git a/drivers/edgetpu/janeiro-debug-dump.c b/drivers/edgetpu/janeiro-debug-dump.c
index 4314abe..92c3e1a 100644
--- a/drivers/edgetpu/janeiro-debug-dump.c
+++ b/drivers/edgetpu/janeiro-debug-dump.c
@@ -1,9 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
-/*
- * Implements chip specific details of debug dump memory initialization and SSCD registration.
- *
- * Copyright (C) 2021 Google, Inc.
- */
+
+#if IS_ENABLED(CONFIG_SUBSYSTEM_COREDUMP) || IS_ENABLED(CONFIG_EDGETPU_TEST)
+
+#include "mobile-debug-dump.c"
+
+#else /* IS_ENABLED(CONFIG_SUBSYSTEM_COREDUMP) || IS_ENABLED(CONFIG_EDGETPU_TEST) */
#include "edgetpu-debug-dump.c"
@@ -15,3 +16,5 @@ int edgetpu_debug_dump_init(struct edgetpu_dev *etdev)
void edgetpu_debug_dump_exit(struct edgetpu_dev *etdev)
{
}
+
+#endif /* IS_ENABLED(CONFIG_SUBSYSTEM_COREDUMP) || IS_ENABLED(CONFIG_EDGETPU_TEST) */
diff --git a/drivers/edgetpu/janeiro-device.c b/drivers/edgetpu/janeiro-device.c
index 24452da..0f28186 100644
--- a/drivers/edgetpu/janeiro-device.c
+++ b/drivers/edgetpu/janeiro-device.c
@@ -9,6 +9,7 @@
#include <linux/uaccess.h>
#include "edgetpu-config.h"
+#include "edgetpu-debug-dump.h"
#include "edgetpu-internal.h"
#include "edgetpu-mailbox.h"
#include "edgetpu-mobile-platform.h"
@@ -51,6 +52,7 @@ irqreturn_t edgetpu_chip_irq_handler(int irq, void *arg)
struct edgetpu_dev *etdev = arg;
edgetpu_telemetry_irq_handler(etdev);
+ edgetpu_debug_dump_resp_handler(etdev);
return janeiro_mailbox_handle_irq(etdev, irq);
}
diff --git a/drivers/edgetpu/janeiro/config.h b/drivers/edgetpu/janeiro/config.h
index 7a3304c..51215bd 100644
--- a/drivers/edgetpu/janeiro/config.h
+++ b/drivers/edgetpu/janeiro/config.h
@@ -58,6 +58,13 @@
/* Address from which the TPU CPU can access data in the remapped region */
#define EDGETPU_REMAPPED_DATA_ADDR \
(EDGETPU_INSTRUCTION_REMAP_BASE + EDGETPU_REMAPPED_DATA_OFFSET)
+
+/*
+ * Size of memory for FW accessible debug dump segments
+ * TODO(b/208758697): verify whether this size is good
+ */
+#define EDGETPU_DEBUG_DUMP_MEM_SIZE 0x4E0000
+
#include "config-mailbox.h"
#include "config-pwr-state.h"
#include "config-tpu-cpu.h"
diff --git a/drivers/edgetpu/mobile-debug-dump.c b/drivers/edgetpu/mobile-debug-dump.c
index 3732fbb..e0c9493 100644
--- a/drivers/edgetpu/mobile-debug-dump.c
+++ b/drivers/edgetpu/mobile-debug-dump.c
@@ -8,16 +8,35 @@
#include <linux/mutex.h>
#include <linux/platform_data/sscoredump.h>
+#include <linux/platform_device.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
+#include "edgetpu-config.h"
#include "edgetpu-device-group.h"
#include "edgetpu-mailbox.h"
+#include "edgetpu-mobile-platform.h"
#include "mobile-debug-dump.h"
#include "edgetpu-debug-dump.c"
-struct mobile_sscd_mappings_dump *
+/*
+ * Release callback installed in the SSCD platform device's struct device.
+ * Only emits a debug message; it frees nothing.
+ */
+static void sscd_release(struct device *dev)
+{
+	pr_debug(DRIVER_NAME " release\n");
+}
+
+static struct sscd_platform_data sscd_pdata;
+static struct platform_device sscd_dev;
+
+/*
+ * Collects the mapping information of all the host mapping and dmabuf mapping buffers of all
+ * @groups as an array of struct mobile_sscd_mappings_dump and populates the @sscd_seg.
+ *
+ * Returns the pointer to the first element of the mappings dump array. The allocated array should
+ * be freed by the caller after the sscd segment is reported.
+ * Returns NULL in case of failure.
+ */
+static struct mobile_sscd_mappings_dump *
mobile_sscd_collect_mappings_segment(struct edgetpu_device_group **groups, size_t num_groups,
struct sscd_segment *sscd_seg)
{
@@ -30,8 +49,8 @@ mobile_sscd_collect_mappings_segment(struct edgetpu_device_group **groups, size_
mappings_dump = kmalloc(sizeof(struct mobile_sscd_mappings_dump), GFP_KERNEL);
for (idx = 0; idx < num_groups; idx++) {
mutex_lock(&groups[idx]->lock);
- new_size += groups[idx]->host_mappings.count *
- sizeof(struct mobile_sscd_mappings_dump);
+ new_size +=
+ groups[idx]->host_mappings.count * sizeof(struct mobile_sscd_mappings_dump);
resized_arr = krealloc(mappings_dump, new_size, GFP_KERNEL);
if (!resized_arr) {
kfree(mappings_dump);
@@ -80,40 +99,49 @@ mobile_sscd_collect_mappings_segment(struct edgetpu_device_group **groups, size_
return mappings_dump;
}
-size_t mobile_sscd_collect_cmd_resp_queues(struct edgetpu_dev *etdev,
- struct edgetpu_device_group **groups, size_t num_groups,
- struct sscd_segment *sscd_seg_arr)
+/*
+ * Collects the VII cmd and resp queues of all @groups that @etdev belongs to and the KCI cmd and
+ * resp queues and populates them as @sscd_seg_arr elements.
+ *
+ * Returns the total number of queues collected since some queues may have been released for groups
+ * with detached mailboxes. The return value is less than or equal to the total number of queues
+ * expected based on @num_groups i.e. (2 * @num_groups +2).
+ */
+static size_t mobile_sscd_collect_cmd_resp_queues(struct edgetpu_dev *etdev,
+ struct edgetpu_device_group **groups,
+ size_t num_groups,
+ struct sscd_segment *sscd_seg_arr)
{
struct edgetpu_kci *kci;
size_t idx;
u16 num_queues = 0;
- // Collect VII cmd and resp queues
+ /* Collect VII cmd and resp queues */
for (idx = 0; idx < num_groups; idx++) {
mutex_lock(&groups[idx]->lock);
if (!edgetpu_group_mailbox_detached_locked(groups[idx])) {
sscd_seg_arr[num_queues].addr =
- (void *)groups[idx]->vii.cmd_queue_mem.vaddr;
+ (void *)groups[idx]->vii.cmd_queue_mem.vaddr;
sscd_seg_arr[num_queues].size = groups[idx]->vii.cmd_queue_mem.size;
sscd_seg_arr[num_queues].paddr =
- (void *)groups[idx]->vii.cmd_queue_mem.tpu_addr;
+ (void *)groups[idx]->vii.cmd_queue_mem.tpu_addr;
sscd_seg_arr[num_queues].vaddr =
- (void *)groups[idx]->vii.cmd_queue_mem.vaddr;
+ (void *)groups[idx]->vii.cmd_queue_mem.vaddr;
num_queues++;
sscd_seg_arr[num_queues].addr =
- (void *)groups[idx]->vii.resp_queue_mem.vaddr;
+ (void *)groups[idx]->vii.resp_queue_mem.vaddr;
sscd_seg_arr[num_queues].size = groups[idx]->vii.resp_queue_mem.size;
sscd_seg_arr[num_queues].paddr =
- (void *)groups[idx]->vii.resp_queue_mem.tpu_addr;
+ (void *)groups[idx]->vii.resp_queue_mem.tpu_addr;
sscd_seg_arr[num_queues].vaddr =
- (void *)groups[idx]->vii.resp_queue_mem.vaddr;
+ (void *)groups[idx]->vii.resp_queue_mem.vaddr;
num_queues++;
}
mutex_unlock(&groups[idx]->lock);
}
- // Collect KCI cmd and resp queues
+ /* Collect KCI cmd and resp queues */
kci = etdev->kci;
sscd_seg_arr[num_queues].addr = (void *)kci->cmd_queue_mem.vaddr;
sscd_seg_arr[num_queues].size = MAX_QUEUE_SIZE * sizeof(struct edgetpu_command_element);
@@ -122,11 +150,195 @@ size_t mobile_sscd_collect_cmd_resp_queues(struct edgetpu_dev *etdev,
num_queues++;
sscd_seg_arr[num_queues].addr = (void *)kci->resp_queue_mem.vaddr;
- sscd_seg_arr[num_queues].size = MAX_QUEUE_SIZE *
- sizeof(struct edgetpu_kci_response_element);
+ sscd_seg_arr[num_queues].size =
+ MAX_QUEUE_SIZE * sizeof(struct edgetpu_kci_response_element);
sscd_seg_arr[num_queues].paddr = (void *)kci->resp_queue_mem.tpu_addr;
sscd_seg_arr[num_queues].vaddr = (void *)kci->resp_queue_mem.vaddr;
num_queues++;
return num_queues;
}
+
+/*
+ * Debug dump handler: packages the debug dump segments, the group mappings info and the
+ * VII/KCI command and response queues as SSCD segments and hands them to sscd_report().
+ *
+ * @p_etdev: the struct edgetpu_dev, passed as a void pointer.
+ * @p_dump_setup: the struct edgetpu_debug_dump_setup, passed as a void pointer.
+ *
+ * Returns -EINVAL if either argument is NULL, -ENOENT if no sscd_report callback is set,
+ * -ENOMEM on allocation failure, otherwise the return value of sscd_report().
+ */
+static int mobile_sscd_generate_coredump(void *p_etdev, void *p_dump_setup)
+{
+ struct edgetpu_dev *etdev;
+ struct edgetpu_debug_dump_setup *dump_setup;
+ struct edgetpu_mobile_platform_dev *pdev;
+ struct sscd_platform_data *pdata;
+ struct platform_device *sscd_dev;
+ struct sscd_segment *segs;
+ struct edgetpu_debug_dump *debug_dump;
+ struct edgetpu_crash_reason *crash_reason;
+ struct edgetpu_dump_segment *dump_seg;
+ struct edgetpu_device_group *group;
+ struct edgetpu_device_group **groups;
+ struct edgetpu_list_group *g;
+ struct mobile_sscd_mappings_dump *mappings_dump = NULL;
+ char crash_info[128];
+ int sscd_dump_segments_num;
+ int i, ret;
+ size_t num_groups = 0, num_queues = 0;
+ u64 offset;
+
+ if (!p_etdev || !p_dump_setup)
+ return -EINVAL;
+
+ etdev = (struct edgetpu_dev *)p_etdev;
+ dump_setup = (struct edgetpu_debug_dump_setup *)p_dump_setup;
+ pdev = to_mobile_dev(etdev);
+ pdata = (struct sscd_platform_data *)pdev->sscd_info.pdata;
+ sscd_dev = (struct platform_device *)pdev->sscd_info.dev;
+ if (!pdata->sscd_report) {
+ etdev_err(etdev, "failed to generate coredump");
+ return -ENOENT;
+ }
+
+ /* The debug dump header immediately follows the setup struct */
+ debug_dump = (struct edgetpu_debug_dump *)(dump_setup + 1);
+
+ /* Populate crash reason */
+ /* crash_reason_offset is applied relative to the start of dump_setup */
+ crash_reason =
+ (struct edgetpu_crash_reason *)((u8 *)dump_setup + debug_dump->crash_reason_offset);
+ scnprintf(crash_info, sizeof(crash_info), "[edgetpu_coredump] error code: %#llx",
+ crash_reason->code);
+
+ /*
+ * Snapshot the non-disbanded groups under groups_lock. Each entry obtained with
+ * edgetpu_device_group_get() is handed back with edgetpu_device_group_put() at
+ * out_sscd_generate_coredump.
+ */
+ mutex_lock(&etdev->groups_lock);
+ groups = kmalloc_array(etdev->n_groups, sizeof(*groups), GFP_KERNEL);
+ if (!groups) {
+ mutex_unlock(&etdev->groups_lock);
+ return -ENOMEM;
+ }
+
+ etdev_for_each_group(etdev, g, group) {
+ if (edgetpu_device_group_is_disbanded(group))
+ continue;
+ groups[num_groups++] = edgetpu_device_group_get(group);
+ }
+ mutex_unlock(&etdev->groups_lock);
+
+ /* Allocate memory for dump segments */
+ /* This is the upper bound; the count is corrected once the queues are collected */
+ sscd_dump_segments_num = debug_dump->dump_segments_num;
+ sscd_dump_segments_num += 2 * num_groups; /* VII cmd and resp queues */
+ sscd_dump_segments_num += num_groups ? 1 : 0; /* Mappings info */
+ sscd_dump_segments_num += 2; /* KCI cmd and resp queues */
+
+ segs = kmalloc_array(sscd_dump_segments_num, sizeof(struct sscd_segment), GFP_KERNEL);
+ if (!segs) {
+ ret = -ENOMEM;
+ goto out_sscd_generate_coredump;
+ }
+
+ /* Populate sscd segments */
+ /*
+ * Each reported segment covers a struct edgetpu_dump_segment header plus the
+ * dump_seg->size bytes that follow it. The next header is read from the offset
+ * rounded up to sizeof(uint64_t).
+ * NOTE(review): offset itself is not rounded up, so paddr/vaddr of later segments are
+ * derived from the unaligned value - confirm segment sizes are always 8-byte multiples.
+ */
+ dump_seg = (struct edgetpu_dump_segment *)((u8 *)dump_setup +
+ debug_dump->dump_segments_offset);
+ offset = debug_dump->dump_segments_offset;
+ for (i = 0; i < debug_dump->dump_segments_num; i++) {
+ segs[i].addr = dump_seg;
+ segs[i].size = sizeof(struct edgetpu_dump_segment) + dump_seg->size;
+ segs[i].paddr = (void *)(etdev->debug_dump_mem.tpu_addr + offset);
+ segs[i].vaddr = (void *)(etdev->debug_dump_mem.vaddr + offset);
+ offset += sizeof(struct edgetpu_dump_segment) + dump_seg->size;
+ dump_seg = (struct edgetpu_dump_segment *)((u8 *)dump_setup +
+ ALIGN(offset, sizeof(uint64_t)));
+ }
+
+ /* The mappings segment, if any, takes the slot right after the dump segments */
+ if (num_groups) {
+ mappings_dump = mobile_sscd_collect_mappings_segment(groups, num_groups, &segs[i]);
+ if (!mappings_dump) {
+ ret = -ENOMEM;
+ goto out_sscd_generate_coredump;
+ }
+ i++;
+ }
+
+ /* Queue segments are written starting at the first unused slot */
+ num_queues = mobile_sscd_collect_cmd_resp_queues(etdev, groups, num_groups, &segs[i]);
+
+ /*
+ * Adjust num of segments as some groups may have a detached mailbox.
+ * Subtract number of VII and KCI queues according to num_groups.
+ */
+ sscd_dump_segments_num -= (2 * num_groups + 2);
+ sscd_dump_segments_num += num_queues; /* Add actual number of valid VII and KCI queues */
+
+ /* Pass dump data to SSCD daemon */
+ etdev_dbg(etdev, "report: %d segments", sscd_dump_segments_num);
+ ret = pdata->sscd_report(sscd_dev, segs, sscd_dump_segments_num, SSCD_FLAGS_ELFARM64HDR,
+ crash_info);
+ /* Common exit: put the collected groups and free the temporary arrays */
+out_sscd_generate_coredump:
+ for (i = 0; i < num_groups; i++)
+ edgetpu_device_group_put(groups[i]);
+ kfree(mappings_dump);
+ kfree(segs);
+ kfree(groups);
+
+ return ret;
+}
+
+/*
+ * Sets up debug dump support for @etdev: registers the SSCD platform device, allocates the
+ * EDGETPU_DEBUG_DUMP_MEM_SIZE debug dump buffer, allocates the handler table and installs
+ * mobile_sscd_generate_coredump() for DUMP_REASON_REQ_BY_USER.
+ *
+ * If a step after the platform device registration fails, the earlier steps are undone and
+ * debug_dump_mem.vaddr is reset to NULL so that edgetpu_debug_dump_exit() skips its cleanup.
+ *
+ * Returns 0 on success, the error of the failing registration/allocation call, or -ENOMEM if
+ * the handler table cannot be allocated.
+ */
+int edgetpu_debug_dump_init(struct edgetpu_dev *etdev)
+{
+	size_t size;
+	int ret;
+	struct edgetpu_debug_dump_setup *dump_setup;
+	struct edgetpu_mobile_platform_dev *pdev;
+
+	pdev = to_mobile_dev(etdev);
+
+	size = EDGETPU_DEBUG_DUMP_MEM_SIZE;
+
+	sscd_dev = (struct platform_device) {
+		.name = DRIVER_NAME,
+		.driver_override = SSCD_NAME,
+		.id = PLATFORM_DEVID_NONE,
+		.dev = {
+			.platform_data = &sscd_pdata,
+			.release = sscd_release,
+		},
+	};
+	/* Register SSCD platform device */
+	ret = platform_device_register(&sscd_dev);
+	if (ret) {
+		etdev_err(etdev, "SSCD platform device registration failed: %d", ret);
+		return ret;
+	}
+	/*
+	 * Allocate a buffer for various dump segments
+	 */
+	ret = edgetpu_alloc_coherent(etdev, size, &etdev->debug_dump_mem, EDGETPU_CONTEXT_KCI);
+	if (ret) {
+		etdev_err(etdev, "Debug dump seg alloc failed");
+		etdev->debug_dump_mem.vaddr = NULL;
+		goto out_unregister_platform;
+	}
+	dump_setup = (struct edgetpu_debug_dump_setup *)etdev->debug_dump_mem.vaddr;
+	memset(dump_setup, 0, size);
+	dump_setup->dump_mem_size = size;
+
+	/*
+	 * Allocate memory for debug dump handlers
+	 */
+	etdev->debug_dump_handlers =
+		kcalloc(DUMP_REASON_NUM, sizeof(*etdev->debug_dump_handlers), GFP_KERNEL);
+	if (!etdev->debug_dump_handlers) {
+		/* Unwind the buffer and the platform device instead of leaking them */
+		ret = -ENOMEM;
+		goto out_free_dump_mem;
+	}
+	etdev->debug_dump_handlers[DUMP_REASON_REQ_BY_USER] = mobile_sscd_generate_coredump;
+
+	pdev->sscd_info.pdata = &sscd_pdata;
+	pdev->sscd_info.dev = &sscd_dev;
+	return ret;
+out_free_dump_mem:
+	edgetpu_free_coherent(etdev, &etdev->debug_dump_mem, EDGETPU_CONTEXT_KCI);
+	/* Matches the alloc-failure path: a NULL vaddr makes edgetpu_debug_dump_exit() a no-op */
+	etdev->debug_dump_mem.vaddr = NULL;
+out_unregister_platform:
+	platform_device_unregister(&sscd_dev);
+	return ret;
+}
+
+/*
+ * Tears down debug dump support for @etdev. When a debug dump buffer exists it is freed, the
+ * handler table is freed and the SSCD platform device is unregistered; otherwise only a
+ * debug message is logged.
+ */
+void edgetpu_debug_dump_exit(struct edgetpu_dev *etdev)
+{
+	if (etdev->debug_dump_mem.vaddr) {
+		/* Free the memory assigned for debug dump */
+		edgetpu_free_coherent(etdev, &etdev->debug_dump_mem, EDGETPU_CONTEXT_KCI);
+		kfree(etdev->debug_dump_handlers);
+		platform_device_unregister(&sscd_dev);
+	} else {
+		etdev_dbg(etdev, "Debug dump not allocated");
+	}
+}
diff --git a/drivers/edgetpu/mobile-debug-dump.h b/drivers/edgetpu/mobile-debug-dump.h
index 0a9aef9..f433a99 100644
--- a/drivers/edgetpu/mobile-debug-dump.h
+++ b/drivers/edgetpu/mobile-debug-dump.h
@@ -23,30 +23,4 @@ struct mobile_sscd_mappings_dump {
u64 size;
};
-struct sscd_segment;
-
-/*
- * Collects the mapping information of all the host mapping and dmabuf mapping buffers of all
- * @groups as an array of struct mobile_sscd_mappings_dump and populates the @sscd_seg.
- *
- * Returns the pointer to the first element of the mappings dump array. The allocated array should
- * be freed by the caller after the sscd segment is reported.
- * Returns NULL in case of failure.
- */
-struct mobile_sscd_mappings_dump *
-mobile_sscd_collect_mappings_segment(struct edgetpu_device_group **groups, size_t num_groups,
- struct sscd_segment *sscd_seg);
-
-/*
- * Collects the VII cmd and resp queues of all @groups that @etdev belongs to and the KCI cmd and
- * resp queues and populates them as @sscd_seg_arr elements.
- *
- * Returns the total number of queues collected since some queues may have been released for groups
- * with detached mailboxes. The return value is less than or equal to the total number of queues
- * expected based on @num_groups i.e. (2 * @num_groups +2).
- */
-size_t mobile_sscd_collect_cmd_resp_queues(struct edgetpu_dev *etdev,
- struct edgetpu_device_group **groups, size_t num_groups,
- struct sscd_segment *sscd_seg_arr);
-
#endif /* MOBILE_DEBUG_DUMP_H_ */