summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mark Salyzyn <salyzyn@google.com> 2021-01-14 06:52:15 -0800
committer Mark Salyzyn <salyzyn@google.com> 2021-01-14 06:52:15 -0800
commit c47869b05f9c3403dbdc7bf3b6bba954a3904b56 (patch)
tree 8de12a6bd2142df8b655596471398130f4825459
parent 81a0949d37fd3f3bb4cd3d2e61b76b3ff45612e1 (diff)
parent e0d2f4a867dba26c3cade6e25f2b1b61700c0978 (diff)
download edgetpu-c47869b05f9c3403dbdc7bf3b6bba954a3904b56.tar.gz
Merge partner/android-gs-pixel-mainline into partner/android-gs-pixel-5.10-stabilization
* partner/android-gs-pixel-mainline: Merge branch 'whitechapel' into android-gs-pixel-mainline Signed-off-by: Mark Salyzyn <salyzyn@google.com> Change-Id: Ic0a2f56d63195c6edc1e222969847e48d0745208
-rw-r--r-- drivers/edgetpu/Kbuild 3
-rw-r--r-- drivers/edgetpu/Makefile 2
-rw-r--r-- drivers/edgetpu/abrolhos-debug-dump.c 134
-rw-r--r-- drivers/edgetpu/abrolhos-debug-dump.h 16
-rw-r--r-- drivers/edgetpu/abrolhos-device.c 3
-rw-r--r-- drivers/edgetpu/abrolhos-firmware.c 7
-rw-r--r-- drivers/edgetpu/abrolhos-iommu.c 4
-rw-r--r-- drivers/edgetpu/abrolhos-platform.c 104
-rw-r--r-- drivers/edgetpu/abrolhos-platform.h 16
-rw-r--r-- drivers/edgetpu/abrolhos-pm.c 89
-rw-r--r-- drivers/edgetpu/abrolhos-thermal.c 58
-rw-r--r-- drivers/edgetpu/abrolhos-usage-stats.c 2
-rw-r--r-- drivers/edgetpu/edgetpu-core.c 7
-rw-r--r-- drivers/edgetpu/edgetpu-debug-dump.c 97
-rw-r--r-- drivers/edgetpu/edgetpu-debug-dump.h 10
-rw-r--r-- drivers/edgetpu/edgetpu-device-group.c 77
-rw-r--r-- drivers/edgetpu/edgetpu-device-group.h 17
-rw-r--r-- drivers/edgetpu/edgetpu-firmware.c 66
-rw-r--r-- drivers/edgetpu/edgetpu-fs.c 10
-rw-r--r-- drivers/edgetpu/edgetpu-internal.h 14
-rw-r--r-- drivers/edgetpu/edgetpu-kci.c 41
-rw-r--r-- drivers/edgetpu/edgetpu-kci.h 4
-rw-r--r-- drivers/edgetpu/edgetpu-mailbox.c 7
-rw-r--r-- drivers/edgetpu/edgetpu-pm.c 11
-rw-r--r-- drivers/edgetpu/edgetpu-sw-watchdog.c 17
-rw-r--r-- drivers/edgetpu/edgetpu-sw-watchdog.h 7
-rw-r--r-- drivers/edgetpu/edgetpu-thermal.h 3
-rw-r--r-- drivers/edgetpu/edgetpu-usage-stats.c 249
-rw-r--r-- drivers/edgetpu/edgetpu-usage-stats.h 65
-rw-r--r-- drivers/edgetpu/mm-backport.h 28
30 files changed, 863 insertions, 305 deletions
diff --git a/drivers/edgetpu/Kbuild b/drivers/edgetpu/Kbuild
index da88973..5361fa3 100644
--- a/drivers/edgetpu/Kbuild
+++ b/drivers/edgetpu/Kbuild
@@ -12,7 +12,7 @@ endif
edgetpu-fw-objs := edgetpu-firmware.o edgetpu-firmware-util.o edgetpu-shared-fw.o
edgetpu-objs := edgetpu-mailbox.o edgetpu-kci.o edgetpu-telemetry.o edgetpu-mapping.o edgetpu-dmabuf.o edgetpu-async.o edgetpu-iremap-pool.o edgetpu-sw-watchdog.o $(edgetpu-fw-objs)
-abrolhos-y := abrolhos-device.o abrolhos-device-group.o abrolhos-fs.o abrolhos-core.o abrolhos-platform.o abrolhos-iommu.o abrolhos-firmware.o abrolhos-thermal.o abrolhos-pm.o abrolhos-debug-dump.o $(edgetpu-objs)
+abrolhos-y := abrolhos-device.o abrolhos-device-group.o abrolhos-fs.o abrolhos-core.o abrolhos-platform.o abrolhos-iommu.o abrolhos-firmware.o abrolhos-thermal.o abrolhos-pm.o abrolhos-debug-dump.o abrolhos-usage-stats.o $(edgetpu-objs)
CFLAGS_abrolhos-fs.o := -DCONFIG_ABROLHOS=1
CFLAGS_abrolhos-core.o := -DCONFIG_ABROLHOS=1
CFLAGS_abrolhos-device.o := -DCONFIG_ABROLHOS=1
@@ -23,3 +23,4 @@ CFLAGS_abrolhos-pm.o := -DCONFIG_ABROLHOS=1
CFLAGS_abrolhos-thermal.o := -DCONFIG_ABROLHOS=1
CFLAGS_abrolhos-iommu.o := -DCONFIG_ABROLHOS=1
CFLAGS_abrolhos-debug-dump.o := -DCONFIG_ABROLHOS=1
+CFLAGS_abrolhos-usage-stats.o := -DCONFIG_ABROLHOS=1
diff --git a/drivers/edgetpu/Makefile b/drivers/edgetpu/Makefile
index b85b8fc..0391dcf 100644
--- a/drivers/edgetpu/Makefile
+++ b/drivers/edgetpu/Makefile
@@ -16,7 +16,7 @@ endif
edgetpu-fw-objs := edgetpu-firmware-util.o edgetpu-shared-fw.o edgetpu-firmware.o
edgetpu-objs := edgetpu-core.o edgetpu-mailbox.o edgetpu-kci.o edgetpu-device-group.o edgetpu-telemetry.o edgetpu-mapping.o edgetpu-dmabuf.o edgetpu-async.o edgetpu-iremap-pool.o edgetpu-sw-watchdog.o $(edgetpu-fw-objs)
-abrolhos-objs := abrolhos-device.o abrolhos-firmware.o edgetpu-fs.o abrolhos-platform.o abrolhos-iommu.o abrolhos-thermal.o abrolhos-pm.o abrolhos-debug-dump.o $(edgetpu-objs)
+abrolhos-objs := abrolhos-device.o abrolhos-firmware.o edgetpu-fs.o abrolhos-platform.o abrolhos-iommu.o abrolhos-thermal.o abrolhos-pm.o abrolhos-debug-dump.o abrolhos-usage-stats.o $(edgetpu-objs)
KBUILD_OPTIONS += CONFIG_ABROLHOS=m
diff --git a/drivers/edgetpu/abrolhos-debug-dump.c b/drivers/edgetpu/abrolhos-debug-dump.c
index d39e674..cdc57e2 100644
--- a/drivers/edgetpu/abrolhos-debug-dump.c
+++ b/drivers/edgetpu/abrolhos-debug-dump.c
@@ -1,2 +1,136 @@
// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/platform_data/sscoredump.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "abrolhos-platform.h"
+
#include "edgetpu-debug-dump.c"
+
+/*
+ * Debug dump handler: collects the dump segments described in the debug dump
+ * buffer and reports them through the SSCD platform device.
+ *
+ * @p_etdev: the struct edgetpu_dev the dump belongs to.
+ * @p_dump_setup: start of the dump buffer (struct edgetpu_debug_dump_setup).
+ *
+ * Returns -EINVAL when either argument is NULL, -ENOENT when no sscd_report
+ * callback is registered, -ENOMEM when the segment array cannot be allocated,
+ * otherwise the return value of the sscd_report callback.
+ */
+static int abrolhos_sscd_generate_coredump(void *p_etdev, void *p_dump_setup)
+{
+	struct edgetpu_dev *etdev;
+	struct edgetpu_debug_dump_setup *dump_setup;
+	struct abrolhos_platform_dev *pdev;
+	struct sscd_platform_data *pdata;
+	struct platform_device *sscd_dev;
+	struct sscd_segment *segs;
+	struct edgetpu_debug_dump *debug_dump;
+	struct edgetpu_crash_reason *crash_reason;
+	struct edgetpu_dump_segment *dump_seg;
+	char crash_info[128];
+	int dump_segments_num;
+	int i, ret;
+	u64 offset;
+
+	if (!p_etdev || !p_dump_setup)
+		return -EINVAL;
+
+	etdev = p_etdev;
+	dump_setup = p_dump_setup;
+	pdev = container_of(etdev, struct abrolhos_platform_dev, edgetpu_dev);
+	pdata = pdev->sscd_info.pdata;
+	sscd_dev = pdev->sscd_info.dev;
+	if (!pdata->sscd_report) {
+		etdev_err(etdev, "failed to generate coredump");
+		return -ENOENT;
+	}
+
+	/* The dump descriptor follows the setup header, in u64 words. */
+	offset = sizeof(struct edgetpu_debug_dump_setup);
+	debug_dump = (struct edgetpu_debug_dump *)((u64 *)dump_setup +
+						   word_align_offset(offset));
+
+	/* Populate crash reason */
+	crash_reason = (struct edgetpu_crash_reason *)((u64 *)dump_setup +
+		word_align_offset(debug_dump->crash_reason_offset));
+	scnprintf(crash_info, sizeof(crash_info),
+		  "[edgetpu_coredump] error code: 0x%llx", crash_reason->code);
+
+	/* Populate dump segments */
+	dump_segments_num = debug_dump->dump_segments_num;
+	segs = kmalloc_array(dump_segments_num, sizeof(struct sscd_segment),
+			     GFP_KERNEL);
+	if (!segs)
+		return -ENOMEM;
+
+	dump_seg = (struct edgetpu_dump_segment *)((u64 *)dump_setup +
+		word_align_offset(debug_dump->dump_segments_offset));
+	offset = debug_dump->dump_segments_offset +
+		 sizeof(struct edgetpu_dump_segment);
+	for (i = 0; i < dump_segments_num; i++) {
+		/*
+		 * dump_seg already points at the header of segment i (it is
+		 * advanced at the end of every iteration), so it must not be
+		 * indexed by i again.
+		 * NOTE(review): "&src_addr + 1" presumably is the first byte
+		 * after the header, i.e. src_addr is its last field - confirm.
+		 */
+		segs[i].addr = &dump_seg->src_addr + 1;
+		segs[i].size = dump_seg->size;
+		segs[i].paddr = (void *)(etdev->debug_dump_mem.tpu_addr +
+					 offset);
+		segs[i].vaddr = (void *)(etdev->debug_dump_mem.vaddr +
+					 offset);
+		offset += sizeof(struct edgetpu_dump_segment) + dump_seg->size;
+		/* Step over this header and its payload to the next header. */
+		dump_seg = (struct edgetpu_dump_segment *)
+			((u64 *)dump_seg + word_align_offset(
+				sizeof(struct edgetpu_dump_segment) +
+				dump_seg->size));
+	}
+
+	/* Pass dump data to SSCD daemon */
+	etdev_dbg(etdev, "report: %d segments", dump_segments_num);
+	ret = pdata->sscd_report(sscd_dev, segs, dump_segments_num,
+				 SSCD_FLAGS_ELFARM64HDR, crash_info);
+
+	kfree(segs);
+
+	return ret;
+}
+
+/*
+ * Allocates the debug dump buffer, maps it for firmware access, and installs
+ * the abrolhos debug dump handlers.
+ *
+ * Returns 0 on success, the edgetpu_iremap_alloc() error when the buffer
+ * cannot be allocated, or -ENOMEM when the handler table cannot be allocated.
+ */
+int edgetpu_debug_dump_init(struct edgetpu_dev *etdev)
+{
+	size_t size = EDGETPU_DEBUG_DUMP_MEM_SIZE;
+	int ret;
+	struct edgetpu_debug_dump_setup *dump_setup;
+
+	/*
+	 * Allocate buffers for various dump segments and map them to FW
+	 * accessible regions
+	 */
+	ret = edgetpu_iremap_alloc(etdev, size, &etdev->debug_dump_mem,
+				   EDGETPU_CONTEXT_KCI);
+	if (ret) {
+		etdev_err(etdev, "Debug dump seg alloc failed");
+		etdev->debug_dump_mem.vaddr = NULL;
+		return ret;
+	}
+	dump_setup =
+		(struct edgetpu_debug_dump_setup *)etdev->debug_dump_mem.vaddr;
+	/*
+	 * Clear the buffer before recording its size; doing it the other way
+	 * round zeroes dump_mem_size again.
+	 */
+	memset(dump_setup, 0, size);
+	dump_setup->dump_mem_size = size;
+
+	/*
+	 * Allocate memory for debug dump handlers
+	 */
+	etdev->debug_dump_handlers = kcalloc(DUMP_REQ_REASON_NUM,
+					     sizeof(*etdev->debug_dump_handlers),
+					     GFP_KERNEL);
+	/*
+	 * On failure the dump buffer stays allocated; it is released by
+	 * edgetpu_debug_dump_exit().
+	 */
+	if (!etdev->debug_dump_handlers)
+		return -ENOMEM;
+	etdev->debug_dump_handlers[DUMP_REQ_REASON_BY_USER] =
+		abrolhos_sscd_generate_coredump;
+
+	return ret;
+}
+
+/*
+ * Releases the debug dump buffer and the handler table set up by
+ * edgetpu_debug_dump_init(). Does nothing when no buffer is allocated.
+ */
+void edgetpu_debug_dump_exit(struct edgetpu_dev *etdev)
+{
+	if (!etdev->debug_dump_mem.vaddr) {
+		etdev_dbg(etdev, "Debug dump not allocated");
+		return;
+	}
+	/*
+	 * Free the memory assigned for debug dump
+	 */
+	edgetpu_iremap_free(etdev, &etdev->debug_dump_mem,
+			    EDGETPU_CONTEXT_KCI);
+	/* Clear the stale pointers so a second call cannot free twice. */
+	etdev->debug_dump_mem.vaddr = NULL;
+	kfree(etdev->debug_dump_handlers);
+	etdev->debug_dump_handlers = NULL;
+}
diff --git a/drivers/edgetpu/abrolhos-debug-dump.h b/drivers/edgetpu/abrolhos-debug-dump.h
new file mode 100644
index 0000000..62ef111
--- /dev/null
+++ b/drivers/edgetpu/abrolhos-debug-dump.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Module that defines structure to retrieve debug dump segments
+ * from abrolhos firmware.
+ *
+ * Copyright (C) 2020 Google, Inc.
+ */
+#ifndef __ABROLHOS_DEBUG_DUMP_H__
+#define __ABROLHOS_DEBUG_DUMP_H__
+
+struct abrolhos_sscd_info {
+ void *pdata; /* SSCD platform data; used as struct sscd_platform_data * */
+ void *dev; /* SSCD platform device; used as struct platform_device * */
+};
+
+#endif /* __ABROLHOS_DEBUG_DUMP_H__ */
diff --git a/drivers/edgetpu/abrolhos-device.c b/drivers/edgetpu/abrolhos-device.c
index f6a0eaf..ed1e7d5 100644
--- a/drivers/edgetpu/abrolhos-device.c
+++ b/drivers/edgetpu/abrolhos-device.c
@@ -84,8 +84,7 @@ u64 edgetpu_chip_tpu_timestamp(struct edgetpu_dev *etdev)
void edgetpu_chip_init(struct edgetpu_dev *etdev)
{
int i;
- struct edgetpu_platform_dev *etpdev = container_of(
- etdev, struct edgetpu_platform_dev, edgetpu_dev);
+ struct abrolhos_platform_dev *etpdev = to_abrolhos_dev(etdev);
/* Disable the CustomBlock Interrupt. */
edgetpu_dev_write_32(etdev, HOST_NONSECURE_INTRSRCMASKREG, 0x1);
diff --git a/drivers/edgetpu/abrolhos-firmware.c b/drivers/edgetpu/abrolhos-firmware.c
index 18a7671..2c9b87e 100644
--- a/drivers/edgetpu/abrolhos-firmware.c
+++ b/drivers/edgetpu/abrolhos-firmware.c
@@ -22,8 +22,8 @@ static int abrolhos_firmware_alloc_buffer(
struct edgetpu_firmware_buffer *fw_buf)
{
struct edgetpu_dev *etdev = et_fw->etdev;
- struct edgetpu_platform_dev *edgetpu_pdev =
- container_of(etdev, struct edgetpu_platform_dev, edgetpu_dev);
+ struct abrolhos_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev);
+
 /* Allocate extra space for the image header */
size_t buffer_size =
edgetpu_pdev->fw_region_size + ABROLHOS_FW_HEADER_SIZE;
@@ -68,8 +68,7 @@ static int abrolhos_firmware_prepare_run(struct edgetpu_firmware *et_fw,
struct edgetpu_firmware_buffer *fw_buf)
{
struct edgetpu_dev *etdev = et_fw->etdev;
- struct edgetpu_platform_dev *edgetpu_pdev =
- container_of(etdev, struct edgetpu_platform_dev, edgetpu_dev);
+ struct abrolhos_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev);
void *image_vaddr, *header_vaddr;
struct abrolhos_image_config *image_config;
phys_addr_t image_start, image_end, carveout_start, carveout_end;
diff --git a/drivers/edgetpu/abrolhos-iommu.c b/drivers/edgetpu/abrolhos-iommu.c
index 58ca89c..d776a63 100644
--- a/drivers/edgetpu/abrolhos-iommu.c
+++ b/drivers/edgetpu/abrolhos-iommu.c
@@ -186,7 +186,7 @@ out:
/* mmu_info is unused and NULL for IOMMU version, let IOMMU API supply info */
int edgetpu_mmu_attach(struct edgetpu_dev *etdev, void *mmu_info)
{
- struct edgetpu_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev);
+ struct abrolhos_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev);
struct edgetpu_iommu *etiommu;
int ret;
@@ -247,7 +247,7 @@ void edgetpu_mmu_reset(struct edgetpu_dev *etdev)
void edgetpu_mmu_detach(struct edgetpu_dev *etdev)
{
- struct edgetpu_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev);
+ struct abrolhos_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev);
struct edgetpu_iommu *etiommu = etdev->mmu_cookie;
int i, ret;
diff --git a/drivers/edgetpu/abrolhos-platform.c b/drivers/edgetpu/abrolhos-platform.c
index 1c2e90d..d04c0ae 100644
--- a/drivers/edgetpu/abrolhos-platform.c
+++ b/drivers/edgetpu/abrolhos-platform.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Platform device driver for the Google Edge TPU ML accelerator.
+ * Abrolhos device driver for the Google EdgeTPU ML accelerator.
*
* Copyright (C) 2019 Google, Inc.
*/
@@ -30,29 +30,26 @@
#include "edgetpu-mmu.h"
#include "edgetpu-telemetry.h"
-#define MAX_SEGS 1
-
static const struct of_device_id edgetpu_of_match[] = {
{ .compatible = "google,darwinn", },
{ /* end of list */ },
};
MODULE_DEVICE_TABLE(of, edgetpu_of_match);
-static void edgetpu_sscd_release(struct device *dev)
+static void sscd_release(struct device *dev)
{
pr_debug(DRIVER_NAME " release\n");
}
-static struct sscd_platform_data edgetpu_sscd_pdata;
-static struct platform_device edgetpu_sscd_dev = {
+static struct sscd_platform_data sscd_pdata;
+static struct platform_device sscd_dev = {
.name = DRIVER_NAME,
.driver_override = SSCD_NAME,
.id = -1,
.dev = {
- .platform_data = &edgetpu_sscd_pdata,
- .release = edgetpu_sscd_release,
+ .platform_data = &sscd_pdata,
+ .release = sscd_release,
},
};
-
/*
* Log and trace buffers at the beginning of the remapped region,
* pool memory afterwards.
@@ -60,7 +57,7 @@ static struct platform_device edgetpu_sscd_dev = {
#define EDGETPU_POOL_MEM_OFFSET (EDGETPU_TELEMETRY_BUFFER_SIZE * 2)
-static void abrolhos_get_telemetry_mem(struct edgetpu_platform_dev *etpdev,
+static void abrolhos_get_telemetry_mem(struct abrolhos_platform_dev *etpdev,
enum edgetpu_telemetry_type type,
struct edgetpu_coherent_mem *mem)
{
@@ -75,7 +72,8 @@ static void abrolhos_get_telemetry_mem(struct edgetpu_platform_dev *etpdev,
}
/* Setup the firmware region carveout. */
-static int edgetpu_platform_setup_fw_region(struct edgetpu_platform_dev *etpdev)
+static int
+edgetpu_platform_setup_fw_region(struct abrolhos_platform_dev *etpdev)
{
struct edgetpu_dev *etdev = &etpdev->edgetpu_dev;
struct platform_device *gsa_pdev;
@@ -163,7 +161,7 @@ out_unmap:
}
static void edgetpu_platform_cleanup_fw_region(
- struct edgetpu_platform_dev *etpdev)
+ struct abrolhos_platform_dev *etpdev)
{
gsa_unload_tpu_fw_image(etpdev->gsa_dev);
@@ -183,68 +181,7 @@ void edgetpu_setup_mmu(struct edgetpu_dev *etdev)
dev_warn(etdev->dev, "failed to attach IOMMU: %d\n", ret);
}
-static int edgetpu_sscd_generate_coredump(void)
-{
- struct sscd_platform_data *pdata = &edgetpu_sscd_pdata;
- static struct sscd_segment segs[MAX_SEGS];
- char msg[128];
- int cnt;
-
- if (!pdata->sscd_report) {
- pr_err(DRIVER_NAME " failed to generate coredump\n");
- return -1;
- }
-
- /*
- * TODO (b/156049774):
- * Replace with dump information when it's available
- */
- cnt = scnprintf(msg, sizeof(msg), "HELLO TPU!");
- segs[0].addr = (void *)&msg;
- segs[0].size = cnt;
-
- pr_debug(DRIVER_NAME " report: %d segments", MAX_SEGS);
- return pdata->sscd_report(&edgetpu_sscd_dev, segs, MAX_SEGS,
- 0, "edgetpu_coredump");
-}
-
-static ssize_t edgetpu_coredump_store(struct file *filep,
- const char __user *ubuf, size_t size, loff_t *offp)
-{
- int generate_coredump, ret;
-
- ret = kstrtoint_from_user(ubuf, size, 0, &generate_coredump);
- if (ret)
- return ret;
- if (generate_coredump) {
- ret = edgetpu_sscd_generate_coredump();
- if (ret) {
- pr_err(DRIVER_NAME " failed to generate coredump: %d\n",
- ret);
- return ret;
- }
- }
-
- return size;
-};
-
-static const struct file_operations coredump_ops = {
- .owner = THIS_MODULE,
- .write = edgetpu_coredump_store,
-};
-
-static void edgetpu_sscd_init(struct edgetpu_dev *etdev)
-{
- /*
- * TODO (b/156049774):
- * Remove debugfs file after dump information is available and
- * edgetpu_sscd_generate_coredump is triggered by a crash
- */
- debugfs_create_file("coredump", 0220, etdev->d_entry, etdev,
- &coredump_ops);
-}
-
-static int abrolhos_parse_ssmt(struct edgetpu_platform_dev *etpdev)
+static int abrolhos_parse_ssmt(struct abrolhos_platform_dev *etpdev)
{
struct edgetpu_dev *etdev = &etpdev->edgetpu_dev;
struct platform_device *pdev = to_platform_device(etdev->dev);
@@ -270,13 +207,12 @@ static int abrolhos_parse_ssmt(struct edgetpu_platform_dev *etpdev)
static int edgetpu_platform_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- struct edgetpu_platform_dev *edgetpu_pdev;
+ struct abrolhos_platform_dev *edgetpu_pdev;
struct resource *r;
struct edgetpu_mapped_resource regs;
int ret;
- edgetpu_pdev =
- devm_kzalloc(dev, sizeof(*edgetpu_pdev), GFP_KERNEL);
+ edgetpu_pdev = devm_kzalloc(dev, sizeof(*edgetpu_pdev), GFP_KERNEL);
if (!edgetpu_pdev)
return -ENOMEM;
@@ -379,8 +315,6 @@ static int edgetpu_platform_probe(struct platform_device *pdev)
dev_dbg(dev, "Creating thermal device\n");
edgetpu_pdev->edgetpu_dev.thermal = devm_tpu_thermal_create(dev);
- edgetpu_sscd_init(&edgetpu_pdev->edgetpu_dev);
-
dev_info(dev, "%s edgetpu initialized. Build: %s\n",
edgetpu_pdev->edgetpu_dev.dev_name, GIT_REPO_TAG);
@@ -388,6 +322,9 @@ static int edgetpu_platform_probe(struct platform_device *pdev)
/* Turn the device off unless a client request is already received. */
edgetpu_pm_shutdown(&edgetpu_pdev->edgetpu_dev, false);
+ edgetpu_pdev->sscd_info.pdata = &sscd_pdata;
+ edgetpu_pdev->sscd_info.dev = &sscd_dev;
+
return ret;
out_tel_exit:
edgetpu_telemetry_exit(&edgetpu_pdev->edgetpu_dev);
@@ -406,8 +343,7 @@ out_shutdown:
static int edgetpu_platform_remove(struct platform_device *pdev)
{
struct edgetpu_dev *etdev = platform_get_drvdata(pdev);
- struct edgetpu_platform_dev *edgetpu_pdev = container_of(
- etdev, struct edgetpu_platform_dev, edgetpu_dev);
+ struct abrolhos_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev);
abrolhos_edgetpu_firmware_destroy(etdev);
if (edgetpu_pdev->irq >= 0)
@@ -442,7 +378,7 @@ static int __init edgetpu_platform_init(void)
return ret;
/* Register SSCD platform device */
- ret = platform_device_register(&edgetpu_sscd_dev);
+ ret = platform_device_register(&sscd_dev);
if (ret)
pr_err(DRIVER_NAME " SSCD platform device registration failed: %d\n",
ret);
@@ -452,11 +388,11 @@ static int __init edgetpu_platform_init(void)
static void __exit edgetpu_platform_exit(void)
{
platform_driver_unregister(&edgetpu_platform_driver);
- platform_device_unregister(&edgetpu_sscd_dev);
+ platform_device_unregister(&sscd_dev);
edgetpu_exit();
}
-MODULE_DESCRIPTION("Google Edge TPU platform driver");
+MODULE_DESCRIPTION("Google EdgeTPU platform driver");
MODULE_LICENSE("GPL v2");
module_init(edgetpu_platform_init);
module_exit(edgetpu_platform_exit);
diff --git a/drivers/edgetpu/abrolhos-platform.h b/drivers/edgetpu/abrolhos-platform.h
index a9eca06..ecd3742 100644
--- a/drivers/edgetpu/abrolhos-platform.h
+++ b/drivers/edgetpu/abrolhos-platform.h
@@ -1,29 +1,30 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Platform device driver for the Google Edge TPU ML accelerator.
+ * Abrolhos device driver for the Google EdgeTPU ML accelerator.
*
* Copyright (C) 2019 Google, Inc.
*/
-#ifndef __EDGETPU_PLATFORM_H__
-#define __EDGETPU_PLATFORM_H__
+#ifndef __ABROLHOS_PLATFORM_H__
+#define __ABROLHOS_PLATFORM_H__
#include <linux/device.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/types.h>
-#include "edgetpu-internal.h"
+#include "abrolhos-debug-dump.h"
#include "abrolhos-pm.h"
+#include "edgetpu-internal.h"
#define to_abrolhos_dev(etdev) \
- container_of(etdev, struct edgetpu_platform_dev, edgetpu_dev)
+ container_of(etdev, struct abrolhos_platform_dev, edgetpu_dev)
struct edgetpu_platform_pwr {
struct mutex policy_lock;
enum tpu_pwr_state curr_policy;
};
-struct edgetpu_platform_dev {
+struct abrolhos_platform_dev {
struct edgetpu_dev edgetpu_dev;
struct edgetpu_platform_pwr platform_pwr;
int irq;
@@ -39,6 +40,7 @@ struct edgetpu_platform_dev {
void __iomem *ssmt_base;
struct edgetpu_coherent_mem log_mem;
struct edgetpu_coherent_mem trace_mem;
+ struct abrolhos_sscd_info sscd_info;
};
-#endif /* __EDGETPU_PLATFORM_H__ */
+#endif /* __ABROLHOS_PLATFORM_H__ */
diff --git a/drivers/edgetpu/abrolhos-pm.c b/drivers/edgetpu/abrolhos-pm.c
index 6dd9ea4..04b42b5 100644
--- a/drivers/edgetpu/abrolhos-pm.c
+++ b/drivers/edgetpu/abrolhos-pm.c
@@ -107,8 +107,7 @@ static int abrolhos_pwr_state_get(void *data, u64 *val)
static int abrolhos_pwr_policy_set(void *data, u64 val)
{
- struct edgetpu_platform_dev *edgetpu_pdev =
- (struct edgetpu_platform_dev *)data;
+ struct abrolhos_platform_dev *edgetpu_pdev = (typeof(edgetpu_pdev))data;
struct edgetpu_platform_pwr *platform_pwr = &edgetpu_pdev->platform_pwr;
int ret;
@@ -129,8 +128,7 @@ static int abrolhos_pwr_policy_set(void *data, u64 val)
static int abrolhos_pwr_policy_get(void *data, u64 *val)
{
- struct edgetpu_platform_dev *edgetpu_pdev =
- (struct edgetpu_platform_dev *)data;
+ struct abrolhos_platform_dev *edgetpu_pdev = (typeof(edgetpu_pdev))data;
struct edgetpu_platform_pwr *platform_pwr = &edgetpu_pdev->platform_pwr;
mutex_lock(&platform_pwr->policy_lock);
@@ -365,8 +363,7 @@ static void abrolhos_power_down(struct edgetpu_pm *etpm);
static int abrolhos_power_up(struct edgetpu_pm *etpm)
{
struct edgetpu_dev *etdev = etpm->etdev;
- struct edgetpu_platform_dev *edgetpu_pdev = container_of(
- etpm->etdev, struct edgetpu_platform_dev, edgetpu_dev);
+ struct abrolhos_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev);
struct device *dev = etdev->dev;
int ret = abrolhos_pwr_state_set(dev,
abrolhos_get_initial_pwr_state(dev));
@@ -438,7 +435,7 @@ static int abrolhos_power_up(struct edgetpu_pm *etpm)
static void
abrolhos_pm_shutdown_firmware(struct edgetpu_dev *etdev,
- struct edgetpu_platform_dev *edgetpu_pdev)
+ struct abrolhos_platform_dev *edgetpu_pdev)
{
if (!edgetpu_pchannel_power_down(etdev, false))
return;
@@ -467,42 +464,41 @@ abrolhos_pm_shutdown_firmware(struct edgetpu_dev *etdev,
static void abrolhos_power_down(struct edgetpu_pm *etpm)
{
- struct edgetpu_platform_dev *edgetpu_pdev = container_of(
- etpm->etdev, struct edgetpu_platform_dev, edgetpu_dev);
+ struct edgetpu_dev *etdev = etpm->etdev;
+ struct abrolhos_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev);
u64 val;
int res;
- etdev_info(etpm->etdev, "Powering down\n");
+ etdev_info(etdev, "Powering down\n");
- if (abrolhos_pwr_state_get(etpm->etdev->dev, &val)) {
- etdev_warn(etpm->etdev, "Failed to read current power state\n");
+ if (abrolhos_pwr_state_get(etdev->dev, &val)) {
+ etdev_warn(etdev, "Failed to read current power state\n");
val = TPU_ACTIVE_NOM;
}
if (val == TPU_OFF) {
- etdev_dbg(etpm->etdev,
- "Device already off, skipping shutdown\n");
+ etdev_dbg(etdev, "Device already off, skipping shutdown\n");
return;
}
- if (etpm->etdev->kci &&
- edgetpu_firmware_status_locked(etpm->etdev) == FW_VALID) {
- abrolhos_pm_shutdown_firmware(etpm->etdev, edgetpu_pdev);
- cancel_work_sync(&etpm->etdev->kci->work);
+ if (etdev->kci && edgetpu_firmware_status_locked(etdev) == FW_VALID) {
+ /* Update usage stats before we power off fw. */
+ edgetpu_kci_update_usage(etdev);
+ abrolhos_pm_shutdown_firmware(etdev, edgetpu_pdev);
+ cancel_work_sync(&etdev->kci->work);
}
res = gsa_send_tpu_cmd(edgetpu_pdev->gsa_dev, GSA_TPU_SHUTDOWN);
if (res < 0)
- etdev_warn(etpm->etdev, "GSA shutdown request failed (%d)\n",
- res);
- abrolhos_pwr_state_set(etpm->etdev->dev, TPU_OFF);
+ etdev_warn(etdev, "GSA shutdown request failed (%d)\n", res);
+ abrolhos_pwr_state_set(etdev->dev, TPU_OFF);
}
static int abrolhos_pm_after_create(struct edgetpu_pm *etpm)
{
int ret;
- struct device *dev = etpm->etdev->dev;
- struct edgetpu_platform_dev *edgetpu_pdev = container_of(
- etpm->etdev, struct edgetpu_platform_dev, edgetpu_dev);
+ struct edgetpu_dev *etdev = etpm->etdev;
+ struct abrolhos_platform_dev *edgetpu_pdev = to_abrolhos_dev(etdev);
+ struct device *dev = etdev->dev;
ret = abrolhos_pwr_state_init(dev);
if (ret)
@@ -515,28 +511,33 @@ static int abrolhos_pm_after_create(struct edgetpu_pm *etpm)
mutex_init(&edgetpu_pdev->platform_pwr.policy_lock);
abrolhos_pwr_debugfs_dir =
debugfs_create_dir("power", edgetpu_fs_debugfs_dir());
- debugfs_create_file("state", 0660, abrolhos_pwr_debugfs_dir,
- dev, &fops_tpu_pwr_state);
- debugfs_create_file("vdd_tpu", 0660, abrolhos_pwr_debugfs_dir,
- dev, &fops_tpu_vdd_tpu);
- debugfs_create_file("vdd_tpu_m", 0660, abrolhos_pwr_debugfs_dir,
- dev, &fops_tpu_vdd_tpu_m);
- debugfs_create_file("vdd_int_m", 0660, abrolhos_pwr_debugfs_dir,
- dev, &fops_tpu_vdd_int_m);
- debugfs_create_file("core_rate", 0660, abrolhos_pwr_debugfs_dir,
- dev, &fops_tpu_core_rate);
- debugfs_create_file("ctl_rate", 0660, abrolhos_pwr_debugfs_dir,
- dev, &fops_tpu_ctl_rate);
- debugfs_create_file("axi_rate", 0660, abrolhos_pwr_debugfs_dir,
- dev, &fops_tpu_axi_rate);
- debugfs_create_file("apb_rate", 0440, abrolhos_pwr_debugfs_dir,
- dev, &fops_tpu_apb_rate);
- debugfs_create_file("uart_rate", 0440, abrolhos_pwr_debugfs_dir,
- dev, &fops_tpu_uart_rate);
+ if (!abrolhos_pwr_debugfs_dir) {
+ etdev_warn(etdev, "Failed to create debug FS power");
+ /* don't fail the procedure if debug FS creation fails */
+ return 0;
+ }
+ debugfs_create_file("state", 0660, abrolhos_pwr_debugfs_dir, dev,
+ &fops_tpu_pwr_state);
+ debugfs_create_file("vdd_tpu", 0660, abrolhos_pwr_debugfs_dir, dev,
+ &fops_tpu_vdd_tpu);
+ debugfs_create_file("vdd_tpu_m", 0660, abrolhos_pwr_debugfs_dir, dev,
+ &fops_tpu_vdd_tpu_m);
+ debugfs_create_file("vdd_int_m", 0660, abrolhos_pwr_debugfs_dir, dev,
+ &fops_tpu_vdd_int_m);
+ debugfs_create_file("core_rate", 0660, abrolhos_pwr_debugfs_dir, dev,
+ &fops_tpu_core_rate);
+ debugfs_create_file("ctl_rate", 0660, abrolhos_pwr_debugfs_dir, dev,
+ &fops_tpu_ctl_rate);
+ debugfs_create_file("axi_rate", 0660, abrolhos_pwr_debugfs_dir, dev,
+ &fops_tpu_axi_rate);
+ debugfs_create_file("apb_rate", 0440, abrolhos_pwr_debugfs_dir, dev,
+ &fops_tpu_apb_rate);
+ debugfs_create_file("uart_rate", 0440, abrolhos_pwr_debugfs_dir, dev,
+ &fops_tpu_uart_rate);
debugfs_create_file("policy", 0660, abrolhos_pwr_debugfs_dir,
- edgetpu_pdev, &fops_tpu_pwr_policy);
+ edgetpu_pdev, &fops_tpu_pwr_policy);
debugfs_create_file("core_pwr", 0660, abrolhos_pwr_debugfs_dir,
- edgetpu_pdev, &fops_tpu_core_pwr);
+ edgetpu_pdev, &fops_tpu_core_pwr);
return 0;
}
diff --git a/drivers/edgetpu/abrolhos-thermal.c b/drivers/edgetpu/abrolhos-thermal.c
index 5317f3f..c930232 100644
--- a/drivers/edgetpu/abrolhos-thermal.c
+++ b/drivers/edgetpu/abrolhos-thermal.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Edge TPU thermal driver for Abrolhos.
+ * EdgeTPU thermal driver for Abrolhos.
*
* Copyright (C) 2020 Google, Inc.
*/
@@ -54,15 +54,6 @@ static const struct edgetpu_state_pwr state_pwr_map[] = {
{ TPU_OFF, 0 },
};
-#define find_state_pwr(i, cmp_left, cmp_right, list, out_left, out_right) \
- do { \
- if (cmp_left == cmp_right) { \
- out_left = out_right; \
- return 0; \
- } \
- i++; \
- } while (i < ARRAY_SIZE(list))
-
static int edgetpu_get_max_state(struct thermal_cooling_device *cdev,
unsigned long *state)
{
@@ -70,9 +61,8 @@ static int edgetpu_get_max_state(struct thermal_cooling_device *cdev,
return 0;
}
-/* Set cooling state
- * Re-using code from abrohlos-platform.
- * TODO: move to external call
+/*
+ * Set cooling state.
*/
static int edgetpu_set_cur_state(struct thermal_cooling_device *cdev,
unsigned long state_original)
@@ -81,7 +71,7 @@ static int edgetpu_set_cur_state(struct thermal_cooling_device *cdev,
struct edgetpu_thermal *cooling = cdev->devdata;
struct device *dev = cooling->dev;
- if (WARN_ON(state_original >= ARRAY_SIZE(state_mapping))) {
+ if (state_original >= ARRAY_SIZE(state_mapping)) {
dev_err(dev, "%s: invalid cooling state %lu\n", __func__,
state_original);
return -EINVAL;
@@ -96,18 +86,21 @@ static int edgetpu_set_cur_state(struct thermal_cooling_device *cdev,
*/
#if 0
ret = exynos_acpm_set_policy(TPU_ACPM_DOMAIN, pwr_state);
-#endif
+#else
ret = 0;
+#endif
if (ret) {
dev_err(dev, "error setting tpu policy: %d\n", ret);
- mutex_unlock(&cooling->lock);
- return ret;
+ goto out;
}
cooling->cooling_state = state_original;
+ } else {
+ ret = -EALREADY;
}
+out:
mutex_unlock(&cooling->lock);
- return 0;
+ return ret;
}
static int edgetpu_get_cur_state(struct thermal_cooling_device *cdev,
@@ -118,12 +111,14 @@ static int edgetpu_get_cur_state(struct thermal_cooling_device *cdev,
*state = cooling->cooling_state;
if (*state >= ARRAY_SIZE(state_mapping)) {
- dev_warn(cooling->dev, "Unknown cooling state: %lu, resetting\n", *state);
+ dev_warn(cooling->dev,
+ "Unknown cooling state: %lu, resetting\n", *state);
mutex_lock(&cooling->lock);
ret = exynos_acpm_set_policy(TPU_ACPM_DOMAIN, TPU_ACTIVE_OD);
if (ret) {
- dev_err(cooling->dev, "error setting tpu policy: %d\n", ret);
+ dev_err(cooling->dev, "error setting tpu policy: %d\n",
+ ret);
mutex_unlock(&cooling->lock);
return ret;
}
@@ -139,13 +134,16 @@ static int edgetpu_get_cur_state(struct thermal_cooling_device *cdev,
static int edgetpu_state2power_internal(unsigned long state, u32 *power,
struct device *dev)
{
- int i = 0;
+ int i;
- find_state_pwr(i, state, state_pwr_map[i].state, state_pwr_map, *power,
- state_pwr_map[i].power);
+ for (i = 0; i < ARRAY_SIZE(state_pwr_map); i++) {
+ if (state == state_pwr_map[i].state) {
+ *power = state_pwr_map[i].power;
+ return 0;
+ }
+ }
dev_err(dev, "Unknown state req for: %lu\n", state);
*power = 0;
- WARN_ON(1);
return -EINVAL;
}
@@ -189,7 +187,6 @@ static int edgetpu_power2state(struct thermal_cooling_device *cdev,
}
dev_err(cooling->dev, "No power2state mapping found: %d\n", power);
- WARN_ON(1);
return -EINVAL;
}
@@ -204,7 +201,8 @@ static struct thermal_cooling_device_ops edgetpu_cooling_ops = {
static void tpu_thermal_exit_cooling(struct edgetpu_thermal *thermal)
{
- thermal_cooling_device_unregister(thermal->cdev);
+ if (!IS_ERR_OR_NULL(thermal->cdev))
+ thermal_cooling_device_unregister(thermal->cdev);
}
static void tpu_thermal_exit(struct edgetpu_thermal *thermal)
@@ -243,10 +241,14 @@ tpu_thermal_cooling_register(struct edgetpu_thermal *thermal, char *type)
static int tpu_thermal_init(struct edgetpu_thermal *thermal, struct device *dev)
{
int err;
+ struct dentry *d;
+ d = debugfs_create_dir("cooling", edgetpu_fs_debugfs_dir());
+ /* don't let debugfs creation failure abort the init procedure */
+ if (!d)
+ dev_warn(dev, "failed to create debug fs for cooling");
thermal->dev = dev;
- thermal->cooling_root =
- debugfs_create_dir("cooling", edgetpu_fs_debugfs_dir());
+ thermal->cooling_root = d;
err = tpu_thermal_cooling_register(thermal, EDGETPU_COOLING_NAME);
if (err) {
diff --git a/drivers/edgetpu/abrolhos-usage-stats.c b/drivers/edgetpu/abrolhos-usage-stats.c
new file mode 100644
index 0000000..1fd1fc2
--- /dev/null
+++ b/drivers/edgetpu/abrolhos-usage-stats.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "edgetpu-usage-stats.c"
diff --git a/drivers/edgetpu/edgetpu-core.c b/drivers/edgetpu/edgetpu-core.c
index 872c5f2..9735b28 100644
--- a/drivers/edgetpu/edgetpu-core.c
+++ b/drivers/edgetpu/edgetpu-core.c
@@ -30,6 +30,7 @@
#include "edgetpu-mcp.h"
#include "edgetpu-mmu.h"
#include "edgetpu-telemetry.h"
+#include "edgetpu-usage-stats.h"
#include "edgetpu.h"
static atomic_t single_dev_count = ATOMIC_INIT(-1);
@@ -219,6 +220,8 @@ int edgetpu_device_add(struct edgetpu_dev *etdev,
mutex_init(&etdev->open.lock);
mutex_init(&etdev->groups_lock);
+ INIT_LIST_HEAD(&etdev->groups);
+ etdev->n_groups = 0;
etdev->group_join_lockout = false;
mutex_init(&etdev->state_lock);
etdev->state = ETDEV_STATE_NOFW;
@@ -241,6 +244,8 @@ int edgetpu_device_add(struct edgetpu_dev *etdev,
}
edgetpu_setup_mmu(etdev);
+ edgetpu_usage_stats_init(etdev);
+
etdev->kci = devm_kzalloc(etdev->dev, sizeof(*etdev->kci), GFP_KERNEL);
if (!etdev->kci) {
ret = -ENOMEM;
@@ -279,6 +284,7 @@ remove_kci:
/* releases the resources of KCI */
edgetpu_mailbox_remove_all(etdev->mailbox_manager);
detach_mmu:
+ edgetpu_usage_stats_exit(etdev);
edgetpu_mmu_detach(etdev);
remove_dev:
edgetpu_mark_probe_fail(etdev);
@@ -291,6 +297,7 @@ void edgetpu_device_remove(struct edgetpu_dev *etdev)
edgetpu_chip_exit(etdev);
edgetpu_debug_dump_exit(etdev);
edgetpu_mailbox_remove_all(etdev->mailbox_manager);
+ edgetpu_usage_stats_exit(etdev);
edgetpu_mmu_detach(etdev);
edgetpu_fs_remove(etdev);
}
diff --git a/drivers/edgetpu/edgetpu-debug-dump.c b/drivers/edgetpu/edgetpu-debug-dump.c
index 6f83645..d8ccde9 100644
--- a/drivers/edgetpu/edgetpu-debug-dump.c
+++ b/drivers/edgetpu/edgetpu-debug-dump.c
@@ -5,6 +5,8 @@
*
* Copyright (C) 2020 Google, Inc.
*/
+#include <linux/workqueue.h>
+
#include "edgetpu-config.h"
#include "edgetpu-debug-dump.h"
#include "edgetpu-iremap-pool.h"
@@ -16,51 +18,6 @@ static inline u64 word_align_offset(u64 offset)
(((offset % sizeof(u64)) == 0) ? 0 : 1);
}
-int edgetpu_debug_dump_init(struct edgetpu_dev *etdev)
-{
-#ifdef CONFIG_ABROLHOS
- size_t size;
- int ret;
- struct edgetpu_debug_dump_setup *dump_setup;
-
- size = EDGETPU_DEBUG_DUMP_MEM_SIZE;
-
- /*
- * Allocate buffers for various dump segments and map them to FW
- * accessible regions
- */
- ret = edgetpu_iremap_alloc(etdev, size, &etdev->debug_dump_mem,
- EDGETPU_CONTEXT_KCI);
- if (ret) {
- etdev_err(etdev, "Debug dump seg alloc failed");
- etdev->debug_dump_mem.vaddr = NULL;
- return ret;
- }
- dump_setup =
- (struct edgetpu_debug_dump_setup *)etdev->debug_dump_mem.vaddr;
- dump_setup->dump_mem_size = size;
- memset(dump_setup, 0, dump_setup->dump_mem_size);
- return ret;
-#else
- return 0;
-#endif /* CONFIG_ABROLHOS */
-}
-
-void edgetpu_debug_dump_exit(struct edgetpu_dev *etdev)
-{
-#ifdef CONFIG_ABROLHOS
- if (!etdev->debug_dump_mem.vaddr) {
- etdev_dbg(etdev, "Debug dump not allocated");
- return;
- }
- /*
- * Free the memory assigned for debug dump
- */
- edgetpu_iremap_free(etdev, &etdev->debug_dump_mem,
- EDGETPU_CONTEXT_KCI);
-#endif /* CONFIG_ABROLHOS */
-}
-
int edgetpu_get_debug_dump(struct edgetpu_dev *etdev, u64 type)
{
int ret;
@@ -86,6 +43,47 @@ int edgetpu_get_debug_dump(struct edgetpu_dev *etdev, u64 type)
return ret;
}
+/*
+ * Work item that runs the debug dump handler registered for the reason
+ * stored in the dump setup area of the embedding edgetpu_dev.
+ *
+ * Whether or not a handler ran (or succeeded), the
+ * host_dump_available_to_read flag is cleared before returning.
+ */
+static void edgetpu_debug_dump_work(struct work_struct *work)
+{
+	struct edgetpu_dev *etdev =
+		container_of(work, struct edgetpu_dev, debug_dump_work);
+	struct edgetpu_debug_dump_setup *dump_setup =
+		(struct edgetpu_debug_dump_setup *)etdev->debug_dump_mem.vaddr;
+	u64 offset = sizeof(struct edgetpu_debug_dump_setup);
+	/* The dump descriptor follows the setup area in the same buffer. */
+	struct edgetpu_debug_dump *debug_dump =
+		(struct edgetpu_debug_dump *)((u64 *)dump_setup +
+					      word_align_offset(offset));
+	u64 dump_reason;
+	int ret;
+
+	if (!etdev->debug_dump_handlers) {
+		etdev_err(etdev,
+			  "Failed to generate coredump as handler is NULL");
+	} else {
+		dump_reason = dump_setup->dump_req_reason;
+		/* Only index the handler table with an in-range reason. */
+		if (dump_reason < DUMP_REQ_REASON_NUM &&
+		    etdev->debug_dump_handlers[dump_reason]) {
+			ret = etdev->debug_dump_handlers[dump_reason]
+				((void *)etdev, (void *)dump_setup);
+			if (ret)
+				etdev_err(etdev,
+					  "Failed to generate coredump: %d\n",
+					  ret);
+		} else {
+			etdev_err(etdev,
+				  "Failed to generate coredump as handler is NULL for dump request reason: 0x%llx",
+				  dump_reason);
+		}
+	}
+
+	debug_dump->host_dump_available_to_read = false;
+}
+
void edgetpu_debug_dump_resp_handler(struct edgetpu_dev *etdev)
{
struct edgetpu_debug_dump_setup *dump_setup;
@@ -104,9 +102,8 @@ void edgetpu_debug_dump_resp_handler(struct edgetpu_dev *etdev)
if (!debug_dump->host_dump_available_to_read)
return;
- /*
- * TODO (b/156049774): Dump segments may be collected here and exposed
- * to SSCD.
- */
- debug_dump->host_dump_available_to_read = false;
+ if (!etdev->debug_dump_work.func)
+ INIT_WORK(&etdev->debug_dump_work, edgetpu_debug_dump_work);
+
+ schedule_work(&etdev->debug_dump_work);
}
diff --git a/drivers/edgetpu/edgetpu-debug-dump.h b/drivers/edgetpu/edgetpu-debug-dump.h
index 7313021..ec33668 100644
--- a/drivers/edgetpu/edgetpu-debug-dump.h
+++ b/drivers/edgetpu/edgetpu-debug-dump.h
@@ -10,7 +10,7 @@
#include "edgetpu-internal.h"
-#define DEBUG_DUMP_HOST_CONTRACT_VERSION 1
+#define DEBUG_DUMP_HOST_CONTRACT_VERSION 2
enum edgetpu_dump_type_bit_position {
DUMP_TYPE_CRASH_REASON_BIT = 0,
@@ -24,6 +24,13 @@ enum edgetpu_dump_type_bit_position {
};
+/*
+ * Reason attached to a debug dump request. The value is carried in
+ * edgetpu_debug_dump_setup#dump_req_reason and is used to index the
+ * per-device debug dump handler table.
+ */
+enum edgetpu_dump_request_reason {
+ DUMP_REQ_REASON_DEFAULT = 0,
+ DUMP_REQ_REASON_WDT_TIMEOUT = 1,
+ DUMP_REQ_REASON_BY_USER = 2,
+ /* Not a reason: values >= this are rejected before the handler lookup. */
+ DUMP_REQ_REASON_NUM = 3
+};
+
struct edgetpu_crash_reason {
u64 code; /* code that captures the reset reason */
};
@@ -57,6 +64,7 @@ struct edgetpu_debug_dump_setup {
/* types of dumps requested by host */
u64 type;
u64 dump_mem_size; /* total size of memory allocated to dump */
+ u64 dump_req_reason; /* debug dump request reason */
u64 reserved[2];
};
diff --git a/drivers/edgetpu/edgetpu-device-group.c b/drivers/edgetpu/edgetpu-device-group.c
index f9d681e..f323a3c 100644
--- a/drivers/edgetpu/edgetpu-device-group.c
+++ b/drivers/edgetpu/edgetpu-device-group.c
@@ -12,6 +12,7 @@
#include <linux/eventfd.h>
#include <linux/iommu.h>
#include <linux/kconfig.h>
+#include <linux/list.h>
#include <linux/mm.h>
#include <linux/refcount.h>
#include <linux/scatterlist.h>
@@ -32,6 +33,7 @@
#include "edgetpu-sw-watchdog.h"
#include "edgetpu-usr.h"
#include "edgetpu.h"
+#include "mm-backport.h"
#ifdef EDGETPU_HAS_P2P_MAILBOX
#include "edgetpu-p2p-mailbox.h"
@@ -83,6 +85,7 @@ static int edgetpu_kci_leave_group_worker(struct kci_worker_param *param)
struct edgetpu_dev *etdev = edgetpu_device_group_nth_etdev(group, i);
etdev_dbg(etdev, "%s: leave group %u", __func__, group->workload_id);
+ edgetpu_kci_update_usage(etdev);
edgetpu_kci_leave_group(etdev->kci);
return 0;
}
@@ -126,13 +129,20 @@ static void edgetpu_group_kci_close_device(struct edgetpu_device_group *group)
}
/*
- * Asynchronously sends LEAVE_GROUP KCI to all devices in @group.
+ * Handle KCI chores for device group disband.
+ *
+ * For multi-chip architectures: asynchronously send LEAVE_GROUP KCI to all
+ * devices in @group (and GET_USAGE to update usage stats).
+ *
+ * For single-chip, multiple client architectures: send KCI CLOSE_DEVICE
+ * to the device (and GET_USAGE to update usage stats).
*
* Caller holds group->lock.
*/
static void edgetpu_device_group_kci_leave(struct edgetpu_device_group *group)
{
#if IS_ENABLED(CONFIG_ABROLHOS)
+ edgetpu_kci_update_usage(group->etdev);
return edgetpu_group_kci_close_device(group);
#else /* !CONFIG_ABROLHOS */
struct kci_worker_param *params =
@@ -417,34 +427,30 @@ static bool edgetpu_group_check_contiguity(struct edgetpu_device_group *group)
}
/*
- * Finds an empty slot of @etdev->groups and assigns @group to it.
+ * Inserts @group to the list @etdev->groups.
*
- * Returns the non-negative index of etdev->groups on success.
- * Returns -EBUSY if no empty slot found.
+ * Returns 0 on success.
+ * Returns -EAGAIN if group join is currently disabled, -ENOMEM on OOM.
*/
static int edgetpu_dev_add_group(struct edgetpu_dev *etdev,
struct edgetpu_device_group *group)
{
- int i;
+ struct edgetpu_list_group *l = kmalloc(sizeof(*l), GFP_KERNEL);
+ if (!l)
+ return -ENOMEM;
mutex_lock(&etdev->groups_lock);
if (etdev->group_join_lockout) {
mutex_unlock(&etdev->groups_lock);
+ kfree(l);
return -EAGAIN;
}
- for (i = 0; i < EDGETPU_NGROUPS; i++) {
- if (!etdev->groups[i])
- break;
- }
+ l->grp = edgetpu_device_group_get(group);
+ list_add_tail(&l->list, &etdev->groups);
+ etdev->n_groups++;
- if (i >= EDGETPU_NGROUPS) {
- mutex_unlock(&etdev->groups_lock);
- return -EBUSY;
- }
- etdev->groups[i] = edgetpu_device_group_get(group);
mutex_unlock(&etdev->groups_lock);
-
- return i;
+ return 0;
}
void edgetpu_device_group_put(struct edgetpu_device_group *group)
@@ -458,23 +464,16 @@ void edgetpu_device_group_put(struct edgetpu_device_group *group)
/* caller must hold @etdev->groups_lock. */
static bool edgetpu_in_any_group_locked(struct edgetpu_dev *etdev)
{
- int i;
-
- for (i = 0; i < EDGETPU_NGROUPS; i++) {
- if (etdev->groups[i])
- return true;
- }
-
- return false;
+ return etdev->n_groups;
}
/* caller must hold the client's etdev state_lock. */
void edgetpu_device_group_leave_locked(struct edgetpu_client *client)
{
struct edgetpu_device_group *group;
+ struct edgetpu_list_group *l;
struct edgetpu_list_client *cur, *nxt;
bool will_disband = false;
- int i;
mutex_lock(&client->group_lock);
group = client->group;
@@ -530,10 +529,12 @@ void edgetpu_device_group_leave_locked(struct edgetpu_client *client)
mutex_unlock(&client->group_lock);
/* remove the group from the client device */
mutex_lock(&client->etdev->groups_lock);
- for (i = 0; i < EDGETPU_NGROUPS; i++) {
- if (client->etdev->groups[i] == group) {
- edgetpu_device_group_put(client->etdev->groups[i]);
- client->etdev->groups[i] = NULL;
+ list_for_each_entry(l, &client->etdev->groups, list) {
+ if (l->grp == group) {
+ list_del(&l->list);
+ edgetpu_device_group_put(l->grp);
+ kfree(l);
+ client->etdev->n_groups--;
break;
}
}
@@ -553,7 +554,6 @@ static int edgetpu_device_group_add_locked(struct edgetpu_device_group *group,
struct edgetpu_client *client)
{
struct edgetpu_list_client *c;
- int i;
int ret = 0;
mutex_lock(&client->group_lock);
@@ -581,10 +581,9 @@ static int edgetpu_device_group_add_locked(struct edgetpu_device_group *group,
goto out;
}
- i = edgetpu_dev_add_group(client->etdev, group);
- if (i < 0) {
+ ret = edgetpu_dev_add_group(client->etdev, group);
+ if (ret) {
kfree(c);
- ret = i;
goto out;
}
@@ -1447,14 +1446,14 @@ out:
void edgetpu_fatal_error_notify(struct edgetpu_dev *etdev)
{
- int i;
+ struct edgetpu_list_group *l;
+ struct edgetpu_device_group *group;
mutex_lock(&etdev->groups_lock);
- for (i = 0; i < EDGETPU_NGROUPS; i++) {
- if (etdev->groups[i])
- edgetpu_group_notify(etdev->groups[i],
- EDGETPU_EVENT_FATAL_ERROR);
- }
+
+ etdev_for_each_group(etdev, l, group)
+ edgetpu_group_notify(group, EDGETPU_EVENT_FATAL_ERROR);
+
mutex_unlock(&etdev->groups_lock);
}
diff --git a/drivers/edgetpu/edgetpu-device-group.h b/drivers/edgetpu/edgetpu-device-group.h
index 3c68dd4..87a0987 100644
--- a/drivers/edgetpu/edgetpu-device-group.h
+++ b/drivers/edgetpu/edgetpu-device-group.h
@@ -106,6 +106,23 @@ struct edgetpu_device_group {
};
/*
+ * Entry of edgetpu_dev#groups.
+ *
+ * Files other than edgetpu-device-group.c shouldn't need to access this
+ * structure. Use macro etdev_for_each_group to access the groups under an
+ * etdev.
+ */
+struct edgetpu_list_group {
+ /* Links this entry into edgetpu_dev#groups. */
+ struct list_head list;
+ /*
+ * Group this entry stands for. The reference is taken with
+ * edgetpu_device_group_get() when the entry is added and dropped with
+ * edgetpu_device_group_put() when it is removed.
+ */
+ struct edgetpu_device_group *grp;
+};
+
+/*
+ * Macro to loop through etdev->groups.
+ *
+ * @etdev: the edgetpu_dev whose group list is walked
+ * @l:     struct edgetpu_list_group * used as the list cursor
+ * @g:     struct edgetpu_device_group * set to the group of the current entry
+ *
+ * @g is only assigned from real list entries: the assignment lives in the
+ * loop condition after the end-of-list test, so ->grp is never read through
+ * the list head itself. All arguments are parenthesized so that expressions
+ * may be passed safely.
+ *
+ * Entries must not be removed from the list inside the loop body.
+ */
+#define etdev_for_each_group(etdev, l, g)				\
+	for ((l) = list_entry((etdev)->groups.next, typeof(*(l)), list); \
+	     &(l)->list != &(etdev)->groups && ((g) = (l)->grp, 1);	\
+	     (l) = list_entry((l)->list.next, typeof(*(l)), list))
+/*
* Returns if the group is waiting for members to join.
*
* Must be called with lock held.
diff --git a/drivers/edgetpu/edgetpu-firmware.c b/drivers/edgetpu/edgetpu-firmware.c
index ce0cd50..6edff9e 100644
--- a/drivers/edgetpu/edgetpu-firmware.c
+++ b/drivers/edgetpu/edgetpu-firmware.c
@@ -595,43 +595,41 @@ static const struct attribute_group edgetpu_firmware_attr_group = {
.attrs = dev_attrs,
};
-static void edgetpu_firmware_wdt_timeout_action(void *data)
+/*
+ * Can only be called with etdev->state == ETDEV_STATE_FWLOADING.
+ */
+static void edgetpu_abort_clients(struct edgetpu_dev *etdev)
{
- int ret, i, num_clients = 0;
- struct edgetpu_dev *etdev = data;
+ int i, num_clients = 0;
struct edgetpu_device_group *group;
- struct edgetpu_client *clients[EDGETPU_NGROUPS];
+ struct edgetpu_list_group *g;
+ struct edgetpu_client **clients;
struct edgetpu_list_client *c;
- struct edgetpu_firmware *et_fw = etdev->firmware;
-
- /* Don't attempt f/w restart if device is off. */
- if (!edgetpu_is_powered(etdev))
- return;
-
- mutex_lock(&etdev->state_lock);
- if (etdev->state == ETDEV_STATE_FWLOADING) {
- mutex_unlock(&etdev->state_lock);
- return;
- }
- etdev->state = ETDEV_STATE_FWLOADING;
- mutex_unlock(&etdev->state_lock);
/*
* We don't hold etdev->groups_lock here because
- * 1. All group operations should be protected by "state GOOD" and
+ * 1. All group operations (functions in edgetpu-device-group.c)
+ * are skipped when "etdev->state is not GOOD", we shall be the
+ * only one accessing @etdev->groups, and
* 2. to prevent LOCKDEP from reporting deadlock with
* edgetpu_device_group_add_locked, which nested holds group->lock
* then etdev->groups_lock.
*/
- for (i = 0; i < EDGETPU_NGROUPS; i++) {
- group = etdev->groups[i];
- if (!group)
- continue;
+ clients = kmalloc_array(etdev->n_groups, sizeof(*clients), GFP_KERNEL);
+ if (!clients) {
+ /*
+ * Just give up aborting clients in this case, this should never
+ * happen after all.
+ */
+ edgetpu_fatal_error_notify(etdev);
+ return;
+ }
+ etdev_for_each_group(etdev, g, group) {
mutex_lock(&group->lock);
list_for_each_entry(c, &group->clients, list) {
if (etdev == c->client->etdev) {
clients[num_clients++] =
- edgetpu_client_get(c->client);
+ edgetpu_client_get(c->client);
break;
}
}
@@ -646,6 +644,28 @@ static void edgetpu_firmware_wdt_timeout_action(void *data)
edgetpu_device_group_leave_locked(clients[i]);
edgetpu_client_put(clients[i]);
}
+ kfree(clients);
+}
+
+static void edgetpu_firmware_wdt_timeout_action(void *data)
+{
+ int ret;
+ struct edgetpu_dev *etdev = data;
+ struct edgetpu_firmware *et_fw = etdev->firmware;
+
+ /* Don't attempt f/w restart if device is off. */
+ if (!edgetpu_is_powered(etdev))
+ return;
+
+ mutex_lock(&etdev->state_lock);
+ if (etdev->state == ETDEV_STATE_FWLOADING) {
+ mutex_unlock(&etdev->state_lock);
+ return;
+ }
+ etdev->state = ETDEV_STATE_FWLOADING;
+ mutex_unlock(&etdev->state_lock);
+
+ edgetpu_abort_clients(etdev);
ret = edgetpu_firmware_lock(etdev);
/*
diff --git a/drivers/edgetpu/edgetpu-fs.c b/drivers/edgetpu/edgetpu-fs.c
index 998704c..047b713 100644
--- a/drivers/edgetpu/edgetpu-fs.c
+++ b/drivers/edgetpu/edgetpu-fs.c
@@ -880,17 +880,13 @@ static const struct file_operations statusregs_ops = {
static int mappings_show(struct seq_file *s, void *data)
{
struct edgetpu_dev *etdev = s->private;
- int i;
+ struct edgetpu_list_group *l;
+ struct edgetpu_device_group *group;
mutex_lock(&etdev->groups_lock);
- for (i = 0; i < EDGETPU_NGROUPS; i++) {
- struct edgetpu_device_group *group = etdev->groups[i];
-
- if (!group)
- continue;
+ etdev_for_each_group(etdev, l, group)
edgetpu_group_mappings_show(group, s);
- }
mutex_unlock(&etdev->groups_lock);
edgetpu_kci_mappings_show(etdev, s);
diff --git a/drivers/edgetpu/edgetpu-internal.h b/drivers/edgetpu/edgetpu-internal.h
index 34da92d..237fdd2 100644
--- a/drivers/edgetpu/edgetpu-internal.h
+++ b/drivers/edgetpu/edgetpu-internal.h
@@ -28,10 +28,12 @@
#include <linux/refcount.h>
#include <linux/scatterlist.h>
#include <linux/types.h>
+#include <linux/workqueue.h>
#include "edgetpu.h"
#include "edgetpu-pm.h"
#include "edgetpu-thermal.h"
+#include "edgetpu-usage-stats.h"
#define etdev_err(etdev, fmt, ...) dev_err((etdev)->etcdev, fmt, ##__VA_ARGS__)
#define etdev_warn(etdev, fmt, ...) \
@@ -135,6 +137,8 @@ struct edgetpu_kci;
struct edgetpu_telemetry_ctx;
struct edgetpu_mempool;
+typedef int(*edgetpu_debug_dump_handlers)(void *etdev, void *dump_setup);
+
#define EDGETPU_DEVICE_NAME_MAX 64
/* ioremapped resource */
@@ -168,8 +172,9 @@ struct edgetpu_dev {
struct dentry *d_entry; /* debugfs dir for this device */
struct mutex state_lock; /* protects state of this device */
enum edgetpu_dev_state state;
- struct mutex groups_lock; /* protects groups and lockout */
- struct edgetpu_device_group *groups[EDGETPU_NGROUPS];
+ struct mutex groups_lock; /* protects groups, n_groups, and lockout */
+ struct list_head groups;
+ uint n_groups; /* number of entries in @groups */
bool group_join_lockout; /* disable group join while reinit */
void *mmu_cookie; /* mmu driver private data */
void *dram_cookie; /* on-device DRAM private data */
@@ -178,6 +183,7 @@ struct edgetpu_dev {
struct edgetpu_firmware *firmware; /* firmware management */
struct edgetpu_telemetry_ctx *telemetry;
struct edgetpu_thermal *thermal;
+ struct edgetpu_usage_stats *usage_stats; /* usage stats private data */
struct edgetpu_pm *pm; /* Power management interface */
/* Memory pool in instruction remap region */
struct edgetpu_mempool *iremap_pool;
@@ -185,10 +191,14 @@ struct edgetpu_dev {
uint mcp_die_index; /* physical die index w/in multichip pkg */
u8 mcp_pkg_type; /* multichip pkg type */
struct edgetpu_sw_wdt *etdev_sw_wdt; /* software watchdog */
+ bool reset_needed; /* error recovery requests full chip reset. */
/* version read from the firmware binary file */
struct edgetpu_fw_version fw_version;
atomic_t job_count; /* times joined to a device group */
struct edgetpu_coherent_mem debug_dump_mem; /* debug dump memory */
+ /* debug dump handlers */
+ edgetpu_debug_dump_handlers *debug_dump_handlers;
+ struct work_struct debug_dump_work;
};
extern const struct file_operations edgetpu_fops;
diff --git a/drivers/edgetpu/edgetpu-kci.c b/drivers/edgetpu/edgetpu-kci.c
index 0a42ce6..609d411 100644
--- a/drivers/edgetpu/edgetpu-kci.c
+++ b/drivers/edgetpu/edgetpu-kci.c
@@ -19,6 +19,7 @@
#include "edgetpu-iremap-pool.h"
#include "edgetpu-mmu.h"
#include "edgetpu-telemetry.h"
+#include "edgetpu-usage-stats.h"
/* the index of mailbox for kernel should always be zero */
#define KERNEL_MAILBOX_INDEX 0
@@ -31,7 +32,7 @@
/* Set extra ludicrously high to 60 seconds for (slow) Palladium emulation. */
#define KCI_TIMEOUT (60000)
#else
-/* 5 secs. TODO(134408592): Define a timeout for TPU CPU responses */
+/* 5 secs. */
#define KCI_TIMEOUT (5000)
#endif
@@ -730,6 +731,44 @@ enum edgetpu_fw_flavor edgetpu_kci_fw_info(
return flavor;
}
+/*
+ * Sends a GET_USAGE KCI command with a temporary buffer and, when the
+ * command succeeds, passes the buffer to
+ * edgetpu_usage_stats_process_buffer().
+ *
+ * Failures are only logged; nothing is reported to the caller.
+ */
+void edgetpu_kci_update_usage(struct edgetpu_dev *etdev)
+{
+#define EDGETPU_USAGE_BUFFER_SIZE 4096
+	struct edgetpu_coherent_mem usage_buf;
+	struct edgetpu_kci_response_element resp;
+	/* Unnamed members, including the dma descriptor, start out zeroed. */
+	struct edgetpu_command_element cmd = {
+		.code = KCI_CODE_GET_USAGE,
+	};
+	int err;
+
+	err = edgetpu_iremap_alloc(etdev, EDGETPU_USAGE_BUFFER_SIZE,
+				   &usage_buf, EDGETPU_CONTEXT_KCI);
+	if (err) {
+		etdev_warn_once(etdev, "%s: failed to allocate usage buffer",
+				__func__);
+		return;
+	}
+
+	/* Clear the header so it reads as zero metrics until it is filled. */
+	memset(usage_buf.vaddr, 0, sizeof(struct usage_tracker_header));
+	cmd.dma.size = EDGETPU_USAGE_BUFFER_SIZE;
+	cmd.dma.address = usage_buf.tpu_addr;
+	err = edgetpu_kci_send_cmd_return_resp(etdev->kci, &cmd, &resp);
+
+	if (err == KCI_ERROR_OK)
+		edgetpu_usage_stats_process_buffer(etdev, usage_buf.vaddr);
+	else if (err == KCI_ERROR_UNIMPLEMENTED ||
+		 err == KCI_ERROR_UNAVAILABLE)
+		etdev_dbg(etdev, "firmware does not report usage\n");
+	else
+		etdev_warn_once(etdev, "%s: error %d", __func__, err);
+
+	edgetpu_iremap_free(etdev, &usage_buf, EDGETPU_CONTEXT_KCI);
+}
+
/* debugfs mappings dump */
void edgetpu_kci_mappings_show(struct edgetpu_dev *etdev, struct seq_file *s)
{
diff --git a/drivers/edgetpu/edgetpu-kci.h b/drivers/edgetpu/edgetpu-kci.h
index fe11a3b..aa77d9a 100644
--- a/drivers/edgetpu/edgetpu-kci.h
+++ b/drivers/edgetpu/edgetpu-kci.h
@@ -95,6 +95,7 @@ enum edgetpu_kci_code {
KCI_CODE_OPEN_DEVICE = 9,
KCI_CODE_CLOSE_DEVICE = 10,
KCI_CODE_FIRMWARE_INFO = 11,
+ KCI_CODE_GET_USAGE = 12,
};
/*
@@ -244,6 +245,9 @@ int edgetpu_kci_ack(struct edgetpu_kci *kci);
enum edgetpu_fw_flavor edgetpu_kci_fw_info(
struct edgetpu_kci *kci, struct edgetpu_fw_info *fw_info);
+/* Retrieve usage tracking data from firmware, update info on host. */
+void edgetpu_kci_update_usage(struct edgetpu_dev *etdev);
+
/*
* Sends the "Map Log Buffer" command and waits for remote response.
*
diff --git a/drivers/edgetpu/edgetpu-mailbox.c b/drivers/edgetpu/edgetpu-mailbox.c
index f2fb859..90b3ed8 100644
--- a/drivers/edgetpu/edgetpu-mailbox.c
+++ b/drivers/edgetpu/edgetpu-mailbox.c
@@ -710,14 +710,13 @@ void edgetpu_mailbox_reinit_vii(struct edgetpu_device_group *group)
void edgetpu_mailbox_restore_active_vii_queues(struct edgetpu_dev *etdev)
{
- int i;
+ struct edgetpu_list_group *l;
struct edgetpu_device_group *group;
u32 mailbox_ids = 0;
mutex_lock(&etdev->groups_lock);
- for (i = 0; i < EDGETPU_NGROUPS; i++) {
- group = etdev->groups[i];
- if (group && !edgetpu_group_mailbox_detached_locked(group)) {
+ etdev_for_each_group(etdev, l, group) {
+ if (!edgetpu_group_mailbox_detached_locked(group)) {
edgetpu_mailbox_reinit_vii(group);
if (edgetpu_device_group_is_finalized(group))
mailbox_ids |=
diff --git a/drivers/edgetpu/edgetpu-pm.c b/drivers/edgetpu/edgetpu-pm.c
index 8905cf9..b700c19 100644
--- a/drivers/edgetpu/edgetpu-pm.c
+++ b/drivers/edgetpu/edgetpu-pm.c
@@ -201,11 +201,11 @@ static int pchannel_state_change_request(struct edgetpu_dev *etdev, int state)
if (val & PDENY) {
edgetpu_dev_write_32(etdev, EDGETPU_REG_POWER_CONTROL,
val & !state);
- etdev_err(etdev, "p-channel state change request denied\n");
+ etdev_dbg(etdev, "p-channel state change request denied\n");
deny = true;
}
if (ret) {
- etdev_err(etdev, "p-channel state change request timeout\n");
+ etdev_dbg(etdev, "p-channel state change request timeout\n");
return ret;
}
/* Phase 4. Drive PREQ to 0 */
@@ -226,7 +226,8 @@ int edgetpu_pchannel_power_down(struct edgetpu_dev *etdev, bool wait_on_pactive)
edgetpu_sw_wdt_stop(etdev);
ret = edgetpu_kci_shutdown(etdev->kci);
if (ret) {
- etdev_err(etdev, "request power down routing failed\n");
+ etdev_err(etdev, "p-channel power down routing failed: %d",
+ ret);
return ret;
}
if (wait_on_pactive) {
@@ -241,6 +242,10 @@ int edgetpu_pchannel_power_down(struct edgetpu_dev *etdev, bool wait_on_pactive)
tries--;
} while (ret && tries);
+ if (ret)
+ etdev_err(etdev, "p-channel shutdown state change failed: %d",
+ ret);
+
return ret;
}
diff --git a/drivers/edgetpu/edgetpu-sw-watchdog.c b/drivers/edgetpu/edgetpu-sw-watchdog.c
index 397c021..2db2b47 100644
--- a/drivers/edgetpu/edgetpu-sw-watchdog.c
+++ b/drivers/edgetpu/edgetpu-sw-watchdog.c
@@ -25,6 +25,23 @@ static void sw_wdt_handler_work(struct work_struct *work)
et_action_work->edgetpu_sw_wdt_handler(et_action_work->data);
}
+/*
+ * Schedules the software watchdog action work right away.
+ *
+ * @reset is stored in etdev->reset_needed before the action is scheduled.
+ * Does nothing if @etdev has no software watchdog.
+ */
+void edgetpu_watchdog_bite(struct edgetpu_dev *etdev, bool reset)
+{
+	struct edgetpu_sw_wdt *wdt = etdev->etdev_sw_wdt;
+	const char *action = reset ? "reset" : "restart";
+
+	if (!wdt)
+		return;
+	/*
+	 * Stop sw wdog delayed worker, to reduce chance this explicit call
+	 * races with a sw wdog timeout. May be in IRQ context, no sync,
+	 * worker may already be active. If we race with a sw wdog restart
+	 * and need a chip reset, hopefully the P-channel reset will fail
+	 * and the bigger hammer chip reset will kick in at that point.
+	 */
+	cancel_delayed_work(&wdt->dwork);
+	etdev_err(etdev, "watchdog %s", action);
+	etdev->reset_needed = reset;
+	schedule_work(&wdt->et_action_work.work);
+}
+
/*
* Ping the f/w for a response. Reschedule the work for next beat
* in case of response or schedule a worker for action callback in case of
diff --git a/drivers/edgetpu/edgetpu-sw-watchdog.h b/drivers/edgetpu/edgetpu-sw-watchdog.h
index c278912..7b214b2 100644
--- a/drivers/edgetpu/edgetpu-sw-watchdog.h
+++ b/drivers/edgetpu/edgetpu-sw-watchdog.h
@@ -48,4 +48,11 @@ void edgetpu_sw_wdt_set_handler(struct edgetpu_dev *etdev,
void edgetpu_sw_wdt_modify_heartbeat(struct edgetpu_dev *etdev,
unsigned long hrtbeat_ms);
+/*
+ * Schedule sw watchdog action immediately. Called on fatal errors.
+ * @reset: true if error recovery requires a full chip reset, not just
+ * firmware restart.
+ */
+void edgetpu_watchdog_bite(struct edgetpu_dev *etdev, bool reset);
+
#endif /* __EDGETPU_SW_WDT_H__ */
diff --git a/drivers/edgetpu/edgetpu-thermal.h b/drivers/edgetpu/edgetpu-thermal.h
index c9d38bc..4e97f07 100644
--- a/drivers/edgetpu/edgetpu-thermal.h
+++ b/drivers/edgetpu/edgetpu-thermal.h
@@ -1,10 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Edge TPU thermal driver header.
+ * EdgeTPU thermal driver header.
*
* Copyright (C) 2020 Google, Inc.
*/
-
#ifndef __EDGETPU_THERMAL_H__
#define __EDGETPU_THERMAL_H__
diff --git a/drivers/edgetpu/edgetpu-usage-stats.c b/drivers/edgetpu/edgetpu-usage-stats.c
new file mode 100644
index 0000000..b7b309c
--- /dev/null
+++ b/drivers/edgetpu/edgetpu-usage-stats.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * EdgeTPU usage stats
+ *
+ * Copyright (C) 2020 Google, Inc.
+ */
+
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+
+#include "edgetpu-internal.h"
+#include "edgetpu-usage-stats.h"
+
+#if IS_ENABLED(CONFIG_ABROLHOS)
+
+#include "abrolhos-pm.h"
+
+/*
+ * Power states that usage durations are bucketed by. tpu_state_map() scans
+ * this table from the last entry down and picks the first entry that is not
+ * greater than the reported state; the values are also printed as the header
+ * row of the usage_stats attribute.
+ * NOTE(review): the scan presumably relies on the entries being in ascending
+ * order - confirm against the tpu_pwr_state definition.
+ */
+static enum tpu_pwr_state tpu_states_arr[] = {
+ TPU_ACTIVE_SUD,
+ TPU_ACTIVE_UD,
+ TPU_ACTIVE_NOM,
+ TPU_ACTIVE_OD,
+};
+
+#else /* !CONFIG_ABROLHOS */
+
+/* All execution times will be added to the same state. */
+static uint32_t tpu_states_arr[] = {
+ 0,
+};
+
+#endif /* CONFIG_ABROLHOS */
+
+#define NUM_TPU_STATES ARRAY_SIZE(tpu_states_arr)
+
+/* Accumulated usage of one uid, stored in edgetpu_usage_stats#uid_hash_table. */
+struct uid_entry {
+ /* Identifier the entry is hashed and looked up by. */
+ int32_t uid;
+ /* Summed durations, indexed by the value tpu_state_map() returns. */
+ uint64_t time_in_state[NUM_TPU_STATES];
+ /* Hash table linkage. */
+ struct hlist_node node;
+};
+
+/*
+ * Maps a reported power state to an index into tpu_states_arr.
+ *
+ * Returns the highest index whose table entry is <= @state, or 0 when
+ * @state is below every entry.
+ */
+static int tpu_state_map(uint32_t state)
+{
+	int idx = NUM_TPU_STATES;
+
+	while (--idx >= 0) {
+		if (tpu_states_arr[idx] <= state)
+			return idx;
+	}
+
+	return 0;
+}
+
+/*
+ * Looks up the entry for @uid in @ustats->uid_hash_table.
+ *
+ * Returns the matching entry, or NULL if @uid has no entry yet.
+ * Caller must hold usage_stats lock.
+ */
+static struct uid_entry *
+find_uid_entry_locked(int32_t uid, struct edgetpu_usage_stats *ustats)
+{
+	struct uid_entry *cur;
+	struct uid_entry *found = NULL;
+
+	hash_for_each_possible(ustats->uid_hash_table, cur, node, uid) {
+		if (cur->uid != uid)
+			continue;
+		found = cur;
+		break;
+	}
+
+	return found;
+}
+
+/*
+ * Adds @duration to the time-in-state counter of @uid, in the bucket that
+ * @state maps to. The entry for @uid is created on first use.
+ *
+ * Returns 0 on success, and also when @etdev has no usage stats set up (the
+ * sample is then dropped).
+ * Returns -ENOMEM if a new entry for @uid cannot be allocated.
+ */
+int edgetpu_usage_stats_add(int32_t uid, uint32_t state, uint32_t duration,
+			    struct edgetpu_dev *etdev)
+{
+	struct edgetpu_usage_stats *ustats = etdev->usage_stats;
+	struct uid_entry *uid_entry;
+	int ret = 0;
+
+	if (!ustats)
+		return 0;
+
+	/* @uid is signed, so it is logged with %d. */
+	etdev_dbg(etdev, "%s: uid=%d state=%u dur=%u", __func__, uid, state,
+		  duration);
+	mutex_lock(&ustats->usage_stats_lock);
+
+	/* Reuse the existing entry for this uid, or create one. */
+	uid_entry = find_uid_entry_locked(uid, ustats);
+	if (!uid_entry) {
+		uid_entry = kzalloc(sizeof(*uid_entry), GFP_KERNEL);
+		if (!uid_entry) {
+			ret = -ENOMEM;
+			goto out_unlock;
+		}
+		uid_entry->uid = uid;
+		hash_add(ustats->uid_hash_table, &uid_entry->node, uid);
+	}
+
+	uid_entry->time_in_state[tpu_state_map(state)] += duration;
+
+out_unlock:
+	mutex_unlock(&ustats->usage_stats_lock);
+	return ret;
+}
+
+/*
+ * Parses a usage buffer: a struct usage_tracker_header immediately followed
+ * by header->num_metrics entries of struct usage_tracker_metric.
+ *
+ * The whole buffer is discarded if header->metric_size does not match the
+ * size of struct usage_tracker_metric. Metrics of type
+ * metric_type_tpu_usage are fed to edgetpu_usage_stats_add(); other types
+ * are skipped.
+ *
+ * NOTE(review): this function is not told how large @buf is, so it cannot
+ * check num_metrics against the buffer size; the only caller visible here
+ * passes a 4096-byte buffer. Consider bounding the count at the call site.
+ */
+void edgetpu_usage_stats_process_buffer(struct edgetpu_dev *etdev, void *buf)
+{
+	struct usage_tracker_header *header = buf;
+	struct usage_tracker_metric *metric =
+		(struct usage_tracker_metric *)(header + 1);
+	/* Read the header once so the logged and the used values agree. */
+	uint32_t num_metrics = header->num_metrics;
+	uint32_t metric_size = header->metric_size;
+	/* Unsigned like num_metrics: no signed overflow on large counts. */
+	uint32_t i;
+
+	etdev_dbg(etdev, "%s: n=%u sz=%u", __func__, num_metrics,
+		  metric_size);
+	if (metric_size != sizeof(struct usage_tracker_metric)) {
+		etdev_dbg(etdev, "%s: expected sz=%zu, discard", __func__,
+			  sizeof(struct usage_tracker_metric));
+		return;
+	}
+
+	for (i = 0; i < num_metrics; i++, metric++) {
+		switch (metric->type) {
+		case metric_type_tpu_usage:
+			edgetpu_usage_stats_add(metric->tpu_usage.uid,
+						metric->tpu_usage.power_state,
+						metric->tpu_usage.duration_us,
+						etdev);
+			break;
+		default:
+			etdev_dbg(etdev, "%s: %u: skip unknown type=%u",
+				  __func__, i, metric->type);
+			break;
+		}
+	}
+}
+
+/*
+ * sysfs read handler of the usage_stats attribute.
+ *
+ * Emits a header line ("uid:" followed by every value of tpu_states_arr) and
+ * then one line per recorded uid with its accumulated time in each state.
+ * Output is truncated at PAGE_SIZE by scnprintf().
+ *
+ * Returns the number of bytes written to @buf.
+ */
+static ssize_t usage_stats_show(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+	struct edgetpu_usage_stats *ustats = etdev->usage_stats;
+	size_t i;
+	int ret = 0;
+	unsigned int bkt;
+	struct uid_entry *uid_entry;
+
+	/* uid: TPU_ACTIVE_SUD TPU_ACTIVE_UD TPU_ACTIVE_NOM TPU_ACTIVE_OD */
+	ret += scnprintf(buf, PAGE_SIZE, "uid:");
+
+	for (i = 0; i < NUM_TPU_STATES; i++)
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, " %d",
+				 tpu_states_arr[i]);
+
+	ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+
+	mutex_lock(&ustats->usage_stats_lock);
+
+	hash_for_each(ustats->uid_hash_table, bkt, uid_entry, node) {
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%d:",
+				 uid_entry->uid);
+
+		/* The counters are unsigned 64-bit, hence %llu. */
+		for (i = 0; i < NUM_TPU_STATES; i++)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret, " %llu",
+					 uid_entry->time_in_state[i]);
+
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
+	}
+
+	mutex_unlock(&ustats->usage_stats_lock);
+
+	return ret;
+}
+
+/*
+ * Unlinks and frees every uid entry in @ustats->uid_hash_table, taking
+ * usage_stats_lock for the duration.
+ */
+static void usage_stats_remove_uids(struct edgetpu_usage_stats *ustats)
+{
+	struct uid_entry *entry;
+	struct hlist_node *next;
+	unsigned int bucket;
+
+	mutex_lock(&ustats->usage_stats_lock);
+	/* The _safe iterator allows freeing the entry being visited. */
+	hash_for_each_safe(ustats->uid_hash_table, bucket, next, entry, node) {
+		hash_del(&entry->node);
+		kfree(entry);
+	}
+	mutex_unlock(&ustats->usage_stats_lock);
+}
+
+/*
+ * sysfs write handler of the usage_stats attribute: any write drops all
+ * entries in uid_hash_table. The written data itself is ignored.
+ *
+ * Returns @count, i.e. the whole write is always consumed.
+ */
+static ssize_t usage_stats_clear(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf,
+				 size_t count)
+{
+	struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+
+	usage_stats_remove_uids(etdev->usage_stats);
+
+	return count;
+}
+
+static DEVICE_ATTR(usage_stats, 0644, usage_stats_show, usage_stats_clear);
+
+/*
+ * Allocates the usage stats state of @etdev and creates the usage_stats
+ * device attribute.
+ *
+ * Failures are logged but not returned. If the allocation fails,
+ * etdev->usage_stats is left untouched and no attribute is created.
+ */
+void edgetpu_usage_stats_init(struct edgetpu_dev *etdev)
+{
+	struct edgetpu_usage_stats *stats =
+		devm_kzalloc(etdev->dev, sizeof(*stats), GFP_KERNEL);
+
+	if (!stats) {
+		etdev_warn(etdev,
+			   "failed to allocate memory for usage stats\n");
+		return;
+	}
+
+	mutex_init(&stats->usage_stats_lock);
+	hash_init(stats->uid_hash_table);
+	etdev->usage_stats = stats;
+
+	if (device_create_file(etdev->dev, &dev_attr_usage_stats))
+		etdev_warn(etdev, "failed to create the usage_stats file\n");
+
+	etdev_dbg(etdev, "%s init\n", __func__);
+}
+
+/*
+ * Frees all recorded uid entries of @etdev and removes the usage_stats
+ * device attribute. Does only the debug logging when @etdev has no usage
+ * stats state.
+ */
+void edgetpu_usage_stats_exit(struct edgetpu_dev *etdev)
+{
+	struct edgetpu_usage_stats *stats = etdev->usage_stats;
+
+	if (stats != NULL) {
+		usage_stats_remove_uids(stats);
+		device_remove_file(etdev->dev, &dev_attr_usage_stats);
+	}
+
+	etdev_dbg(etdev, "%s exit\n", __func__);
+}
diff --git a/drivers/edgetpu/edgetpu-usage-stats.h b/drivers/edgetpu/edgetpu-usage-stats.h
new file mode 100644
index 0000000..42d75df
--- /dev/null
+++ b/drivers/edgetpu/edgetpu-usage-stats.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * EdgeTPU usage stats header
+ *
+ * Copyright (C) 2020 Google, Inc.
+ */
+#ifndef __EDGETPU_USAGE_STATS_H__
+#define __EDGETPU_USAGE_STATS_H__
+
+#include <linux/hashtable.h>
+#include <linux/mutex.h>
+
+/* Header struct in the metric buffer. */
+/* Must be kept in sync with firmware struct UsageTrackerHeader */
+struct usage_tracker_header {
+ uint32_t num_metrics; /* Number of metrics being reported */
+ uint32_t metric_size; /* Size of each metric struct */
+};
+
+/*
+ * Encapsulate TPU core usage information of a specific application for a
+ * specific power state.
+ * Must be kept in sync with firmware struct TpuUsage.
+ */
+struct tpu_usage {
+ /* Unique identifier of the application. */
+ int32_t uid;
+ /* The power state of the device (values are chip dependent) */
+ uint32_t power_state;
+ /* Duration of usage in microseconds. */
+ uint32_t duration_us;
+};
+
+/* Must be kept in sync with firmware enum class UsageTrackerMetric::Type */
+enum usage_tracker_metric_type {
+ metric_type_reserved = 0,
+ metric_type_tpu_usage = 1,
+};
+
+/*
+ * Encapsulates a single metric reported to the kernel.
+ * Must be kept in sync with firmware struct UsageTrackerMetric.
+ */
+struct usage_tracker_metric {
+ uint32_t type;
+ uint8_t reserved[4];
+ union {
+ struct tpu_usage tpu_usage;
+ };
+};
+
+#define UID_HASH_BITS 3
+
+/* Per-device usage statistics state, pointed to by edgetpu_dev#usage_stats. */
+struct edgetpu_usage_stats {
+ /* Per-uid usage entries, hashed by uid. */
+ DECLARE_HASHTABLE(uid_hash_table, UID_HASH_BITS);
+ /* Taken by the code that reads or modifies uid_hash_table. */
+ struct mutex usage_stats_lock;
+};
+
+int edgetpu_usage_stats_add(int32_t uid, uint32_t state, uint32_t duration,
+ struct edgetpu_dev *etdev);
+void edgetpu_usage_stats_process_buffer(struct edgetpu_dev *etdev, void *buf);
+void edgetpu_usage_stats_init(struct edgetpu_dev *etdev);
+void edgetpu_usage_stats_exit(struct edgetpu_dev *etdev);
+
+#endif /* __EDGETPU_USAGE_STATS_H__ */
diff --git a/drivers/edgetpu/mm-backport.h b/drivers/edgetpu/mm-backport.h
new file mode 100644
index 0000000..2e2f9a7
--- /dev/null
+++ b/drivers/edgetpu/mm-backport.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Backport mm APIs.
+ *
+ * Copyright (C) 2021 Google, Inc.
+ */
+#ifndef __MM_BACKPORT_H__
+#define __MM_BACKPORT_H__
+
+#include <linux/mm.h>
+
+/*
+ * Define pin_user_pages* which are introduced in Linux 5.6.
+ *
+ * We simply define pin_user_pages* as get_user_pages* here so our driver can
+ * prefer PIN over GET when possible.
+ */
+#ifndef FOLL_PIN
+
+/* define as zero to prevent older get_user_pages* returning EINVAL */
+#define FOLL_LONGTERM 0
+
+#define pin_user_pages_fast get_user_pages_fast
+#define unpin_user_page put_page
+
+#endif /* FOLL_PIN */
+
+#endif /* __MM_BACKPORT_H__ */