summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Whi copybara merger <whitechapel-automerger@google.com> 2023-03-13 05:14:13 +0000
committer: Todd Poynor <toddpoynor@google.com> 2023-03-13 07:03:12 +0000
commit: fc120daa7c55e96bb4cd2e6ab319506fe6a7f4bd (patch)
tree: 0cebc925f02fbbb715899c3e397454dc73e69b78
parent: ad04c3e4228f016f84fa11db7631e5b49231f7f5 (diff)
download: janeiro-fc120daa7c55e96bb4cd2e6ab319506fe6a7f4bd.tar.gz
edgetpu: Only call .power_up if needed Bug: 272701322 edgetpu: Downgrade warning on external mailbox alloc Bug: 269476405 edgetpu: usage stats add field definitions for metrics v2 edgetpu: remove "_locked" from edgetpu_firmware_tracing_set_level Bug: 262916889 edgetpu: usage stats ignore metric fields beyond known size Bug: 271372136 edgetpu: Add firmware dynamic tracing support Bug: 262916889 (repeat) edgetpu: Add KCI handling for dynamic fw tracing levels Bug: 262916889 (repeat) edgetpu: Add missing pm error handling GitOrigin-RevId: c501899a7d9529f3b85a65d4792f1985452225d5 Change-Id: I604850162c7aa3b3310c6d5802dbba1bc2fa64fa
-rw-r--r-- drivers/edgetpu/edgetpu-external.c 4
-rw-r--r-- drivers/edgetpu/edgetpu-firmware.c 149
-rw-r--r-- drivers/edgetpu/edgetpu-kci.c 20
-rw-r--r-- drivers/edgetpu/edgetpu-kci.h 6
-rw-r--r-- drivers/edgetpu/edgetpu-pm.c 11
-rw-r--r-- drivers/edgetpu/edgetpu-usage-stats.c 13
-rw-r--r-- drivers/edgetpu/edgetpu-usage-stats.h 49
-rw-r--r-- drivers/edgetpu/mobile-pm.c 15
8 files changed, 253 insertions, 14 deletions
diff --git a/drivers/edgetpu/edgetpu-external.c b/drivers/edgetpu/edgetpu-external.c
index 4b86e13..b954844 100644
--- a/drivers/edgetpu/edgetpu-external.c
+++ b/drivers/edgetpu/edgetpu-external.c
@@ -95,8 +95,8 @@ static int edgetpu_external_mailbox_alloc(struct device *edgetpu_dev,
if (copy_from_user(&req.attr, (void __user *)client_info->attr, sizeof(req.attr))) {
if (!client_info->attr)
- etdev_warn(client->etdev,
- "Illegal mailbox attributes, using VII mailbox attrs\n");
+ etdev_dbg(client->etdev,
+ "Using VII mailbox attrs for external mailbox\n");
req.attr = group->mbox_attr;
}
diff --git a/drivers/edgetpu/edgetpu-firmware.c b/drivers/edgetpu/edgetpu-firmware.c
index 1ef1354..cf9009b 100644
--- a/drivers/edgetpu/edgetpu-firmware.c
+++ b/drivers/edgetpu/edgetpu-firmware.c
@@ -5,6 +5,7 @@
* Copyright (C) 2019-2020 Google, Inc.
*/
+#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/firmware.h>
@@ -29,6 +30,30 @@
static char *firmware_name;
module_param(firmware_name, charp, 0660);
+/*
+ * Any tracing level vote with the following bit set will be considered as a default vote.
+ */
+#define EDGETPU_FW_TRACING_DEFAULT_VOTE BIT(8)
+
+struct edgetpu_fw_tracing {
+ struct device *dev;
+ struct dentry *dentry;
+
+ /*
+ * Lock to protect the struct members listed below.
+ *
+ * Note that a request to adjust the tracing level may arrive during a power state
+ * transition (i.e., another thread calling edgetpu_firmware_tracing_restore_on_powering()
+ * with the pm lock held), so one must either use the non-blocking edgetpu_pm_trylock() or
+ * make sure no new power transition can start while this lock is held, to prevent deadlock.
+ */
+ struct mutex lock;
+ /* Actual firmware tracing level. */
+ unsigned long active_level;
+ /* Requested firmware tracing level. */
+ unsigned long request_level;
+};
+
struct edgetpu_firmware_private {
const struct edgetpu_firmware_chip_data *chip_fw;
void *data; /* for edgetpu_firmware_(set/get)_data */
@@ -38,6 +63,7 @@ struct edgetpu_firmware_private {
struct edgetpu_firmware_desc bl1_fw_desc;
enum edgetpu_firmware_status status;
struct edgetpu_fw_info fw_info;
+ struct edgetpu_fw_tracing fw_tracing;
};
void edgetpu_firmware_set_data(struct edgetpu_firmware *et_fw, void *data)
@@ -134,6 +160,124 @@ static char *fw_flavor_str(enum edgetpu_fw_flavor fw_flavor)
return "?";
}
+static int edgetpu_firmware_tracing_active_get(void *data, u64 *val)
+{
+ struct edgetpu_firmware *et_fw = data;
+ struct edgetpu_fw_tracing *fw_tracing = &et_fw->p->fw_tracing;
+
+ mutex_lock(&fw_tracing->lock);
+ *val = fw_tracing->active_level;
+ mutex_unlock(&fw_tracing->lock);
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_edgetpu_firmware_tracing_active, edgetpu_firmware_tracing_active_get,
+ NULL, "%llu\n");
+
+static int edgetpu_firmware_tracing_request_get(void *data, u64 *val)
+{
+ struct edgetpu_firmware *et_fw = data;
+ struct edgetpu_fw_tracing *fw_tracing = &et_fw->p->fw_tracing;
+
+ mutex_lock(&fw_tracing->lock);
+ *val = fw_tracing->request_level;
+ mutex_unlock(&fw_tracing->lock);
+
+ return 0;
+}
+
+/*
+ * fw_tracing->lock may optionally be held if the caller wants the new level to be set as a
+ * critical section. If not held the caller is syncing current tracing level but not as a critical
+ * section with the calling code. Firmware tracing levels are not expected to change frequently or
+ * via concurrent requests. Only the code that restores the tracing level at power up requires
+ * consistency with the state managed by the calling code. Since this code is called as part of
+ * power up processing, in order to avoid deadlocks, most callers set a requested state and then
+ * sync the current state to firmware (if powered on) without holding the lock across the powered-on
+ * check, with no harm done if the requested state changed again using a concurrent request.
+ */
+static int edgetpu_firmware_tracing_set_level(struct edgetpu_firmware *et_fw)
+{
+ unsigned long active_level;
+ struct edgetpu_dev *etdev = et_fw->etdev;
+ struct edgetpu_fw_tracing *fw_tracing = &et_fw->p->fw_tracing;
+ int ret = edgetpu_kci_firmware_tracing_level(etdev, fw_tracing->request_level,
+ &active_level);
+
+ if (ret)
+ etdev_warn(et_fw->etdev, "Failed to set firmware tracing level to %lu: %d",
+ fw_tracing->request_level, ret);
+ else
+ fw_tracing->active_level =
+ (fw_tracing->request_level & EDGETPU_FW_TRACING_DEFAULT_VOTE) ?
+ EDGETPU_FW_TRACING_DEFAULT_VOTE : active_level;
+
+ return ret;
+}
+
+static int edgetpu_firmware_tracing_request_set(void *data, u64 val)
+{
+ struct edgetpu_firmware *et_fw = data;
+ struct edgetpu_dev *etdev = et_fw->etdev;
+ struct edgetpu_fw_tracing *fw_tracing = &et_fw->p->fw_tracing;
+ int ret = 0;
+
+ mutex_lock(&fw_tracing->lock);
+ fw_tracing->request_level = val;
+ mutex_unlock(&fw_tracing->lock);
+
+ if (edgetpu_pm_get_if_powered(etdev->pm)) {
+ ret = edgetpu_firmware_tracing_set_level(et_fw);
+ edgetpu_pm_put(etdev->pm);
+ }
+
+ return ret;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_edgetpu_firmware_tracing_request,
+ edgetpu_firmware_tracing_request_get, edgetpu_firmware_tracing_request_set,
+ "%llu\n");
+
+static void edgetpu_firmware_tracing_init(struct edgetpu_firmware *et_fw)
+{
+ struct edgetpu_dev *etdev = et_fw->etdev;
+ struct edgetpu_fw_tracing *fw_tracing = &et_fw->p->fw_tracing;
+
+ fw_tracing->active_level = EDGETPU_FW_TRACING_DEFAULT_VOTE;
+ fw_tracing->request_level = EDGETPU_FW_TRACING_DEFAULT_VOTE;
+ mutex_init(&fw_tracing->lock);
+
+ fw_tracing->dentry = debugfs_create_dir("fw_tracing", etdev->d_entry);
+ if (IS_ERR(fw_tracing->dentry)) {
+ etdev_warn(etdev, "Failed to create fw tracing debugfs interface");
+ return;
+ }
+
+ debugfs_create_file("active", 0440, fw_tracing->dentry, et_fw,
+ &fops_edgetpu_firmware_tracing_active);
+ debugfs_create_file("request", 0660, fw_tracing->dentry, et_fw,
+ &fops_edgetpu_firmware_tracing_request);
+}
+
+static void edgetpu_firmware_tracing_destroy(struct edgetpu_firmware *et_fw)
+{
+ debugfs_remove_recursive(et_fw->p->fw_tracing.dentry);
+}
+
+static int edgetpu_firmware_tracing_restore_on_powering(struct edgetpu_firmware *et_fw)
+{
+ int ret = 0;
+ struct edgetpu_fw_tracing *fw_tracing = &et_fw->p->fw_tracing;
+
+ mutex_lock(&fw_tracing->lock);
+ fw_tracing->active_level = EDGETPU_FW_TRACING_DEFAULT_VOTE;
+ if (!(fw_tracing->request_level & EDGETPU_FW_TRACING_DEFAULT_VOTE))
+ ret = edgetpu_firmware_tracing_set_level(et_fw);
+ mutex_unlock(&fw_tracing->lock);
+ return ret;
+}
+
static int edgetpu_firmware_handshake(struct edgetpu_firmware *et_fw)
{
struct edgetpu_dev *etdev = et_fw->etdev;
@@ -172,6 +316,9 @@ static int edgetpu_firmware_handshake(struct edgetpu_firmware *et_fw)
if (ret)
etdev_warn(etdev, "telemetry KCI error: %d", ret);
+ ret = edgetpu_firmware_tracing_restore_on_powering(et_fw);
+ if (ret)
+ etdev_warn_ratelimited(etdev, "firmware tracing restore error: %d", ret);
/* Set debug dump buffer in FW */
edgetpu_get_debug_dump(etdev, 0);
}
@@ -687,6 +834,7 @@ int edgetpu_firmware_create(struct edgetpu_dev *etdev,
else
edgetpu_sw_wdt_set_handler(
etdev, edgetpu_firmware_wdt_timeout_action, etdev);
+ edgetpu_firmware_tracing_init(et_fw);
return 0;
out_device_remove_group:
@@ -724,6 +872,7 @@ void edgetpu_firmware_destroy(struct edgetpu_dev *etdev)
edgetpu_firmware_unload_locked(et_fw, &et_fw->p->fw_desc);
edgetpu_firmware_unload_locked(et_fw, &et_fw->p->bl1_fw_desc);
mutex_unlock(&et_fw->p->fw_desc_lock);
+ edgetpu_firmware_tracing_destroy(et_fw);
}
etdev->firmware = NULL;
diff --git a/drivers/edgetpu/edgetpu-kci.c b/drivers/edgetpu/edgetpu-kci.c
index 0c6f5ad..65c16ec 100644
--- a/drivers/edgetpu/edgetpu-kci.c
+++ b/drivers/edgetpu/edgetpu-kci.c
@@ -938,6 +938,7 @@ int edgetpu_kci_update_usage_locked(struct edgetpu_dev *etdev)
.dma = {
.address = 0,
.size = 0,
+ .flags = EDGETPU_USAGE_METRIC_VERSION,
},
};
struct edgetpu_coherent_mem mem;
@@ -1093,6 +1094,25 @@ int edgetpu_kci_block_bus_speed_control(struct edgetpu_dev *etdev, bool block)
return edgetpu_kci_send_cmd(etdev->kci, &cmd);
}
+int edgetpu_kci_firmware_tracing_level(struct edgetpu_dev *etdev, unsigned long level,
+ unsigned long *active_level)
+{
+ struct edgetpu_command_element cmd = {
+ .code = KCI_CODE_FIRMWARE_TRACING_LEVEL,
+ .dma = {
+ .flags = (u32)level,
+ },
+ };
+ struct edgetpu_kci_response_element resp;
+ int ret;
+
+ ret = edgetpu_kci_send_cmd_return_resp(etdev->kci, &cmd, &resp);
+ if (ret == KCI_ERROR_OK)
+ *active_level = resp.retval;
+
+ return ret;
+}
+
int edgetpu_kci_resp_rkci_ack(struct edgetpu_dev *etdev,
struct edgetpu_kci_response_element *rkci_cmd)
{
diff --git a/drivers/edgetpu/edgetpu-kci.h b/drivers/edgetpu/edgetpu-kci.h
index a11a181..b32b097 100644
--- a/drivers/edgetpu/edgetpu-kci.h
+++ b/drivers/edgetpu/edgetpu-kci.h
@@ -115,6 +115,8 @@ enum edgetpu_kci_code {
KCI_CODE_GET_USAGE = 12,
KCI_CODE_NOTIFY_THROTTLING = 13,
KCI_CODE_BLOCK_BUS_SPEED_CONTROL = 14,
+ /* 15..18 not implemented in this branch */
+ KCI_CODE_FIRMWARE_TRACING_LEVEL = 19,
KCI_CODE_RKCI_ACK = 256,
};
@@ -404,6 +406,10 @@ int edgetpu_kci_notify_throttling(struct edgetpu_dev *etdev, u32 level);
*/
int edgetpu_kci_block_bus_speed_control(struct edgetpu_dev *etdev, bool block);
+/* Set the firmware tracing level. */
+int edgetpu_kci_firmware_tracing_level(struct edgetpu_dev *etdev, unsigned long level,
+ unsigned long *active_level);
+
/*
* Send an ack to the FW after handling a reverse KCI request.
*
diff --git a/drivers/edgetpu/edgetpu-pm.c b/drivers/edgetpu/edgetpu-pm.c
index a71232d..40d41ff 100644
--- a/drivers/edgetpu/edgetpu-pm.c
+++ b/drivers/edgetpu/edgetpu-pm.c
@@ -53,9 +53,13 @@ static int edgetpu_pm_get_locked(struct edgetpu_pm *etpm)
int ret = 0;
if (!power_up_count) {
- ret = etpm->p->handlers->power_up(etpm);
- if (!ret)
- edgetpu_mailbox_restore_active_mailbox_queues(etpm->etdev);
+ if (etpm->p->power_down_pending) {
+ etpm->p->power_down_pending = false;
+ } else {
+ ret = etpm->p->handlers->power_up(etpm);
+ if (!ret)
+ edgetpu_mailbox_restore_active_mailbox_queues(etpm->etdev);
+ }
}
if (ret)
etpm->p->power_up_count--;
@@ -103,7 +107,6 @@ int edgetpu_pm_get(struct edgetpu_pm *etpm)
return 0;
mutex_lock(&etpm->p->lock);
- etpm->p->power_down_pending = false;
ret = edgetpu_pm_get_locked(etpm);
mutex_unlock(&etpm->p->lock);
diff --git a/drivers/edgetpu/edgetpu-usage-stats.c b/drivers/edgetpu/edgetpu-usage-stats.c
index ba93d49..e7b224c 100644
--- a/drivers/edgetpu/edgetpu-usage-stats.c
+++ b/drivers/edgetpu/edgetpu-usage-stats.c
@@ -241,19 +241,22 @@ out:
void edgetpu_usage_stats_process_buffer(struct edgetpu_dev *etdev, void *buf)
{
- struct edgetpu_usage_header *header = buf;
+ struct edgetpu_usage_header_v1 *header = buf;
struct edgetpu_usage_metric *metric =
(struct edgetpu_usage_metric *)(header + 1);
int i;
etdev_dbg(etdev, "%s: n=%u sz=%u", __func__,
header->num_metrics, header->metric_size);
- if (header->metric_size != sizeof(struct edgetpu_usage_metric)) {
- etdev_dbg(etdev, "%s: expected sz=%zu, discard", __func__,
- sizeof(struct edgetpu_usage_metric));
+ if (header->metric_size < EDGETPU_USAGE_METRIC_SIZE_V1) {
+ etdev_warn_once(etdev, "fw metric size %u less than minimum %u",
+ header->metric_size, EDGETPU_USAGE_METRIC_SIZE_V1);
return;
}
+ if (header->metric_size > sizeof(struct edgetpu_usage_metric))
+ etdev_dbg(etdev, "fw metrics are later version with unknown fields");
+
for (i = 0; i < header->num_metrics; i++) {
switch (metric->type) {
case EDGETPU_METRIC_TYPE_TPU_USAGE:
@@ -284,7 +287,7 @@ void edgetpu_usage_stats_process_buffer(struct edgetpu_dev *etdev, void *buf)
break;
}
- metric++;
+ metric = (struct edgetpu_usage_metric *)((char *)metric + header->metric_size);
}
}
diff --git a/drivers/edgetpu/edgetpu-usage-stats.h b/drivers/edgetpu/edgetpu-usage-stats.h
index a60b107..9d93122 100644
--- a/drivers/edgetpu/edgetpu-usage-stats.h
+++ b/drivers/edgetpu/edgetpu-usage-stats.h
@@ -10,9 +10,28 @@
#include <linux/hashtable.h>
#include <linux/mutex.h>
+/* The highest version of usage metrics handled by this driver. */
+#define EDGETPU_USAGE_METRIC_VERSION 1
+
+/*
+ * Size in bytes of usage metric v1.
+ * If fewer bytes than this are received then discard the invalid buffer.
+ * This size also identifies the fw response as v1; subsequent versions will add another field
+ * with the version number.
+ */
+#define EDGETPU_USAGE_METRIC_SIZE_V1 20
+
+/* v1 metric header struct. */
+struct edgetpu_usage_header_v1 {
+ uint32_t num_metrics; /* Number of metrics being reported */
+ uint32_t metric_size; /* Size of each metric struct */
+};
+
/* Header struct in the metric buffer. */
/* Must be kept in sync with firmware struct UsageTrackerHeader */
struct edgetpu_usage_header {
+ uint16_t header_bytes; /* Number of bytes in this header */
+ uint16_t version; /* Metrics version */
uint32_t num_metrics; /* Number of metrics being reported */
uint32_t metric_size; /* Size of each metric struct */
};
@@ -20,15 +39,24 @@ struct edgetpu_usage_header {
/*
* Encapsulate TPU core usage information of a specific application for a
* specific power state.
- * Must be kept in sync with firmware struct TpuUsage.
+ * Must be kept in sync with firmware struct CoreUsage.
*/
struct tpu_usage {
/* Unique identifier of the application. */
int32_t uid;
/* The power state of the device (values are chip dependent) */
+ /* Now called operating_point in FW. */
uint32_t power_state;
/* Duration of usage in microseconds. */
uint32_t duration_us;
+
+ /* Following fields are added in metrics v2 */
+
+ /* Compute Core: TPU cluster ID. */
+ /* Called core_id in FW. */
+ uint8_t cluster_id;
+ /* Reserved. Filling out the next 32-bit boundary. */
+ uint8_t reserved[3];
};
/*
@@ -62,7 +90,7 @@ enum edgetpu_usage_counter_type {
EDGETPU_COUNTER_TPU_ACTIVE_CYCLES = 0,
/* Number of stalls caused by throttling. */
EDGETPU_COUNTER_TPU_THROTTLE_STALLS = 1,
- /* Number of graph invocations. */
+ /* Number of graph invocations. (Now called kWorkload in FW.) */
EDGETPU_COUNTER_INFERENCES = 2,
/* Number of TPU offload op invocations. */
EDGETPU_COUNTER_TPU_OPS = 3,
@@ -81,7 +109,12 @@ enum edgetpu_usage_counter_type {
/* Number of times (firmware)suspend function takes longer than SLA time. */
EDGETPU_COUNTER_LONG_SUSPEND = 10,
- EDGETPU_COUNTER_COUNT = 11, /* number of counters above */
+ /* The following counters are added in metrics v2. */
+
+ /* Number of context switches on a compute core. */
+ EDGETPU_COUNTER_CONTEXT_SWITCHES = 11,
+
+ EDGETPU_COUNTER_COUNT = 12, /* number of counters above */
};
/* Generic counter. Only reported if it has a value larger than 0. */
@@ -91,6 +124,11 @@ struct __packed edgetpu_usage_counter {
/* Accumulated value since last initialization. */
uint64_t value;
+
+ /* Following fields are added in metrics v2 */
+
+ /* Reporting component. */
+ uint8_t component_id;
};
/* Defines different max watermarks we track. */
@@ -121,6 +159,11 @@ struct __packed edgetpu_usage_max_watermark {
* non-mobile, firmware boot on mobile).
*/
uint64_t value;
+
+ /* Following fields are added in metrics v2 */
+
+ /* Reporting component. */
+ uint8_t component_id;
};
/* An enum to identify the tracked firmware threads. */
diff --git a/drivers/edgetpu/mobile-pm.c b/drivers/edgetpu/mobile-pm.c
index 9799172..50c3866 100644
--- a/drivers/edgetpu/mobile-pm.c
+++ b/drivers/edgetpu/mobile-pm.c
@@ -64,6 +64,7 @@ static int mobile_pwr_state_init(struct device *dev)
ret = pm_runtime_get_sync(dev);
if (ret) {
pm_runtime_put_noidle(dev);
+ pm_runtime_disable(dev);
dev_err(dev, "pm_runtime_get_sync returned %d\n", ret);
return ret;
}
@@ -74,6 +75,7 @@ static int mobile_pwr_state_init(struct device *dev)
dev_err(dev, "error initializing tpu state: %d\n", ret);
if (curr_state > TPU_OFF)
pm_runtime_put_sync(dev);
+ pm_runtime_disable(dev);
return ret;
}
@@ -603,6 +605,7 @@ static int mobile_pm_after_create(struct edgetpu_pm *etpm)
struct edgetpu_mobile_platform_dev *etmdev = to_mobile_dev(etdev);
struct device *dev = etdev->dev;
struct edgetpu_mobile_platform_pwr *platform_pwr = &etmdev->platform_pwr;
+ int curr_state;
ret = mobile_pwr_state_init(dev);
if (ret)
@@ -644,7 +647,19 @@ static int mobile_pm_after_create(struct edgetpu_pm *etpm)
if (platform_pwr->after_create)
ret = platform_pwr->after_create(etdev);
+ if (ret)
+ goto err_debugfs_remove;
+
+ return 0;
+err_debugfs_remove:
+ debugfs_remove_recursive(platform_pwr->debugfs_dir);
+ /* pm_runtime_{enable,get_sync} were called in mobile_pwr_state_init */
+
+ curr_state = exynos_acpm_get_rate(TPU_ACPM_DOMAIN, 0);
+ if (curr_state > TPU_OFF)
+ pm_runtime_put_sync(dev);
+ pm_runtime_disable(dev);
return ret;
}