diff options
author | Whi copybara merger <whitechapel-automerger@google.com> | 2023-03-13 05:14:13 +0000 |
---|---|---|
committer | Todd Poynor <toddpoynor@google.com> | 2023-03-13 07:03:12 +0000 |
commit | fc120daa7c55e96bb4cd2e6ab319506fe6a7f4bd (patch) | |
tree | 0cebc925f02fbbb715899c3e397454dc73e69b78 | |
parent | ad04c3e4228f016f84fa11db7631e5b49231f7f5 (diff) | |
download | janeiro-fc120daa7c55e96bb4cd2e6ab319506fe6a7f4bd.tar.gz |
[Copybara Auto Merge] Merge branch pro into android13-gs-pixel-5.10-udc (tags: android-u-beta-1_r0.5, android-u-beta-1_r0.4, android-u-beta-1_r0.3, android-gs-raviole-5.10-u-beta1, android-gs-pantah-5.10-u-beta1, android-gs-bluejay-5.10-u-beta1)
edgetpu: Only call .power_up if needed
Bug: 272701322
edgetpu: Downgrade warning on external mailbox alloc
Bug: 269476405
edgetpu: usage stats add field definitions for metrics v2
edgetpu: remove "_locked" from edgetpu_firmware_tracing_set_level
Bug: 262916889
edgetpu: usage stats ignore metric fields beyond known size
Bug: 271372136
edgetpu: Add firmware dynamic tracing support
Bug: 262916889 (repeat)
edgetpu: Add KCI handling for dynamic fw tracing levels
Bug: 262916889 (repeat)
edgetpu: Add missing pm error handling
GitOrigin-RevId: c501899a7d9529f3b85a65d4792f1985452225d5
Change-Id: I604850162c7aa3b3310c6d5802dbba1bc2fa64fa
-rw-r--r-- | drivers/edgetpu/edgetpu-external.c | 4 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-firmware.c | 149 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-kci.c | 20 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-kci.h | 6 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-pm.c | 11 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-usage-stats.c | 13 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-usage-stats.h | 49 | ||||
-rw-r--r-- | drivers/edgetpu/mobile-pm.c | 15 |
8 files changed, 253 insertions, 14 deletions
diff --git a/drivers/edgetpu/edgetpu-external.c b/drivers/edgetpu/edgetpu-external.c index 4b86e13..b954844 100644 --- a/drivers/edgetpu/edgetpu-external.c +++ b/drivers/edgetpu/edgetpu-external.c @@ -95,8 +95,8 @@ static int edgetpu_external_mailbox_alloc(struct device *edgetpu_dev, if (copy_from_user(&req.attr, (void __user *)client_info->attr, sizeof(req.attr))) { if (!client_info->attr) - etdev_warn(client->etdev, - "Illegal mailbox attributes, using VII mailbox attrs\n"); + etdev_dbg(client->etdev, + "Using VII mailbox attrs for external mailbox\n"); req.attr = group->mbox_attr; } diff --git a/drivers/edgetpu/edgetpu-firmware.c b/drivers/edgetpu/edgetpu-firmware.c index 1ef1354..cf9009b 100644 --- a/drivers/edgetpu/edgetpu-firmware.c +++ b/drivers/edgetpu/edgetpu-firmware.c @@ -5,6 +5,7 @@ * Copyright (C) 2019-2020 Google, Inc. */ +#include <linux/debugfs.h> #include <linux/delay.h> #include <linux/device.h> #include <linux/firmware.h> @@ -29,6 +30,30 @@ static char *firmware_name; module_param(firmware_name, charp, 0660); +/* + * Any tracing level vote with the following bit set will be considered as a default vote. + */ +#define EDGETPU_FW_TRACING_DEFAULT_VOTE BIT(8) + +struct edgetpu_fw_tracing { + struct device *dev; + struct dentry *dentry; + + /* + * Lock to protect the struct members listed below. + * + * Note that since the request of tracing level adjusting might happen during power state + * transitions (i.e., another thread calling edgetpu_firmware_tracing_restore_on_powering() + * with pm lock held), one must either use the non-blocking edgetpu_pm_trylock() or make + * sure there won't be any new power transition after holding this lock to prevent deadlock. + */ + struct mutex lock; + /* Actual firmware tracing level. */ + unsigned long active_level; + /* Requested firmware tracing level. 
*/ + unsigned long request_level; +}; + struct edgetpu_firmware_private { const struct edgetpu_firmware_chip_data *chip_fw; void *data; /* for edgetpu_firmware_(set/get)_data */ @@ -38,6 +63,7 @@ struct edgetpu_firmware_private { struct edgetpu_firmware_desc bl1_fw_desc; enum edgetpu_firmware_status status; struct edgetpu_fw_info fw_info; + struct edgetpu_fw_tracing fw_tracing; }; void edgetpu_firmware_set_data(struct edgetpu_firmware *et_fw, void *data) @@ -134,6 +160,124 @@ static char *fw_flavor_str(enum edgetpu_fw_flavor fw_flavor) return "?"; } +static int edgetpu_firmware_tracing_active_get(void *data, u64 *val) +{ + struct edgetpu_firmware *et_fw = data; + struct edgetpu_fw_tracing *fw_tracing = &et_fw->p->fw_tracing; + + mutex_lock(&fw_tracing->lock); + *val = fw_tracing->active_level; + mutex_unlock(&fw_tracing->lock); + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(fops_edgetpu_firmware_tracing_active, edgetpu_firmware_tracing_active_get, + NULL, "%llu\n"); + +static int edgetpu_firmware_tracing_request_get(void *data, u64 *val) +{ + struct edgetpu_firmware *et_fw = data; + struct edgetpu_fw_tracing *fw_tracing = &et_fw->p->fw_tracing; + + mutex_lock(&fw_tracing->lock); + *val = fw_tracing->request_level; + mutex_unlock(&fw_tracing->lock); + + return 0; +} + +/* + * fw_tracing->lock may optionally be held if the caller wants the new level to be set as a + * critical section. If not held the caller is syncing current tracing level but not as a critical + * section with the calling code. Firmware tracing levels are not expected to change frequently or + * via concurrent requests. Only the code that restore the tracing level at power up requires + * consistency with the state managed by the calling code. 
Since this code is called as part of + * power up processing, in order to avoid deadlocks, most callers set a requested state and then + * sync the current state to firmware (if powered on) without holding the lock across the powered-on + * check, with no harm done if the requested state changed again using a concurrent request. + */ +static int edgetpu_firmware_tracing_set_level(struct edgetpu_firmware *et_fw) +{ + unsigned long active_level; + struct edgetpu_dev *etdev = et_fw->etdev; + struct edgetpu_fw_tracing *fw_tracing = &et_fw->p->fw_tracing; + int ret = edgetpu_kci_firmware_tracing_level(etdev, fw_tracing->request_level, + &active_level); + + if (ret) + etdev_warn(et_fw->etdev, "Failed to set firmware tracing level to %lu: %d", + fw_tracing->request_level, ret); + else + fw_tracing->active_level = + (fw_tracing->request_level & EDGETPU_FW_TRACING_DEFAULT_VOTE) ? + EDGETPU_FW_TRACING_DEFAULT_VOTE : active_level; + + return ret; +} + +static int edgetpu_firmware_tracing_request_set(void *data, u64 val) +{ + struct edgetpu_firmware *et_fw = data; + struct edgetpu_dev *etdev = et_fw->etdev; + struct edgetpu_fw_tracing *fw_tracing = &et_fw->p->fw_tracing; + int ret = 0; + + mutex_lock(&fw_tracing->lock); + fw_tracing->request_level = val; + mutex_unlock(&fw_tracing->lock); + + if (edgetpu_pm_get_if_powered(etdev->pm)) { + ret = edgetpu_firmware_tracing_set_level(et_fw); + edgetpu_pm_put(etdev->pm); + } + + return ret; +} + +DEFINE_DEBUGFS_ATTRIBUTE(fops_edgetpu_firmware_tracing_request, + edgetpu_firmware_tracing_request_get, edgetpu_firmware_tracing_request_set, + "%llu\n"); + +static void edgetpu_firmware_tracing_init(struct edgetpu_firmware *et_fw) +{ + struct edgetpu_dev *etdev = et_fw->etdev; + struct edgetpu_fw_tracing *fw_tracing = &et_fw->p->fw_tracing; + + fw_tracing->active_level = EDGETPU_FW_TRACING_DEFAULT_VOTE; + fw_tracing->request_level = EDGETPU_FW_TRACING_DEFAULT_VOTE; + mutex_init(&fw_tracing->lock); + + fw_tracing->dentry = 
debugfs_create_dir("fw_tracing", etdev->d_entry); + if (IS_ERR(fw_tracing->dentry)) { + etdev_warn(etdev, "Failed to create fw tracing debugfs interface"); + return; + } + + debugfs_create_file("active", 0440, fw_tracing->dentry, et_fw, + &fops_edgetpu_firmware_tracing_active); + debugfs_create_file("request", 0660, fw_tracing->dentry, et_fw, + &fops_edgetpu_firmware_tracing_request); +} + +static void edgetpu_firmware_tracing_destroy(struct edgetpu_firmware *et_fw) +{ + debugfs_remove_recursive(et_fw->p->fw_tracing.dentry); +} + +static int edgetpu_firmware_tracing_restore_on_powering(struct edgetpu_firmware *et_fw) +{ + int ret = 0; + struct edgetpu_fw_tracing *fw_tracing = &et_fw->p->fw_tracing; + + mutex_lock(&fw_tracing->lock); + fw_tracing->active_level = EDGETPU_FW_TRACING_DEFAULT_VOTE; + if (!(fw_tracing->request_level & EDGETPU_FW_TRACING_DEFAULT_VOTE)) + ret = edgetpu_firmware_tracing_set_level(et_fw); + mutex_unlock(&fw_tracing->lock); + return ret; +} + static int edgetpu_firmware_handshake(struct edgetpu_firmware *et_fw) { struct edgetpu_dev *etdev = et_fw->etdev; @@ -172,6 +316,9 @@ static int edgetpu_firmware_handshake(struct edgetpu_firmware *et_fw) if (ret) etdev_warn(etdev, "telemetry KCI error: %d", ret); + ret = edgetpu_firmware_tracing_restore_on_powering(et_fw); + if (ret) + etdev_warn_ratelimited(etdev, "firmware tracing restore error: %d", ret); /* Set debug dump buffer in FW */ edgetpu_get_debug_dump(etdev, 0); } @@ -687,6 +834,7 @@ int edgetpu_firmware_create(struct edgetpu_dev *etdev, else edgetpu_sw_wdt_set_handler( etdev, edgetpu_firmware_wdt_timeout_action, etdev); + edgetpu_firmware_tracing_init(et_fw); return 0; out_device_remove_group: @@ -724,6 +872,7 @@ void edgetpu_firmware_destroy(struct edgetpu_dev *etdev) edgetpu_firmware_unload_locked(et_fw, &et_fw->p->fw_desc); edgetpu_firmware_unload_locked(et_fw, &et_fw->p->bl1_fw_desc); mutex_unlock(&et_fw->p->fw_desc_lock); + edgetpu_firmware_tracing_destroy(et_fw); } etdev->firmware = 
NULL; diff --git a/drivers/edgetpu/edgetpu-kci.c b/drivers/edgetpu/edgetpu-kci.c index 0c6f5ad..65c16ec 100644 --- a/drivers/edgetpu/edgetpu-kci.c +++ b/drivers/edgetpu/edgetpu-kci.c @@ -938,6 +938,7 @@ int edgetpu_kci_update_usage_locked(struct edgetpu_dev *etdev) .dma = { .address = 0, .size = 0, + .flags = EDGETPU_USAGE_METRIC_VERSION, }, }; struct edgetpu_coherent_mem mem; @@ -1093,6 +1094,25 @@ int edgetpu_kci_block_bus_speed_control(struct edgetpu_dev *etdev, bool block) return edgetpu_kci_send_cmd(etdev->kci, &cmd); } +int edgetpu_kci_firmware_tracing_level(struct edgetpu_dev *etdev, unsigned long level, + unsigned long *active_level) +{ + struct edgetpu_command_element cmd = { + .code = KCI_CODE_FIRMWARE_TRACING_LEVEL, + .dma = { + .flags = (u32)level, + }, + }; + struct edgetpu_kci_response_element resp; + int ret; + + ret = edgetpu_kci_send_cmd_return_resp(etdev->kci, &cmd, &resp); + if (ret == KCI_ERROR_OK) + *active_level = resp.retval; + + return ret; +} + int edgetpu_kci_resp_rkci_ack(struct edgetpu_dev *etdev, struct edgetpu_kci_response_element *rkci_cmd) { diff --git a/drivers/edgetpu/edgetpu-kci.h b/drivers/edgetpu/edgetpu-kci.h index a11a181..b32b097 100644 --- a/drivers/edgetpu/edgetpu-kci.h +++ b/drivers/edgetpu/edgetpu-kci.h @@ -115,6 +115,8 @@ enum edgetpu_kci_code { KCI_CODE_GET_USAGE = 12, KCI_CODE_NOTIFY_THROTTLING = 13, KCI_CODE_BLOCK_BUS_SPEED_CONTROL = 14, + /* 15..18 not implemented in this branch */ + KCI_CODE_FIRMWARE_TRACING_LEVEL = 19, KCI_CODE_RKCI_ACK = 256, }; @@ -404,6 +406,10 @@ int edgetpu_kci_notify_throttling(struct edgetpu_dev *etdev, u32 level); */ int edgetpu_kci_block_bus_speed_control(struct edgetpu_dev *etdev, bool block); +/* Set the firmware tracing level. */ +int edgetpu_kci_firmware_tracing_level(struct edgetpu_dev *etdev, unsigned long level, + unsigned long *active_level); + /* * Send an ack to the FW after handling a reverse KCI request. 
* diff --git a/drivers/edgetpu/edgetpu-pm.c b/drivers/edgetpu/edgetpu-pm.c index a71232d..40d41ff 100644 --- a/drivers/edgetpu/edgetpu-pm.c +++ b/drivers/edgetpu/edgetpu-pm.c @@ -53,9 +53,13 @@ static int edgetpu_pm_get_locked(struct edgetpu_pm *etpm) int ret = 0; if (!power_up_count) { - ret = etpm->p->handlers->power_up(etpm); - if (!ret) - edgetpu_mailbox_restore_active_mailbox_queues(etpm->etdev); + if (etpm->p->power_down_pending) { + etpm->p->power_down_pending = false; + } else { + ret = etpm->p->handlers->power_up(etpm); + if (!ret) + edgetpu_mailbox_restore_active_mailbox_queues(etpm->etdev); + } } if (ret) etpm->p->power_up_count--; @@ -103,7 +107,6 @@ int edgetpu_pm_get(struct edgetpu_pm *etpm) return 0; mutex_lock(&etpm->p->lock); - etpm->p->power_down_pending = false; ret = edgetpu_pm_get_locked(etpm); mutex_unlock(&etpm->p->lock); diff --git a/drivers/edgetpu/edgetpu-usage-stats.c b/drivers/edgetpu/edgetpu-usage-stats.c index ba93d49..e7b224c 100644 --- a/drivers/edgetpu/edgetpu-usage-stats.c +++ b/drivers/edgetpu/edgetpu-usage-stats.c @@ -241,19 +241,22 @@ out: void edgetpu_usage_stats_process_buffer(struct edgetpu_dev *etdev, void *buf) { - struct edgetpu_usage_header *header = buf; + struct edgetpu_usage_header_v1 *header = buf; struct edgetpu_usage_metric *metric = (struct edgetpu_usage_metric *)(header + 1); int i; etdev_dbg(etdev, "%s: n=%u sz=%u", __func__, header->num_metrics, header->metric_size); - if (header->metric_size != sizeof(struct edgetpu_usage_metric)) { - etdev_dbg(etdev, "%s: expected sz=%zu, discard", __func__, - sizeof(struct edgetpu_usage_metric)); + if (header->metric_size < EDGETPU_USAGE_METRIC_SIZE_V1) { + etdev_warn_once(etdev, "fw metric size %u less than minimum %u", + header->metric_size, EDGETPU_USAGE_METRIC_SIZE_V1); return; } + if (header->metric_size > sizeof(struct edgetpu_usage_metric)) + etdev_dbg(etdev, "fw metrics are later version with unknown fields"); + for (i = 0; i < header->num_metrics; i++) { switch 
(metric->type) { case EDGETPU_METRIC_TYPE_TPU_USAGE: @@ -284,7 +287,7 @@ void edgetpu_usage_stats_process_buffer(struct edgetpu_dev *etdev, void *buf) break; } - metric++; + metric = (struct edgetpu_usage_metric *)((char *)metric + header->metric_size); } } diff --git a/drivers/edgetpu/edgetpu-usage-stats.h b/drivers/edgetpu/edgetpu-usage-stats.h index a60b107..9d93122 100644 --- a/drivers/edgetpu/edgetpu-usage-stats.h +++ b/drivers/edgetpu/edgetpu-usage-stats.h @@ -10,9 +10,28 @@ #include <linux/hashtable.h> #include <linux/mutex.h> +/* The highest version of usage metrics handled by this driver. */ +#define EDGETPU_USAGE_METRIC_VERSION 1 + +/* + * Size in bytes of usage metric v1. + * If fewer bytes than this are received then discard the invalid buffer. + * This size also identifies the fw response as v1; subsequent versions will add another field + * with the version number. + */ +#define EDGETPU_USAGE_METRIC_SIZE_V1 20 + +/* v1 metric header struct. */ +struct edgetpu_usage_header_v1 { + uint32_t num_metrics; /* Number of metrics being reported */ + uint32_t metric_size; /* Size of each metric struct */ +}; + /* Header struct in the metric buffer. */ /* Must be kept in sync with firmware struct UsageTrackerHeader */ struct edgetpu_usage_header { + uint16_t header_bytes; /* Number of bytes in this header */ + uint16_t version; /* Metrics version */ uint32_t num_metrics; /* Number of metrics being reported */ uint32_t metric_size; /* Size of each metric struct */ }; @@ -20,15 +39,24 @@ struct edgetpu_usage_header { /* * Encapsulate TPU core usage information of a specific application for a * specific power state. - * Must be kept in sync with firmware struct TpuUsage. + * Must be kept in sync with firmware struct CoreUsage. */ struct tpu_usage { /* Unique identifier of the application. */ int32_t uid; /* The power state of the device (values are chip dependent) */ + /* Now called operating_point in FW. 
*/ uint32_t power_state; /* Duration of usage in microseconds. */ uint32_t duration_us; + + /* Following fields are added in metrics v2 */ + + /* Compute Core: TPU cluster ID. */ + /* Called core_id in FW. */ + uint8_t cluster_id; + /* Reserved. Filling out the next 32-bit boundary. */ + uint8_t reserved[3]; }; /* @@ -62,7 +90,7 @@ enum edgetpu_usage_counter_type { EDGETPU_COUNTER_TPU_ACTIVE_CYCLES = 0, /* Number of stalls caused by throttling. */ EDGETPU_COUNTER_TPU_THROTTLE_STALLS = 1, - /* Number of graph invocations. */ + /* Number of graph invocations. (Now called kWorkload in FW.) */ EDGETPU_COUNTER_INFERENCES = 2, /* Number of TPU offload op invocations. */ EDGETPU_COUNTER_TPU_OPS = 3, @@ -81,7 +109,12 @@ enum edgetpu_usage_counter_type { /* Number of times (firmware)suspend function takes longer than SLA time. */ EDGETPU_COUNTER_LONG_SUSPEND = 10, - EDGETPU_COUNTER_COUNT = 11, /* number of counters above */ + /* The following counters are added in metrics v2. */ + + /* Number of context switches on a compute core. */ + EDGETPU_COUNTER_CONTEXT_SWITCHES = 11, + + EDGETPU_COUNTER_COUNT = 12, /* number of counters above */ }; /* Generic counter. Only reported if it has a value larger than 0. */ @@ -91,6 +124,11 @@ struct __packed edgetpu_usage_counter { /* Accumulated value since last initialization. */ uint64_t value; + + /* Following fields are added in metrics v2 */ + + /* Reporting component. */ + uint8_t component_id; }; /* Defines different max watermarks we track. */ @@ -121,6 +159,11 @@ struct __packed edgetpu_usage_max_watermark { * non-mobile, firmware boot on mobile). */ uint64_t value; + + /* Following fields are added in metrics v2 */ + + /* Reporting component. */ + uint8_t component_id; }; /* An enum to identify the tracked firmware threads. 
*/ diff --git a/drivers/edgetpu/mobile-pm.c b/drivers/edgetpu/mobile-pm.c index 9799172..50c3866 100644 --- a/drivers/edgetpu/mobile-pm.c +++ b/drivers/edgetpu/mobile-pm.c @@ -64,6 +64,7 @@ static int mobile_pwr_state_init(struct device *dev) ret = pm_runtime_get_sync(dev); if (ret) { pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); dev_err(dev, "pm_runtime_get_sync returned %d\n", ret); return ret; } @@ -74,6 +75,7 @@ static int mobile_pwr_state_init(struct device *dev) dev_err(dev, "error initializing tpu state: %d\n", ret); if (curr_state > TPU_OFF) pm_runtime_put_sync(dev); + pm_runtime_disable(dev); return ret; } @@ -603,6 +605,7 @@ static int mobile_pm_after_create(struct edgetpu_pm *etpm) struct edgetpu_mobile_platform_dev *etmdev = to_mobile_dev(etdev); struct device *dev = etdev->dev; struct edgetpu_mobile_platform_pwr *platform_pwr = &etmdev->platform_pwr; + int curr_state; ret = mobile_pwr_state_init(dev); if (ret) @@ -644,7 +647,19 @@ static int mobile_pm_after_create(struct edgetpu_pm *etpm) if (platform_pwr->after_create) ret = platform_pwr->after_create(etdev); + if (ret) + goto err_debugfs_remove; + + return 0; +err_debugfs_remove: + debugfs_remove_recursive(platform_pwr->debugfs_dir); + /* pm_runtime_{enable,get_sync} were called in mobile_pwr_state_init */ + + curr_state = exynos_acpm_get_rate(TPU_ACPM_DOMAIN, 0); + if (curr_state > TPU_OFF) + pm_runtime_put_sync(dev); + pm_runtime_disable(dev); return ret; } |