diff options
author | Nrithya Kanakasabapathy <nrithya@google.com> | 2021-06-17 19:48:23 +0000 |
---|---|---|
committer | Todd Poynor <toddpoynor@google.com> | 2021-06-17 23:06:39 +0000 |
commit | f5ae5699d42449c79ff95e52d393d2a539015abb (patch) | |
tree | 561395d169b8196d83a4cb82ccb652272d9749ea | |
parent | 631c79d974fe2d57134574606f89a1d1936ddbb7 (diff) | |
download | edgetpu-f5ae5699d42449c79ff95e52d393d2a539015abb.tar.gz |
Merge branch 'whitechapel' into android-gs-pixel-5.10
* whitechapel: (33 commits)
edgetpu: add edgetpu_kci_send_cmd_with_data
edgetpu: activate single mbox instead of bitmasks
edgetpu: add get_fatal_errors ioctl
edgetpu: set fatal error event codes
edgetpu: add fatal error event codes
edgetpu: remove redundant args
edgetpu: abrolhos scrub more TPU CPU references
edgetpu: remove some references to codenames and hardware details
edgetpu: remove some details from usage-stats comments and symbols
edgetpu: fix typo in debug dump header
edgetpu: log error when firmware load failed
edgetpu: google: use default domain when AUX disabled
edgetpu: fix edgetpu_mmu_alloc_domain memory leak
edgetpu: remove dependency on iommu_group_id
edgetpu: fix watchdog job cancel ordering
edgetpu: don't check mailbox_detachable on fs_release
...
Signed-off-by: Nrithya Kanakasabapathy <nrithya@google.com>
Change-Id: I9e876a348e290740cbc844cc7f48566f65071341
Bug: 191153847
23 files changed, 424 insertions, 254 deletions
diff --git a/drivers/edgetpu/abrolhos-device.c b/drivers/edgetpu/abrolhos-device.c index a867c13..fe3da83 100644 --- a/drivers/edgetpu/abrolhos-device.c +++ b/drivers/edgetpu/abrolhos-device.c @@ -172,29 +172,29 @@ void edgetpu_chip_handle_reverse_kci(struct edgetpu_dev *etdev, static int abrolhos_check_ext_mailbox_args(const char *func, struct edgetpu_dev *etdev, - struct edgetpu_ext_mailbox *ext_mbox) + struct edgetpu_ext_mailbox_ioctl *args) { - if (ext_mbox->type != EDGETPU_EXT_MAILBOX_TYPE_TZ) { + if (args->type != EDGETPU_EXT_MAILBOX_TYPE_TZ) { etdev_err(etdev, "%s: Invalid type %d != %d\n", func, - ext_mbox->type, EDGETPU_EXT_MAILBOX_TYPE_TZ); + args->type, EDGETPU_EXT_MAILBOX_TYPE_TZ); return -EINVAL; } - if (ext_mbox->count != 1) { + if (args->count != 1) { etdev_err(etdev, "%s: Invalid mailbox count: %d != 1\n", func, - ext_mbox->count); + args->count); return -EINVAL; } return 0; } int edgetpu_chip_acquire_ext_mailbox(struct edgetpu_client *client, - struct edgetpu_ext_mailbox *ext_mbox) + struct edgetpu_ext_mailbox_ioctl *args) { struct abrolhos_platform_dev *apdev = to_abrolhos_dev(client->etdev); int ret; ret = abrolhos_check_ext_mailbox_args(__func__, client->etdev, - ext_mbox); + args); if (ret) return ret; @@ -213,13 +213,13 @@ int edgetpu_chip_acquire_ext_mailbox(struct edgetpu_client *client, } int edgetpu_chip_release_ext_mailbox(struct edgetpu_client *client, - struct edgetpu_ext_mailbox *ext_mbox) + struct edgetpu_ext_mailbox_ioctl *args) { struct abrolhos_platform_dev *apdev = to_abrolhos_dev(client->etdev); int ret = 0; ret = abrolhos_check_ext_mailbox_args(__func__, client->etdev, - ext_mbox); + args); if (ret) return ret; diff --git a/drivers/edgetpu/abrolhos-pm.c b/drivers/edgetpu/abrolhos-pm.c index 75a3c59..2930917 100644 --- a/drivers/edgetpu/abrolhos-pm.c +++ b/drivers/edgetpu/abrolhos-pm.c @@ -510,8 +510,7 @@ static int abrolhos_power_up(struct edgetpu_pm *etpm) static void abrolhos_pm_shutdown_firmware(struct abrolhos_platform_dev *etpdev, - struct edgetpu_dev *etdev, - struct abrolhos_platform_dev *abpdev) + struct edgetpu_dev *etdev) { if (!edgetpu_pchannel_power_down(etdev, false)) return; @@ -520,9 +519,10 @@ abrolhos_pm_shutdown_firmware(struct abrolhos_platform_dev *etpdev, etdev_warn(etdev, "Requesting early GSA reset\n"); /* - * p-channel failed, request GSA shutdown to make sure the R52 core is + * p-channel failed, request GSA shutdown to make sure the CPU is * reset. - * The GSA->APM request will clear any pending DVFS status from R52. + * The GSA->APM request will clear any pending DVFS status from the + * CPU. */ gsa_send_tpu_cmd(etpdev->gsa_dev, GSA_TPU_SHUTDOWN); } @@ -582,7 +582,7 @@ static void abrolhos_power_down(struct edgetpu_pm *etpm) if (etdev->kci && edgetpu_firmware_status_locked(etdev) == FW_VALID) { /* Update usage stats before we power off fw. */ edgetpu_kci_update_usage_locked(etdev); - abrolhos_pm_shutdown_firmware(abpdev, etdev, abpdev); + abrolhos_pm_shutdown_firmware(abpdev, etdev); edgetpu_kci_cancel_work_queues(etdev->kci); } diff --git a/drivers/edgetpu/abrolhos/config-mailbox.h b/drivers/edgetpu/abrolhos/config-mailbox.h index b19cf46..4bec2d5 100644 --- a/drivers/edgetpu/abrolhos/config-mailbox.h +++ b/drivers/edgetpu/abrolhos/config-mailbox.h @@ -14,7 +14,7 @@ #define EDGETPU_NUM_VII_MAILBOXES (EDGETPU_NUM_MAILBOXES - 1) #define EDGETPU_NUM_P2P_MAILBOXES 0 -#define ABROLHOS_TZ_MAILBOX_ID (1 << 8) +#define ABROLHOS_TZ_MAILBOX_ID 8 #define ABROLHOS_CSR_MBOX2_CONTEXT_ENABLE 0xe0000 #define ABROLHOS_CSR_MBOX2_CMD_QUEUE_DOORBELL_SET 0xe1000 diff --git a/drivers/edgetpu/abrolhos/config.h b/drivers/edgetpu/abrolhos/config.h index f6397fd..00ec926 100644 --- a/drivers/edgetpu/abrolhos/config.h +++ b/drivers/edgetpu/abrolhos/config.h @@ -13,12 +13,16 @@ #define EDGETPU_DEV_MAX 1 #define EDGETPU_HAS_MULTI_GROUPS +/* Max number of virtual context IDs that can be allocated for one device. */ +#define EDGETPU_NUM_VCIDS 16 +/* Reserved VCID that uses the extra partition. */ +#define EDGETPU_VCID_EXTRA_PARTITION 0 #define EDGETPU_HAS_WAKELOCK /* - * A remapped data region is available. This will be accessible by the R52 - * regardless of active context and is typically used for logging buffer and + * A remapped data region is available. This will be accessible by the TPU + * CPU regardless of active context and is typically used for logging buffer and * non-secure mailbox queues. */ #define EDGETPU_HAS_REMAPPED_DATA @@ -47,11 +51,11 @@ /* * Instruction remap registers make carveout memory appear at address - * 0x10000000 from the R52 perspective + * 0x10000000 from the TPU CPU perspective */ #define EDGETPU_INSTRUCTION_REMAP_BASE 0x10000000 -/* Address from which the R52 can access data in the remapped region */ +/* Address from which the TPU CPU can access data in the remapped region */ #define EDGETPU_REMAPPED_DATA_ADDR \ (EDGETPU_INSTRUCTION_REMAP_BASE + EDGETPU_REMAPPED_DATA_OFFSET) diff --git a/drivers/edgetpu/edgetpu-config.h b/drivers/edgetpu/edgetpu-config.h index 5a13adb..a76d8e3 100644 --- a/drivers/edgetpu/edgetpu-config.h +++ b/drivers/edgetpu/edgetpu-config.h @@ -8,18 +8,6 @@ #ifndef __EDGETPU_CONFIG_H__ #define __EDGETPU_CONFIG_H__ -#ifdef CONFIG_HERMOSA - -#include "hermosa/config.h" - -#else /* !CONFIG_HERMOSA */ - -#ifdef CONFIG_JANEIRO - -#include "janeiro/config.h" - -#else - #ifndef CONFIG_ABROLHOS #define CONFIG_ABROLHOS #warning "Building default chipset abrolhos" @@ -27,9 +15,6 @@ #include "abrolhos/config.h" -#endif /* CONFIG_JANEIRO */ -#endif /* CONFIG_HERMOSA */ - #define EDGETPU_DEFAULT_FIRMWARE_NAME "google/edgetpu-" DRIVER_NAME ".fw" #define EDGETPU_TEST_FIRMWARE_NAME "google/edgetpu-" DRIVER_NAME "-test.fw" diff --git a/drivers/edgetpu/edgetpu-core.c b/drivers/edgetpu/edgetpu-core.c index 0820e95..86bf7d8 100644 --- a/drivers/edgetpu/edgetpu-core.c +++ b/drivers/edgetpu/edgetpu-core.c @@ -370,6 +370,7 @@ int edgetpu_device_add(struct edgetpu_dev *etdev, INIT_LIST_HEAD(&etdev->groups); etdev->n_groups = 0; etdev->group_join_lockout = false; + etdev->vcid_pool = (1u << EDGETPU_NUM_VCIDS) - 1; mutex_init(&etdev->state_lock); etdev->state = ETDEV_STATE_NOFW; @@ -582,11 +583,12 @@ void edgetpu_handle_firmware_crash(struct edgetpu_dev *etdev, if (crash_type == EDGETPU_FW_CRASH_UNRECOV_FAULT) { etdev_err(etdev, "firmware unrecoverable crash"); etdev->firmware_crash_count++; - edgetpu_fatal_error_notify(etdev); + edgetpu_fatal_error_notify(etdev, EDGETPU_ERROR_FW_CRASH); /* Restart firmware without chip reset */ edgetpu_watchdog_bite(etdev, false); } else { - etdev_err(etdev, "firmware crash event: %u", crash_type); + etdev_err(etdev, "firmware non-fatal crash event: %u", + crash_type); } } diff --git a/drivers/edgetpu/edgetpu-debug-dump.h b/drivers/edgetpu/edgetpu-debug-dump.h index ec33668..125ed1a 100644 --- a/drivers/edgetpu/edgetpu-debug-dump.h +++ b/drivers/edgetpu/edgetpu-debug-dump.h @@ -44,7 +44,7 @@ struct edgetpu_debug_stats { struct edgetpu_dump_segment { u64 type; /* type of the dump */ u64 size; /* size of the dump data */ - u64 src_addr; /* source of the dump on the R52 address map */ + u64 src_addr; /* source of the dump on the CPU address map */ }; struct edgetpu_debug_dump { @@ -89,4 +89,4 @@ int edgetpu_get_debug_dump(struct edgetpu_dev *etdev, */ void edgetpu_debug_dump_resp_handler(struct edgetpu_dev *etdev); -#endif /* EDEGETPU_DEBUG_DUMP_H_ */ +#endif /* EDGETPU_DEBUG_DUMP_H_ */ diff --git a/drivers/edgetpu/edgetpu-device-group.c b/drivers/edgetpu/edgetpu-device-group.c index 3b2e6fc..57c0af5 100644 --- a/drivers/edgetpu/edgetpu-device-group.c +++ b/drivers/edgetpu/edgetpu-device-group.c @@ -6,7 +6,7 @@ */ #include <linux/atomic.h> -#include <linux/bits.h> +#include <linux/bitops.h> #include <linux/dma-direction.h> #include <linux/dma-mapping.h> #include <linux/eventfd.h> @@ -94,7 +94,7 @@ static int edgetpu_kci_join_group_worker(struct kci_worker_param *param) etdev_dbg(etdev, "%s: join group %u %u/%u", __func__, group->workload_id, i + 1, group->n_clients); - return edgetpu_kci_join_group(etdev->kci, etdev, group->n_clients, i); + return edgetpu_kci_join_group(etdev->kci, group->n_clients, i); } static int edgetpu_kci_leave_group_worker(struct kci_worker_param *param) @@ -111,7 +111,12 @@ static int edgetpu_kci_leave_group_worker(struct kci_worker_param *param) #endif /* EDGETPU_HAS_MCP */ -static int edgetpu_group_kci_open_device(struct edgetpu_device_group *group) +/* + * Activates the VII mailbox @group owns. + * + * Caller holds group->lock. + */ +static int edgetpu_group_activate(struct edgetpu_device_group *group) { u8 mailbox_id; int ret; @@ -119,14 +124,22 @@ static int edgetpu_group_kci_open_device(struct edgetpu_device_group *group) if (edgetpu_group_mailbox_detached_locked(group)) return 0; mailbox_id = edgetpu_group_context_id_locked(group); - ret = edgetpu_mailbox_activate(group->etdev, BIT(mailbox_id)); + ret = edgetpu_mailbox_activate(group->etdev, mailbox_id, group->vcid, !group->activated); if (ret) - etdev_err(group->etdev, "activate mailbox failed with %d", ret); + etdev_err(group->etdev, "activate mailbox for VCID %d failed with %d", group->vcid, + ret); + else + group->activated = true; atomic_inc(&group->etdev->job_count); return ret; } -static void edgetpu_group_kci_close_device(struct edgetpu_device_group *group) +/* + * Deactivates the VII mailbox @group owns. + * + * Caller holds group->lock. + */ +static void edgetpu_group_deactivate(struct edgetpu_device_group *group) { u8 mailbox_id; int ret; @@ -134,10 +147,10 @@ static void edgetpu_group_kci_close_device(struct edgetpu_device_group *group) if (edgetpu_group_mailbox_detached_locked(group)) return; mailbox_id = edgetpu_group_context_id_locked(group); - ret = edgetpu_mailbox_deactivate(group->etdev, BIT(mailbox_id)); + ret = edgetpu_mailbox_deactivate(group->etdev, mailbox_id); if (ret) - etdev_err(group->etdev, "deactivate mailbox failed with %d", - ret); + etdev_err(group->etdev, "deactivate mailbox for VCID %d failed with %d", + group->vcid, ret); return; } @@ -156,7 +169,16 @@ static void edgetpu_device_group_kci_leave(struct edgetpu_device_group *group) { #ifdef EDGETPU_HAS_MULTI_GROUPS edgetpu_kci_update_usage_async(group->etdev); - return edgetpu_group_kci_close_device(group); + /* + * Theoretically we don't need to check @dev_inaccessible here. + * @dev_inaccessible is true implies the client has wakelock count zero, under such case + * edgetpu_mailbox_deactivate() has been called on releasing the wakelock and therefore this + * edgetpu_group_deactivate() call won't send any KCI. + * Still have a check here in case this function does CSR programming other than calling + * edgetpu_mailbox_deactivate() someday. + */ + if (!group->dev_inaccessible) + edgetpu_group_deactivate(group); #else /* !EDGETPU_HAS_MULTI_GROUPS */ struct kci_worker_param *params = kmalloc_array(group->n_clients, sizeof(*params), GFP_KERNEL); @@ -198,7 +220,7 @@ static int edgetpu_device_group_kci_finalized(struct edgetpu_device_group *group) { #ifdef EDGETPU_HAS_MULTI_GROUPS - return edgetpu_group_kci_open_device(group); + return edgetpu_group_activate(group); #else /* !EDGETPU_HAS_MULTI_GROUPS */ struct kci_worker_param *params = kmalloc_array(group->n_clients, sizeof(*params), GFP_KERNEL); @@ -537,6 +559,22 @@ static int edgetpu_dev_add_group(struct edgetpu_dev *etdev, goto error_unlock; } #endif /* !EDGETPU_HAS_MULTI_GROUPS */ + if (group->etdev == etdev) { + u32 vcid_pool = etdev->vcid_pool; + +#ifdef EDGETPU_VCID_EXTRA_PARTITION + if (group->mbox_attr.partition_type != EDGETPU_PARTITION_EXTRA) + vcid_pool &= ~BIT(EDGETPU_VCID_EXTRA_PARTITION); + else + vcid_pool &= BIT(EDGETPU_VCID_EXTRA_PARTITION); +#endif + if (!vcid_pool) { + ret = -EBUSY; + goto error_unlock; + } + group->vcid = ffs(vcid_pool) - 1; + etdev->vcid_pool &= ~BIT(group->vcid); + } l->grp = edgetpu_device_group_get(group); list_add_tail(&l->list, &etdev->groups); etdev->n_groups++; @@ -611,6 +649,8 @@ void edgetpu_device_group_leave(struct edgetpu_client *client) mutex_lock(&client->etdev->groups_lock); list_for_each_entry(l, &client->etdev->groups, list) { if (l->grp == group) { + if (group->etdev == client->etdev) + client->etdev->vcid_pool |= BIT(group->vcid); list_del(&l->list); edgetpu_device_group_put(l->grp); kfree(l); @@ -1630,17 +1670,64 @@ out: return ret; } -void edgetpu_fatal_error_notify(struct edgetpu_dev *etdev) +/* + * For each group active on @etdev: set the group status as errored, set the + * error mask, and notify the runtime of the fatal error event. + */ +void edgetpu_fatal_error_notify(struct edgetpu_dev *etdev, uint error_mask) { - struct edgetpu_list_group *l; + size_t i, num_groups = 0; struct edgetpu_device_group *group; + struct edgetpu_device_group **groups; + struct edgetpu_list_group *g; mutex_lock(&etdev->groups_lock); + groups = kmalloc_array(etdev->n_groups, sizeof(*groups), GFP_KERNEL); + if (unlikely(!groups)) { + /* + * Just give up setting status in this case, this only happens + * when the system is OOM. + */ + mutex_unlock(&etdev->groups_lock); + return; + } + /* + * Fetch the groups into an array to set the group status without + * holding @etdev->groups_lock. To prevent the potential deadlock that + * edgetpu_device_group_add() holds group->lock then etdev->groups_lock. + */ + etdev_for_each_group(etdev, g, group) { + if (edgetpu_device_group_is_disbanded(group)) + continue; + groups[num_groups++] = edgetpu_device_group_get(group); + } + mutex_unlock(&etdev->groups_lock); + for (i = 0; i < num_groups; i++) { + group = groups[i]; + mutex_lock(&group->lock); + /* + * Only finalized groups may have handshake with the FW, mark + * them as errored. + */ - etdev_for_each_group(etdev, l, group) + if (edgetpu_device_group_is_finalized(group)) + group->status = EDGETPU_DEVICE_GROUP_ERRORED; + group->fatal_errors |= error_mask; + mutex_unlock(&group->lock); edgetpu_group_notify(group, EDGETPU_EVENT_FATAL_ERROR); + edgetpu_device_group_put(group); + } + kfree(groups); +} - mutex_unlock(&etdev->groups_lock); +uint edgetpu_group_get_fatal_errors(struct edgetpu_device_group *group) +{ + uint fatal_errors; + + mutex_lock(&group->lock); + fatal_errors = group->fatal_errors; + mutex_unlock(&group->lock); + return fatal_errors; } void edgetpu_group_detach_mailbox_locked(struct edgetpu_device_group *group) @@ -1661,7 +1748,7 @@ void edgetpu_group_close_and_detach_mailbox(struct edgetpu_device_group *group) * Detaching mailbox for an errored group is also fine. */ if (is_finalized_or_errored(group)) { - edgetpu_group_kci_close_device(group); + edgetpu_group_deactivate(group); edgetpu_group_detach_mailbox_locked(group); } mutex_unlock(&group->lock); @@ -1688,7 +1775,7 @@ int edgetpu_group_attach_and_open_mailbox(struct edgetpu_device_group *group) if (edgetpu_device_group_is_finalized(group)) { ret = edgetpu_group_attach_mailbox_locked(group); if (!ret) - ret = edgetpu_group_kci_open_device(group); + ret = edgetpu_group_activate(group); } mutex_unlock(&group->lock); return ret; diff --git a/drivers/edgetpu/edgetpu-device-group.h b/drivers/edgetpu/edgetpu-device-group.h index 3a5e252..5e05799 100644 --- a/drivers/edgetpu/edgetpu-device-group.h +++ b/drivers/edgetpu/edgetpu-device-group.h @@ -69,6 +69,24 @@ struct edgetpu_device_group { * creating this group. */ bool mailbox_detachable; + /* + * Whether group->etdev is inaccessible. + * Some group operations will access device CSRs. If the device is known to be + * inaccessible (typically not powered on) then set this field to true to + * prevent HW interactions. + * + * This field is always false for !EDGETPU_HAS_WAKELOCK chipsets. + * + * For EDGETPU_HAS_MCP chipsets this field should be replaced with a + * boolean array with size @n_clients, but we don't have a chipset with + * EDGETPU_HAS_MCP && EDGETPU_HAS_WAKELOCK yet. + * + * Is not protected by @lock because this is only written when releasing the + * leader of this group. + */ + bool dev_inaccessible; + /* Virtual context ID to be sent to the firmware. */ + u16 vcid; /* protects everything in the following comment block */ struct mutex lock; @@ -88,6 +106,7 @@ struct edgetpu_device_group { */ struct edgetpu_client **members; enum edgetpu_device_group_status status; + bool activated; /* whether this group's VII has ever been activated */ struct edgetpu_vii vii; /* VII mailbox */ /* * Context ID ranges from EDGETPU_CONTEXT_VII_BASE to @@ -102,6 +121,9 @@ struct edgetpu_device_group { /* matrix of P2P mailboxes */ struct edgetpu_p2p_mailbox **p2p_mailbox_matrix; + /* Mask of errors set for this group. */ + uint fatal_errors; + /* end of fields protected by @lock */ /* TPU IOVA mapped to host DRAM space */ @@ -357,7 +379,10 @@ bool edgetpu_in_any_group(struct edgetpu_dev *etdev); bool edgetpu_set_group_join_lockout(struct edgetpu_dev *etdev, bool lockout); /* Notify all device groups of @etdev about a failure on the die */ -void edgetpu_fatal_error_notify(struct edgetpu_dev *etdev); +void edgetpu_fatal_error_notify(struct edgetpu_dev *etdev, uint error_mask); + +/* Return fatal error signaled bitmask for device group */ +uint edgetpu_group_get_fatal_errors(struct edgetpu_device_group *group); /* * Detach and release the mailbox resources of VII from @group. diff --git a/drivers/edgetpu/edgetpu-firmware.c b/drivers/edgetpu/edgetpu-firmware.c index d0dc575..2a1e577 100644 --- a/drivers/edgetpu/edgetpu-firmware.c +++ b/drivers/edgetpu/edgetpu-firmware.c @@ -14,6 +14,7 @@ #include <linux/string.h> #include <linux/types.h> +#include "edgetpu.h" #include "edgetpu-device-group.h" #include "edgetpu-firmware.h" #include "edgetpu-firmware-util.h" @@ -180,7 +181,7 @@ static int edgetpu_firmware_load_locked( if (handlers && handlers->alloc_buffer) { ret = handlers->alloc_buffer(et_fw, &fw_desc->buf); if (ret) { - etdev_dbg(etdev, "handler alloc_buffer failed: %d\n", + etdev_err(etdev, "handler alloc_buffer failed: %d\n", ret); return ret; } @@ -188,14 +189,14 @@ static int edgetpu_firmware_load_locked( ret = edgetpu_firmware_do_load_locked(et_fw, fw_desc, name); if (ret) { - etdev_dbg(etdev, "firmware request failed: %d\n", ret); + etdev_err(etdev, "firmware request failed: %d\n", ret); goto out_free_buffer; } if (handlers && handlers->setup_buffer) { ret = handlers->setup_buffer(et_fw, &fw_desc->buf); if (ret) { - etdev_dbg(etdev, "handler setup_buffer failed: %d\n", + etdev_err(etdev, "handler setup_buffer failed: %d\n", ret); goto out_do_unload_locked; } @@ -467,7 +468,7 @@ int edgetpu_firmware_run_locked(struct edgetpu_firmware *et_fw, } /* - * Previous firmware buffer is not used anymore when R52 runs on + * Previous firmware buffer is not used anymore when the CPU runs on * new firmware buffer. Unload this before et_fw->p->fw_buf is * overwritten by new buffer information. */ @@ -698,54 +699,6 @@ static const struct attribute_group edgetpu_firmware_attr_group = { .attrs = dev_attrs, }; -/* - * Sets all groups related to @etdev as errored. - */ -static void edgetpu_set_groups_error(struct edgetpu_dev *etdev) -{ - size_t i, num_groups = 0; - struct edgetpu_device_group *group; - struct edgetpu_device_group **groups; - struct edgetpu_list_group *g; - - mutex_lock(&etdev->groups_lock); - groups = kmalloc_array(etdev->n_groups, sizeof(*groups), GFP_KERNEL); - if (unlikely(!groups)) { - /* - * Just give up setting status in this case, this only happens - * when the system is OOM. - */ - mutex_unlock(&etdev->groups_lock); - edgetpu_fatal_error_notify(etdev); - return; - } - /* - * Fetch the groups into an array to set the group status without - * holding @etdev->groups_lock. To prevent the potential deadlock that - * edgetpu_device_group_add() holds group->lock then etdev->groups_lock. - */ - etdev_for_each_group(etdev, g, group) { - if (edgetpu_device_group_is_disbanded(group)) - continue; - groups[num_groups++] = edgetpu_device_group_get(group); - } - mutex_unlock(&etdev->groups_lock); - for (i = 0; i < num_groups; i++) { - group = groups[i]; - mutex_lock(&group->lock); - /* - * Only finalized groups may have handshake with the FW, mark - * them as errored. - */ - if (edgetpu_device_group_is_finalized(group)) - group->status = EDGETPU_DEVICE_GROUP_ERRORED; - mutex_unlock(&group->lock); - edgetpu_device_group_put(group); - } - edgetpu_fatal_error_notify(etdev); - kfree(groups); -} - static void edgetpu_firmware_wdt_timeout_action(void *data) { int ret; @@ -762,7 +715,7 @@ static void edgetpu_firmware_wdt_timeout_action(void *data) * groups the CLOSE_DEVICE KCIs won't be sent. */ edgetpu_handshake_clear_fw_state(&etdev->mailbox_manager->open_devices); - edgetpu_set_groups_error(etdev); + edgetpu_fatal_error_notify(etdev, EDGETPU_ERROR_WATCHDOG_TIMEOUT); /* Another procedure is loading the firmware, let it do the work. */ if (edgetpu_firmware_is_loading(etdev)) diff --git a/drivers/edgetpu/edgetpu-firmware.h b/drivers/edgetpu/edgetpu-firmware.h index e41543d..3b784c5 100644 --- a/drivers/edgetpu/edgetpu-firmware.h +++ b/drivers/edgetpu/edgetpu-firmware.h @@ -39,7 +39,7 @@ enum edgetpu_fw_flavor { FW_FLAVOR_BL1 = 1, /* systest app image */ FW_FLAVOR_SYSTEST = 2, - /* default production app image from DarwiNN team */ + /* default production app image */ FW_FLAVOR_PROD_DEFAULT = 3, /* custom image produced by other teams */ FW_FLAVOR_CUSTOM = 4, @@ -140,7 +140,7 @@ struct edgetpu_firmware_handlers { struct edgetpu_firmware_buffer *fw_buf); /* * Platform-specific handling after firmware loaded, before running - * the firmware, such as validating the firmware or resetting the R52 + * the firmware, such as validating the firmware or resetting the * processor. */ int (*prepare_run)(struct edgetpu_firmware *et_fw, diff --git a/drivers/edgetpu/edgetpu-fs.c b/drivers/edgetpu/edgetpu-fs.c index 1fd9106..311ea86 100644 --- a/drivers/edgetpu/edgetpu-fs.c +++ b/drivers/edgetpu/edgetpu-fs.c @@ -107,16 +107,21 @@ static int edgetpu_fs_release(struct inode *inode, struct file *file) wakelock_count = edgetpu_wakelock_lock(client->wakelock); mutex_lock(&client->group_lock); /* - * @wakelock = 0 means the device might be powered off. And for group with a non-detachable - * mailbox, its mailbox is removed when the group is released, in such case we need to - * ensure the device is powered to prevent kernel panic on programming VII mailbox CSRs. + * @wakelock_count = 0 means the device might be powered off. And for group with a + * non-detachable mailbox, its mailbox is removed when the group is released, in such case + * we need to ensure the device is powered to prevent kernel panic on programming VII + * mailbox CSRs. * * For mailbox-detachable groups the mailbox had been removed when the wakelock was * released, edgetpu_device_group_release() doesn't need the device be powered in this case. */ if (!wakelock_count && client->group && !client->group->mailbox_detachable) { - wakelock_count = 1; - edgetpu_pm_get(etdev->pm); + /* assumes @group->etdev == @client->etdev, i.e. @client is the leader of @group */ + if (!edgetpu_pm_get(etdev->pm)) + wakelock_count = 1; + else + /* failed to power on - prevent group releasing from accessing the device */ + client->group->dev_inaccessible = true; } mutex_unlock(&client->group_lock); edgetpu_wakelock_unlock(client->wakelock); @@ -626,9 +631,9 @@ edgetpu_ioctl_dram_usage(struct edgetpu_dev *etdev, static int edgetpu_ioctl_acquire_ext_mailbox(struct edgetpu_client *client, - struct edgetpu_ext_mailbox __user *argp) + struct edgetpu_ext_mailbox_ioctl __user *argp) { - struct edgetpu_ext_mailbox ext_mailbox; + struct edgetpu_ext_mailbox_ioctl ext_mailbox; if (copy_from_user(&ext_mailbox, argp, sizeof(ext_mailbox))) return -EFAULT; @@ -638,9 +643,9 @@ edgetpu_ioctl_acquire_ext_mailbox(struct edgetpu_client *client, static int edgetpu_ioctl_release_ext_mailbox(struct edgetpu_client *client, - struct edgetpu_ext_mailbox __user *argp) + struct edgetpu_ext_mailbox_ioctl __user *argp) { - struct edgetpu_ext_mailbox ext_mailbox; + struct edgetpu_ext_mailbox_ioctl ext_mailbox; if (copy_from_user(&ext_mailbox, argp, sizeof(ext_mailbox))) return -EFAULT; @@ -648,6 +653,21 @@ edgetpu_ioctl_release_ext_mailbox(struct edgetpu_client *client, return edgetpu_chip_release_ext_mailbox(client, &ext_mailbox); } +static int edgetpu_ioctl_get_fatal_errors(struct edgetpu_client *client, + __u32 __user *argp) +{ + u32 fatal_errors = 0; + int ret = 0; + + mutex_lock(&client->group_lock); + if (client->group) + fatal_errors = edgetpu_group_get_fatal_errors(client->group); + mutex_unlock(&client->group_lock); + if (copy_to_user(argp, &fatal_errors, sizeof(fatal_errors))) + ret = -EFAULT; + return ret; +} + long edgetpu_ioctl(struct file *file, uint cmd, ulong arg) { struct edgetpu_client *client = file->private_data; @@ -736,6 +756,9 @@ long edgetpu_ioctl(struct file *file, uint cmd, ulong arg) case EDGETPU_RELEASE_EXT_MAILBOX: ret = edgetpu_ioctl_release_ext_mailbox(client, argp); break; + case EDGETPU_GET_FATAL_ERRORS: + ret = edgetpu_ioctl_get_fatal_errors(client, argp); + break; default: return -ENOTTY; /* unknown command */ diff --git a/drivers/edgetpu/edgetpu-google-iommu.c b/drivers/edgetpu/edgetpu-google-iommu.c index 3496df0..9d28949 100644 --- a/drivers/edgetpu/edgetpu-google-iommu.c +++ b/drivers/edgetpu/edgetpu-google-iommu.c @@ -71,6 +71,9 @@ get_domain_by_context_id(struct edgetpu_dev *etdev, struct edgetpu_iommu *etiommu = etdev->mmu_cookie; uint pasid; + /* always return the default domain when AUX is not supported */ + if (!etiommu->aux_enabled) + return iommu_get_domain_for_dev(dev); if (ctx_id == EDGETPU_CONTEXT_INVALID) return NULL; if (ctx_id & EDGETPU_CONTEXT_DOMAIN_TOKEN) @@ -627,14 +630,11 @@ static struct edgetpu_iommu_domain invalid_etdomain = { struct edgetpu_iommu_domain *edgetpu_mmu_alloc_domain(struct edgetpu_dev *etdev) { - struct edgetpu_iommu_domain *etdomain = - kzalloc(sizeof(*etdomain), GFP_KERNEL); + struct edgetpu_iommu_domain *etdomain; struct edgetpu_iommu *etiommu = etdev->mmu_cookie; struct iommu_domain *domain; int token; - if (!etdomain) - return NULL; if (!etiommu->aux_enabled) return &invalid_etdomain; domain = iommu_domain_alloc(etdev->dev->bus); @@ -643,15 +643,23 @@ struct edgetpu_iommu_domain *edgetpu_mmu_alloc_domain(struct edgetpu_dev *etdev) return NULL; } + etdomain = kzalloc(sizeof(*etdomain), GFP_KERNEL); + if (!etdomain) { + iommu_domain_free(domain); + return NULL; + } + mutex_lock(&etiommu->pool_lock); token = idr_alloc(&etiommu->domain_pool, domain, 0, EDGETPU_DOMAIN_TOKEN_END, GFP_KERNEL); mutex_unlock(&etiommu->pool_lock); if (token < 0) { etdev_warn(etdev, "alloc iommu domain token failed: %d", token); + kfree(etdomain); iommu_domain_free(domain); return NULL; } + edgetpu_init_etdomain(etdomain, domain, token); return etdomain; } diff --git a/drivers/edgetpu/edgetpu-internal.h b/drivers/edgetpu/edgetpu-internal.h index 1258cf0..f5c6853 100644 --- a/drivers/edgetpu/edgetpu-internal.h +++ b/drivers/edgetpu/edgetpu-internal.h @@ -167,10 +167,16 @@ struct edgetpu_dev { struct dentry *d_entry; /* debugfs dir for this device */ struct mutex state_lock; /* protects state of this device */ enum edgetpu_dev_state state; - struct mutex groups_lock; /* protects groups, n_groups, and lockout */ + struct mutex groups_lock; + /* fields protected by @groups_lock */ + struct list_head groups; uint n_groups; /* number of entries in @groups */ bool group_join_lockout; /* disable group join while reinit */ + u32 vcid_pool; /* bitmask of VCID to be allocated */ + + /* end of fields protected by @groups_lock */ + void *mmu_cookie; /* mmu driver private data */ void *dram_cookie; /* on-device DRAM private data */ struct edgetpu_mailbox_manager *mailbox_manager; @@ -416,7 +422,7 @@ int edgetpu_get_state_errno_locked(struct edgetpu_dev *etdev); /* * "External mailboxes" below refers to mailboxes that are not handled - * directly by the DarwiNN runtime, such as secure or device-to-device. + * directly by the runtime, such as secure or device-to-device. * * Chip specific code will typically keep track of state and inform the firmware * that a mailbox has become active/inactive. @@ -424,10 +430,10 @@ int edgetpu_get_state_errno_locked(struct edgetpu_dev *etdev); /* Chip-specific code to acquire external mailboxes */ int edgetpu_chip_acquire_ext_mailbox(struct edgetpu_client *client, - struct edgetpu_ext_mailbox *ext_mbox); + struct edgetpu_ext_mailbox_ioctl *args); /* Chip-specific code to release external mailboxes */ int edgetpu_chip_release_ext_mailbox(struct edgetpu_client *client, - struct edgetpu_ext_mailbox *ext_mbox); + struct edgetpu_ext_mailbox_ioctl *args); #endif /* __EDGETPU_INTERNAL_H__ */ diff --git a/drivers/edgetpu/edgetpu-kci.c b/drivers/edgetpu/edgetpu-kci.c index c62ac73..0adf9a5 100644 --- a/drivers/edgetpu/edgetpu-kci.c +++ b/drivers/edgetpu/edgetpu-kci.c @@ -6,6 +6,7 @@ * Copyright (C) 2019 Google, Inc. */ +#include <linux/bits.h> #include <linux/circ_buf.h> #include <linux/device.h> #include <linux/dma-mapping.h> /* dmam_alloc_coherent */ @@ -690,6 +691,41 @@ static int edgetpu_kci_send_cmd_return_resp( return resp->code; } +static int edgetpu_kci_send_cmd_with_data(struct edgetpu_kci *kci, + struct edgetpu_command_element *cmd, const void *data, + size_t size) +{ + struct edgetpu_dev *etdev = kci->mailbox->etdev; + dma_addr_t dma_addr; + tpu_addr_t tpu_addr; + int ret; + void *ptr = dma_alloc_coherent(etdev->dev, size, &dma_addr, GFP_KERNEL); + const u32 flags = EDGETPU_MMU_DIE | EDGETPU_MMU_32 | EDGETPU_MMU_HOST; + + if (!ptr) + return -ENOMEM; + memcpy(ptr, data, size); + + tpu_addr = edgetpu_mmu_tpu_map(etdev, dma_addr, size, DMA_TO_DEVICE, EDGETPU_CONTEXT_KCI, + flags); + if (!tpu_addr) { + etdev_err(etdev, "%s: failed to map to TPU", __func__); + dma_free_coherent(etdev->dev, size, ptr, dma_addr); + return -ENOSPC; + } + etdev_dbg(etdev, "%s: map kva=%pK iova=0x%llx dma=%pad", __func__, ptr, tpu_addr, + &dma_addr); + + cmd->dma.address = tpu_addr; + cmd->dma.size = size; + ret = edgetpu_kci_send_cmd(kci, cmd); + edgetpu_mmu_tpu_unmap(etdev, tpu_addr, size, EDGETPU_CONTEXT_KCI); + dma_free_coherent(etdev->dev, size, ptr, dma_addr); + etdev_dbg(etdev, "%s: unmap kva=%pK iova=0x%llx dma=%pad", __func__, ptr, tpu_addr, + &dma_addr); + return ret; +} + int edgetpu_kci_send_cmd(struct edgetpu_kci *kci, struct edgetpu_command_element *cmd) { @@ -741,51 +777,19 @@ int edgetpu_kci_map_trace_buffer(struct edgetpu_kci *kci, tpu_addr_t tpu_addr, return edgetpu_kci_send_cmd(kci, &cmd); } -int edgetpu_kci_join_group(struct edgetpu_kci *kci, struct edgetpu_dev *etdev, - u8 n_dies, u8 vid) +int edgetpu_kci_join_group(struct edgetpu_kci *kci, u8 n_dies, u8 vid) { - struct edgetpu_kci_device_group_detail *detail; - const u32 size = sizeof(*detail); - dma_addr_t dma_addr; - tpu_addr_t tpu_addr; struct edgetpu_command_element cmd = { .code = KCI_CODE_JOIN_GROUP, - .dma = { - .size = size, - }, }; - const u32 flags = EDGETPU_MMU_DIE | EDGETPU_MMU_32 | EDGETPU_MMU_HOST; - int ret; + const struct edgetpu_kci_device_group_detail detail = { + .n_dies = n_dies, + .vid = vid, + }; if (!kci) return -ENODEV; - detail = dma_alloc_coherent(etdev->dev, sizeof(*detail), &dma_addr, - GFP_KERNEL); - if (!detail) - return -ENOMEM; - detail->n_dies = n_dies; - detail->vid = vid; - - tpu_addr = edgetpu_mmu_tpu_map(etdev, dma_addr, size, DMA_TO_DEVICE, - EDGETPU_CONTEXT_KCI, flags); - if (!tpu_addr) { - etdev_err(etdev, "%s: failed to map group detail to TPU", - __func__); - dma_free_coherent(etdev->dev, size, detail, dma_addr); - return -EINVAL; - } - - cmd.dma.address = tpu_addr; - etdev_dbg(etdev, "%s: map kva=%pK iova=0x%llx dma=%pad", __func__, - detail, tpu_addr, &dma_addr); - - ret = edgetpu_kci_send_cmd(kci, &cmd); - edgetpu_mmu_tpu_unmap(etdev, tpu_addr, size, EDGETPU_CONTEXT_KCI); - dma_free_coherent(etdev->dev, size, detail, dma_addr); - etdev_dbg(etdev, "%s: unmap kva=%pK iova=0x%llx dma=%pad", __func__, - detail, tpu_addr, &dma_addr); - - return ret; + return edgetpu_kci_send_cmd_with_data(kci, &cmd, &detail, sizeof(detail)); } int edgetpu_kci_leave_group(struct edgetpu_kci *kci) @@ -989,26 +993,33 @@ int edgetpu_kci_get_debug_dump(struct edgetpu_kci *kci, tpu_addr_t tpu_addr, return edgetpu_kci_send_cmd(kci, &cmd); } -int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_ids) +int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_id, s16 vcid, bool first_open) { + const struct edgetpu_kci_open_device_detail detail = { + .mailbox_id = mailbox_id, + .vcid = vcid, + .flags = first_open, + }; struct edgetpu_command_element cmd = { .code = KCI_CODE_OPEN_DEVICE, .dma = { - .flags = mailbox_ids, + .flags = BIT(mailbox_id), }, }; if (!kci) return -ENODEV; - return edgetpu_kci_send_cmd(kci, &cmd); + if (vcid < 0) + return edgetpu_kci_send_cmd(kci, &cmd); + return edgetpu_kci_send_cmd_with_data(kci, &cmd, &detail, sizeof(detail)); } -int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_ids) +int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_id) { struct edgetpu_command_element cmd = { .code = KCI_CODE_CLOSE_DEVICE, .dma = { - .flags = mailbox_ids, + .flags = BIT(mailbox_id), }, }; diff --git a/drivers/edgetpu/edgetpu-kci.h b/drivers/edgetpu/edgetpu-kci.h index 05f87c8..97e4079 100644 --- a/drivers/edgetpu/edgetpu-kci.h +++ b/drivers/edgetpu/edgetpu-kci.h @@ -204,6 +204,29 @@ struct edgetpu_kci_device_group_detail { u8 reserved[6]; /* padding */ }; +struct edgetpu_kci_open_device_detail { + /* The ID of mailbox to be opened. */ + u16 mailbox_id; + /* + * Virtual context ID @mailbox_id is associated to. + * For device groups with @mailbox_detachable attribute the mailbox attached to the group + * can be different after wakelock re-acquired. Firmware uses this VCID to identify the + * device group. + */ + u16 vcid; + /* + * Extra flags for the attributes of this request. + * Set RESERVED bits to 0 to ensure backwards compatibility. + * + * Bitfields: + * [0:0] - first_open: Specifies if this is the first time we are calling mailbox open + * KCI for this VCID after it has been allocated to a device group. This allows + * firmware to clean up/reset the memory allocator for that partition. + * [31:1] - RESERVED + */ + u32 flags; +}; + /* * Initializes a KCI object. * @@ -328,8 +351,7 @@ int edgetpu_kci_map_trace_buffer(struct edgetpu_kci *kci, tpu_addr_t tpu_addr, * * Returns the code of response, or a negative errno on error. */ -int edgetpu_kci_join_group(struct edgetpu_kci *kci, struct edgetpu_dev *etdev, - u8 n_dies, u8 vid); +int edgetpu_kci_join_group(struct edgetpu_kci *kci, u8 n_dies, u8 vid); /* Informs the TPU to leave the group it currently belongs to. */ int edgetpu_kci_leave_group(struct edgetpu_kci *kci); @@ -344,20 +366,20 @@ int edgetpu_kci_get_debug_dump(struct edgetpu_kci *kci, tpu_addr_t tpu_addr, size_t size); /* - * Inform the firmware to prepare to serve the VII of @mailbox_ids. + * Inform the firmware to prepare to serve the VII of @mailbox_id. * * You usually shouldn't call this directly - consider using * edgetpu_mailbox_activate() instead. */ -int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_ids); +int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_id, s16 vcid, bool first_open); /* - * Inform the firmware the VII with @mailbox_ids are closed. + * Inform the firmware the VII with @mailbox_id is closed. * * You usually shouldn't call this directly - consider using * edgetpu_mailbox_deactivate() instead. */ -int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_ids); +int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_id); /* Cancel work queues or wait until they're done */ void edgetpu_kci_cancel_work_queues(struct edgetpu_kci *kci); diff --git a/drivers/edgetpu/edgetpu-mailbox.c b/drivers/edgetpu/edgetpu-mailbox.c index 606b246..ba33eab 100644 --- a/drivers/edgetpu/edgetpu-mailbox.c +++ b/drivers/edgetpu/edgetpu-mailbox.c @@ -473,7 +473,8 @@ void edgetpu_mailbox_remove_vii(struct edgetpu_vii *vii) edgetpu_mailbox_free_queue(etdev, vii->mailbox, &vii->cmd_queue_mem); edgetpu_mailbox_free_queue(etdev, vii->mailbox, &vii->resp_queue_mem); if (vii->mailbox) { - edgetpu_mailbox_disable(vii->mailbox); + if (!vii->mailbox->internal.group->dev_inaccessible) + edgetpu_mailbox_disable(vii->mailbox); edgetpu_device_group_put(vii->mailbox->internal.group); edgetpu_mailbox_remove(etdev->mailbox_manager, vii->mailbox); vii->mailbox = NULL; @@ -768,14 +769,14 @@ void edgetpu_mailbox_restore_active_vii_queues(struct edgetpu_dev *etdev) kfree(groups); } -int edgetpu_mailbox_enable_ext(struct edgetpu_client *client, u32 mailbox_ids) +int edgetpu_mailbox_enable_ext(struct edgetpu_client *client, u32 mailbox_id) { int ret; if (!edgetpu_wakelock_lock(client->wakelock)) { etdev_err(client->etdev, - "Enabling mailboxes %08x needs wakelock acquired\n", - mailbox_ids); + "Enabling mailbox %d needs wakelock acquired\n", + mailbox_id); edgetpu_wakelock_unlock(client->wakelock); return -EAGAIN; } @@ -783,24 +784,24 @@ int edgetpu_mailbox_enable_ext(struct edgetpu_client *client, u32 mailbox_ids) edgetpu_wakelock_inc_event_locked(client->wakelock, EDGETPU_WAKELOCK_EVENT_EXT_MAILBOX); - etdev_dbg(client->etdev, "Enabling mailboxes: %08X\n", mailbox_ids); + etdev_dbg(client->etdev, "Enabling mailbox: %d\n", mailbox_id); - ret = edgetpu_mailbox_activate(client->etdev, mailbox_ids); + ret = edgetpu_mailbox_activate(client->etdev, mailbox_id, -1, false); if (ret) - etdev_err(client->etdev, "Activate mailboxes %08x failed: %d", - mailbox_ids, ret); + etdev_err(client->etdev, "Activate mailbox %d failed: %d", + mailbox_id, ret); edgetpu_wakelock_unlock(client->wakelock); return ret; } -int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, u32 mailbox_ids) +int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, u32 mailbox_id) { int ret; if (!edgetpu_wakelock_lock(client->wakelock)) { etdev_err(client->etdev, - "Disabling mailboxes %08x needs wakelock acquired\n", - mailbox_ids); + "Disabling mailbox %d needs wakelock acquired\n", + mailbox_id); edgetpu_wakelock_unlock(client->wakelock); return -EAGAIN; } @@ -808,47 +809,45 @@ int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, u32 mailbox_ids) edgetpu_wakelock_dec_event_locked(client->wakelock, EDGETPU_WAKELOCK_EVENT_EXT_MAILBOX); - etdev_dbg(client->etdev, "Disabling mailbox: %08X\n", mailbox_ids); - ret = edgetpu_mailbox_deactivate(client->etdev, mailbox_ids); + etdev_dbg(client->etdev, "Disabling mailbox: %d\n", mailbox_id); + ret = edgetpu_mailbox_deactivate(client->etdev, mailbox_id); if (ret) - etdev_err(client->etdev, "Deactivate mailboxes %08x failed: %d", - mailbox_ids, ret); + etdev_err(client->etdev, "Deactivate mailbox %d failed: %d", + mailbox_id, ret); edgetpu_wakelock_unlock(client->wakelock); return ret; } -int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_ids) +int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_id, s16 vcid, bool first_open) { struct edgetpu_handshake *eh = &etdev->mailbox_manager->open_devices; - u32 to_send; + const u32 bit = BIT(mailbox_id); int ret = 0; mutex_lock(&eh->lock); - to_send = mailbox_ids & ~eh->fw_state; - if (to_send) - ret = edgetpu_kci_open_device(etdev->kci, to_send); + if (bit & ~eh->fw_state) + ret = edgetpu_kci_open_device(etdev->kci, mailbox_id, vcid, first_open); if (!ret) { - eh->state |= mailbox_ids; - eh->fw_state |= mailbox_ids; + eh->state |= bit; + eh->fw_state |= bit; } mutex_unlock(&eh->lock); return ret; } -int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_ids) +int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id) { struct edgetpu_handshake *eh = &etdev->mailbox_manager->open_devices; - u32 to_send; + const u32 bit = BIT(mailbox_id); int ret = 0; mutex_lock(&eh->lock); - to_send = mailbox_ids & eh->fw_state; - if (to_send) - ret = edgetpu_kci_close_device(etdev->kci, to_send); + if (bit & eh->fw_state) + ret = edgetpu_kci_close_device(etdev->kci, mailbox_id); if (!ret) { - eh->state &= ~mailbox_ids; - eh->fw_state &= ~mailbox_ids; + eh->state &= ~bit; + eh->fw_state &= ~bit; } mutex_unlock(&eh->lock); return ret; diff --git a/drivers/edgetpu/edgetpu-mailbox.h b/drivers/edgetpu/edgetpu-mailbox.h index 8425807..d0fb9ae 100644 --- a/drivers/edgetpu/edgetpu-mailbox.h +++ b/drivers/edgetpu/edgetpu-mailbox.h @@ -302,26 +302,26 @@ void edgetpu_mailbox_restore_active_vii_queues(struct edgetpu_dev *etdev); int edgetpu_mailbox_p2p_batch(struct edgetpu_mailbox_manager *mgr, uint n, uint skip_i, struct edgetpu_mailbox **mailboxes); -/* Notify firmware of external mailboxes becoming active */ -int edgetpu_mailbox_enable_ext(struct edgetpu_client *client, u32 mailbox_ids); +/* Notify firmware of an external mailbox becoming active */ +int edgetpu_mailbox_enable_ext(struct edgetpu_client *client, u32 mailbox_id); -/* Notify firmware of external mailboxes becoming inactive */ -int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, u32 mailbox_ids); +/* Notify firmware of an external mailbox becoming inactive */ +int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, u32 mailbox_id); /* - * Activates @mailbox_ids, OPEN_DEVICE KCI will be sent. + * Activates @mailbox_id, OPEN_DEVICE KCI will be sent. * - * If @mailbox_ids are known to be activated, KCI is not sent and this function + * If @mailbox_id is known to be activated, KCI is not sent and this function * returns 0. * * Returns what edgetpu_kci_open_device() returned. * Caller ensures device is powered on. */ -int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_ids); +int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_id, s16 vcid, bool first_open); /* * Similar to edgetpu_mailbox_activate() but sends CLOSE_DEVICE KCI instead. */ -int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_ids); +int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id); /* Sets @eh->fw_state to 0. */ void edgetpu_handshake_clear_fw_state(struct edgetpu_handshake *eh); diff --git a/drivers/edgetpu/edgetpu-mmu.h b/drivers/edgetpu/edgetpu-mmu.h index 8c5ae3c..094f14d 100644 --- a/drivers/edgetpu/edgetpu-mmu.h +++ b/drivers/edgetpu/edgetpu-mmu.h @@ -199,9 +199,7 @@ void edgetpu_mmu_free(struct edgetpu_dev *etdev, tpu_addr_t tpu_addr, * * Description: Add a mapping from iova -> paddr to the MMU for the chip. * paddr can be considered a physical address from the TPU's viewpoint, but - * may actually be another IOVA for another IOMMU downstream of the chip MMU - * (as on Hermosa, where the SMMU translates TPU VAs to IOVAs sent to the IOMMU - * downstream of the TPU). + * may actually be another IOVA for another IOMMU downstream of the chip MMU. * * Note: for chipsets with edgetpu_mmu_alloc() support, @iova passed to this * function must be either allocated from edgetpu_mmu_alloc() or reserved by @@ -230,12 +228,12 @@ void edgetpu_mmu_remove_translation(struct edgetpu_dev *etdev, * @context_id: context ID for the mapping * @mmu_flags: the flag or'ed with EDGETPU_MMU_* macros * - * Description: For chips with internal MMUs (e.g., Hermosa SMMU), add the - * required internal MMU mapping for the TPU to access @downstream_addr, the - * DMA or physical address of the buffer as returned by the Linux DMA API when - * the DMA mapping was created. This can be used with, for example, buffers - * allocated using dma_alloc_coherent(), which are mapped appropriately for - * any downstream IOMMU and must be mapped to the TPU internal MMU as well. + * Description: For chips with internal MMUs, add the required internal MMU + * mapping for the TPU to access @down_addr, the DMA or physical address of the + * buffer as returned by the Linux DMA API when the DMA mapping was created. + * This can be used with, for example, buffers allocated using + * dma_alloc_coherent(), which are mapped appropriately for any downstream IOMMU + * and must be mapped to the TPU internal MMU as well. * * For a chip that doesn't have an internal MMU but has the IOMMU domain AUX * feature, perform the necessary mapping to @context_id and return the @@ -261,9 +259,8 @@ void edgetpu_mmu_tpu_unmap(struct edgetpu_dev *etdev, * @context_id: context ID for the mapping * @mmu_flags: the flag or'ed with EDGETPU_MMU_* macros * - * Description: For chips with internal MMUs (e.g., Hermosa SMMU), add the - * required internal MMU mapping for the TPU to access the DMA addresses of - * @sgt. + * Description: For chips with internal MMUs, add the required internal MMU + * mapping for the TPU to access the DMA addresses of @sgt. * * For a chip that doesn't have an internal MMU but has the IOMMU domain AUX * feature, perform the necessary mapping to @context_id and return the diff --git a/drivers/edgetpu/edgetpu-pm.c b/drivers/edgetpu/edgetpu-pm.c index 1e28141..df1c179 100644 --- a/drivers/edgetpu/edgetpu-pm.c +++ b/drivers/edgetpu/edgetpu-pm.c @@ -242,7 +242,7 @@ static int pchannel_state_change_request(struct edgetpu_dev *etdev, int state) if (state == STATE_RUN) return 0; - /* Phase 3: R52 acknowledgment */ + /* Phase 3: CPU acknowledgment */ ret = etdev_poll_power_state(etdev, val, (val & PACCEPT) || (val & PDENY)); if (val & PDENY) { diff --git a/drivers/edgetpu/edgetpu-sw-watchdog.c b/drivers/edgetpu/edgetpu-sw-watchdog.c index 5d96e4d..4e7f681 100644 --- a/drivers/edgetpu/edgetpu-sw-watchdog.c +++ b/drivers/edgetpu/edgetpu-sw-watchdog.c @@ -5,6 +5,7 @@ * Copyright (C) 2020 Google, Inc. */ +#include <asm/barrier.h> #include <linux/atomic.h> #include <linux/module.h> #include <linux/slab.h> @@ -124,21 +125,29 @@ int edgetpu_sw_wdt_create(struct edgetpu_dev *etdev, unsigned long active_ms, int edgetpu_sw_wdt_start(struct edgetpu_dev *etdev) { - struct edgetpu_sw_wdt *etdev_sw_wdt = etdev->etdev_sw_wdt; + struct edgetpu_sw_wdt *wdt; - if (!etdev_sw_wdt) + /* to match edgetpu_sw_wdt_destroy() */ + smp_mb(); + wdt = etdev->etdev_sw_wdt; + if (!wdt) return -EINVAL; - if (!etdev_sw_wdt->et_action_work.edgetpu_sw_wdt_handler) + if (!wdt->et_action_work.edgetpu_sw_wdt_handler) etdev_err(etdev, "sw wdt handler not set\n"); - sw_wdt_start(etdev_sw_wdt); + sw_wdt_start(wdt); return 0; } void edgetpu_sw_wdt_stop(struct edgetpu_dev *etdev) { - if (!etdev->etdev_sw_wdt) + struct edgetpu_sw_wdt *wdt; + + /* to match edgetpu_sw_wdt_destroy() */ + smp_mb(); + wdt = etdev->etdev_sw_wdt; + if (!wdt) return; - sw_wdt_stop(etdev->etdev_sw_wdt); + sw_wdt_stop(wdt); } void edgetpu_sw_wdt_destroy(struct edgetpu_dev *etdev) @@ -149,9 +158,14 @@ void edgetpu_sw_wdt_destroy(struct edgetpu_dev *etdev) if (!wdt) return; etdev->etdev_sw_wdt = NULL; + /* + * To ensure that etdev->etdev_sw_wdt is NULL so wdt_start() calls from other processes + * won't start the watchdog again. + */ + smp_mb(); + sw_wdt_stop(wdt); /* cancel and sync work due to watchdog bite to prevent UAF */ cancel_work_sync(&wdt->et_action_work.work); - sw_wdt_stop(wdt); counter = atomic_read(&wdt->active_counter); if (counter) etdev_warn(etdev, "Unbalanced WDT active counter: %d", counter); diff --git a/drivers/edgetpu/edgetpu-usage-stats.h b/drivers/edgetpu/edgetpu-usage-stats.h index 7ea3e9d..20d5ad7 100644 --- a/drivers/edgetpu/edgetpu-usage-stats.h +++ b/drivers/edgetpu/edgetpu-usage-stats.h @@ -36,9 +36,9 @@ struct tpu_usage { * Must be kept in sync with firmware struct Component. */ enum edgetpu_usage_component { - /* The device as a whole (TPU, R52, DMA330, etc.) */ + /* The device as a whole */ EDGETPU_USAGE_COMPONENT_DEVICE = 0, - /* Just the TPU core (scalar core and tiles) */ + /* Just the TPU core */ EDGETPU_USAGE_COMPONENT_TPU = 1, EDGETPU_USAGE_COMPONENT_COUNT = 2, /* number of components above */ }; @@ -112,20 +112,9 @@ struct __packed edgetpu_usage_max_watermark { /* An enum to identify the tracked firmware threads. */ /* Must be kept in sync with firmware enum class UsageTrackerThreadId. */ enum edgetpu_usage_threadid { - EDGETPU_FW_THREAD_MAIN = 0, - EDGETPU_FW_THREAD_KCI_HANDLER = 1, - EDGETPU_FW_THREAD_POWER_ADMIN = 2, - EDGETPU_FW_THREAD_VII_SCHEDULER = 3, - EDGETPU_FW_THREAD_VII_HANDLER = 4, - EDGETPU_FW_THREAD_MCP_GRAPH_DRIVER = 5, - EDGETPU_FW_THREAD_SCP_GRAPH_DRIVER = 6, - EDGETPU_FW_THREAD_TPU_DRIVER = 7, - EDGETPU_FW_THREAD_RESTART_HANDLER = 8, - EDGETPU_FW_THREAD_POLL_SERVICE = 9, - EDGETPU_FW_THREAD_DMA_DRIVER = 10, - EDGETPU_FW_THREAD_GRAPH_DMA_DRIVER = 11, - - /* Number of task identifiers above. */ + /* Individual thread IDs are not tracked. */ + + /* Number of task identifiers. */ EDGETPU_FW_THREAD_COUNT = 12, }; diff --git a/drivers/edgetpu/edgetpu.h b/drivers/edgetpu/edgetpu.h index 93d7afa..b5a52c2 100644 --- a/drivers/edgetpu/edgetpu.h +++ b/drivers/edgetpu/edgetpu.h @@ -154,6 +154,9 @@ struct edgetpu_event_register { * released. */ #define EDGETPU_PRIORITY_DETACHABLE (1u << 3) +/* For @partition_type. */ +#define EDGETPU_PARTITION_NORMAL 0 +#define EDGETPU_PARTITION_EXTRA 1 struct edgetpu_mailbox_attr { /* * There are limitations on these size fields, see the error cases in @@ -166,6 +169,8 @@ struct edgetpu_mailbox_attr { __u32 sizeof_resp; /* size of response element in bytes */ __u32 priority : 4; /* mailbox service priority */ __u32 cmdq_tail_doorbell: 1; /* auto doorbell on cmd queue tail move */ + /* Type of memory partitions to be used for this group, exact meaning is chip-dependent. */ + __u32 partition_type : 1; }; /* @@ -517,14 +522,14 @@ struct edgetpu_device_dram_usage { _IOR(EDGETPU_IOCTL_BASE, 29, struct edgetpu_device_dram_usage) /* - * struct edgetpu_ext_mailbox + * struct edgetpu_ext_mailbox_ioctl * @client_id: Client identifier (may not be needed depending on type) * @attrs: Array of mailbox attributes (pointer to * edgetpu_mailbox_attr, may be NULL depending on type) * @type: One of the EDGETPU_EXT_MAILBOX_xxx values * @count: Number of mailboxes to acquire */ -struct edgetpu_ext_mailbox { +struct edgetpu_ext_mailbox_ioctl { __u64 client_id; __u64 attrs; __u32 type; @@ -536,13 +541,53 @@ struct edgetpu_ext_mailbox { * runtime. This can be a secure mailbox or a device-to-device mailbox. */ #define EDGETPU_ACQUIRE_EXT_MAILBOX \ - _IOW(EDGETPU_IOCTL_BASE, 30, struct edgetpu_ext_mailbox) + _IOW(EDGETPU_IOCTL_BASE, 30, struct edgetpu_ext_mailbox_ioctl) /* * Release a chip-specific mailbox that is not directly managed by the TPU * runtime. This can be a secure mailbox or a device-to-device mailbox. */ #define EDGETPU_RELEASE_EXT_MAILBOX \ - _IOW(EDGETPU_IOCTL_BASE, 31, struct edgetpu_ext_mailbox) + _IOW(EDGETPU_IOCTL_BASE, 31, struct edgetpu_ext_mailbox_ioctl) + +/* Fatal error event bitmasks... */ +/* Firmware crash in non-restartable thread */ +#define EDGETPU_ERROR_FW_CRASH 0x1 +/* Host or device watchdog timeout */ +#define EDGETPU_ERROR_WATCHDOG_TIMEOUT 0x2 +/* Thermal shutdown */ +#define EDGETPU_ERROR_THERMAL_STOP 0x4 +/* TPU hardware inaccessible: link fail, memory protection unit blocking... */ +#define EDGETPU_ERROR_HW_NO_ACCESS 0x8 +/* Various hardware failures */ +#define EDGETPU_ERROR_HW_FAIL 0x10 + +/* + * Return fatal errors raised for the client's device group, as a bitmask of + * the above fatal error event codes, or zero if no errors encountered or + * client is not part of a device group. + */ +#define EDGETPU_GET_FATAL_ERRORS \ + _IOR(EDGETPU_IOCTL_BASE, 32, __u32) + +/* Fatal error event bitmasks... */ +/* Firmware crash in non-restartable thread */ +#define EDGETPU_ERROR_FW_CRASH 0x1 +/* Host or device watchdog timeout */ +#define EDGETPU_ERROR_WATCHDOG_TIMEOUT 0x2 +/* Thermal shutdown */ +#define EDGETPU_ERROR_THERMAL_STOP 0x4 +/* TPU hardware inaccessible: link fail, memory protection unit blocking... */ +#define EDGETPU_ERROR_HW_NO_ACCESS 0x8 +/* Various hardware failures */ +#define EDGETPU_ERROR_HW_FAIL 0x10 + +/* + * Return fatal errors raised for the client's device group, as a bitmask of + * the above fatal error event codes, or zero if no errors encountered or + * client is not part of a device group. + */ +#define EDGETPU_GET_FATAL_ERRORS \ + _IOR(EDGETPU_IOCTL_BASE, 32, __u32) #endif /* __EDGETPU_H__ */ |