author     Nrithya Kanakasabapathy <nrithya@google.com>  2021-06-17 19:48:23 +0000
committer  Todd Poynor <toddpoynor@google.com>           2021-06-17 23:06:39 +0000
commit     f5ae5699d42449c79ff95e52d393d2a539015abb (patch)
tree       561395d169b8196d83a4cb82ccb652272d9749ea
parent     631c79d974fe2d57134574606f89a1d1936ddbb7 (diff)
download   edgetpu-f5ae5699d42449c79ff95e52d393d2a539015abb.tar.gz
Merge branch 'whitechapel' into android-gs-pixel-5.10
* whitechapel: (33 commits)
  edgetpu: add edgetpu_kci_send_cmd_with_data
  edgetpu: activate single mbox instead of bitmasks
  edgetpu: add get_fatal_errors ioctl
  edgetpu: set fatal error event codes
  edgetpu: add fatal error event codes
  edgetpu: remove redundant args
  edgetpu: abrolhos scrub more TPU CPU references
  edgetpu: remove some references to codenames and hardware details
  edgetpu: remove some details from usage-stats comments and symbols
  edgetpu: fix typo in debug dump header
  edgetpu: log error when firmware load failed
  edgetpu: google: use default domain when AUX disabled
  edgetpu: fix edgetpu_mmu_alloc_domain memory leak
  edgetpu: remove dependency on iommu_group_id
  edgetpu: fix watchdog job cancel ordering
  edgetpu: don't check mailbox_detachable on fs_release
  ...

Signed-off-by: Nrithya Kanakasabapathy <nrithya@google.com>
Change-Id: I9e876a348e290740cbc844cc7f48566f65071341
Bug: 191153847
-rw-r--r--  drivers/edgetpu/abrolhos-device.c | 18
-rw-r--r--  drivers/edgetpu/abrolhos-pm.c | 10
-rw-r--r--  drivers/edgetpu/abrolhos/config-mailbox.h | 2
-rw-r--r--  drivers/edgetpu/abrolhos/config.h | 12
-rw-r--r--  drivers/edgetpu/edgetpu-config.h | 15
-rw-r--r--  drivers/edgetpu/edgetpu-core.c | 6
-rw-r--r--  drivers/edgetpu/edgetpu-debug-dump.h | 4
-rw-r--r--  drivers/edgetpu/edgetpu-device-group.c | 121
-rw-r--r--  drivers/edgetpu/edgetpu-device-group.h | 27
-rw-r--r--  drivers/edgetpu/edgetpu-firmware.c | 59
-rw-r--r--  drivers/edgetpu/edgetpu-firmware.h | 4
-rw-r--r--  drivers/edgetpu/edgetpu-fs.c | 41
-rw-r--r--  drivers/edgetpu/edgetpu-google-iommu.c | 16
-rw-r--r--  drivers/edgetpu/edgetpu-internal.h | 14
-rw-r--r--  drivers/edgetpu/edgetpu-kci.c | 97
-rw-r--r--  drivers/edgetpu/edgetpu-kci.h | 34
-rw-r--r--  drivers/edgetpu/edgetpu-mailbox.c | 57
-rw-r--r--  drivers/edgetpu/edgetpu-mailbox.h | 16
-rw-r--r--  drivers/edgetpu/edgetpu-mmu.h | 21
-rw-r--r--  drivers/edgetpu/edgetpu-pm.c | 2
-rw-r--r--  drivers/edgetpu/edgetpu-sw-watchdog.c | 28
-rw-r--r--  drivers/edgetpu/edgetpu-usage-stats.h | 21
-rw-r--r--  drivers/edgetpu/edgetpu.h | 53
23 files changed, 424 insertions, 254 deletions
diff --git a/drivers/edgetpu/abrolhos-device.c b/drivers/edgetpu/abrolhos-device.c
index a867c13..fe3da83 100644
--- a/drivers/edgetpu/abrolhos-device.c
+++ b/drivers/edgetpu/abrolhos-device.c
@@ -172,29 +172,29 @@ void edgetpu_chip_handle_reverse_kci(struct edgetpu_dev *etdev,
static int abrolhos_check_ext_mailbox_args(const char *func,
struct edgetpu_dev *etdev,
- struct edgetpu_ext_mailbox *ext_mbox)
+ struct edgetpu_ext_mailbox_ioctl *args)
{
- if (ext_mbox->type != EDGETPU_EXT_MAILBOX_TYPE_TZ) {
+ if (args->type != EDGETPU_EXT_MAILBOX_TYPE_TZ) {
etdev_err(etdev, "%s: Invalid type %d != %d\n", func,
- ext_mbox->type, EDGETPU_EXT_MAILBOX_TYPE_TZ);
+ args->type, EDGETPU_EXT_MAILBOX_TYPE_TZ);
return -EINVAL;
}
- if (ext_mbox->count != 1) {
+ if (args->count != 1) {
etdev_err(etdev, "%s: Invalid mailbox count: %d != 1\n", func,
- ext_mbox->count);
+ args->count);
return -EINVAL;
}
return 0;
}
int edgetpu_chip_acquire_ext_mailbox(struct edgetpu_client *client,
- struct edgetpu_ext_mailbox *ext_mbox)
+ struct edgetpu_ext_mailbox_ioctl *args)
{
struct abrolhos_platform_dev *apdev = to_abrolhos_dev(client->etdev);
int ret;
ret = abrolhos_check_ext_mailbox_args(__func__, client->etdev,
- ext_mbox);
+ args);
if (ret)
return ret;
@@ -213,13 +213,13 @@ int edgetpu_chip_acquire_ext_mailbox(struct edgetpu_client *client,
}
int edgetpu_chip_release_ext_mailbox(struct edgetpu_client *client,
- struct edgetpu_ext_mailbox *ext_mbox)
+ struct edgetpu_ext_mailbox_ioctl *args)
{
struct abrolhos_platform_dev *apdev = to_abrolhos_dev(client->etdev);
int ret = 0;
ret = abrolhos_check_ext_mailbox_args(__func__, client->etdev,
- ext_mbox);
+ args);
if (ret)
return ret;
diff --git a/drivers/edgetpu/abrolhos-pm.c b/drivers/edgetpu/abrolhos-pm.c
index 75a3c59..2930917 100644
--- a/drivers/edgetpu/abrolhos-pm.c
+++ b/drivers/edgetpu/abrolhos-pm.c
@@ -510,8 +510,7 @@ static int abrolhos_power_up(struct edgetpu_pm *etpm)
static void
abrolhos_pm_shutdown_firmware(struct abrolhos_platform_dev *etpdev,
- struct edgetpu_dev *etdev,
- struct abrolhos_platform_dev *abpdev)
+ struct edgetpu_dev *etdev)
{
if (!edgetpu_pchannel_power_down(etdev, false))
return;
@@ -520,9 +519,10 @@ abrolhos_pm_shutdown_firmware(struct abrolhos_platform_dev *etpdev,
etdev_warn(etdev, "Requesting early GSA reset\n");
/*
- * p-channel failed, request GSA shutdown to make sure the R52 core is
+ * p-channel failed, request GSA shutdown to make sure the CPU is
* reset.
- * The GSA->APM request will clear any pending DVFS status from R52.
+ * The GSA->APM request will clear any pending DVFS status from the
+ * CPU.
*/
gsa_send_tpu_cmd(etpdev->gsa_dev, GSA_TPU_SHUTDOWN);
}
@@ -582,7 +582,7 @@ static void abrolhos_power_down(struct edgetpu_pm *etpm)
if (etdev->kci && edgetpu_firmware_status_locked(etdev) == FW_VALID) {
/* Update usage stats before we power off fw. */
edgetpu_kci_update_usage_locked(etdev);
- abrolhos_pm_shutdown_firmware(abpdev, etdev, abpdev);
+ abrolhos_pm_shutdown_firmware(abpdev, etdev);
edgetpu_kci_cancel_work_queues(etdev->kci);
}
diff --git a/drivers/edgetpu/abrolhos/config-mailbox.h b/drivers/edgetpu/abrolhos/config-mailbox.h
index b19cf46..4bec2d5 100644
--- a/drivers/edgetpu/abrolhos/config-mailbox.h
+++ b/drivers/edgetpu/abrolhos/config-mailbox.h
@@ -14,7 +14,7 @@
#define EDGETPU_NUM_VII_MAILBOXES (EDGETPU_NUM_MAILBOXES - 1)
#define EDGETPU_NUM_P2P_MAILBOXES 0
-#define ABROLHOS_TZ_MAILBOX_ID (1 << 8)
+#define ABROLHOS_TZ_MAILBOX_ID 8
#define ABROLHOS_CSR_MBOX2_CONTEXT_ENABLE 0xe0000
#define ABROLHOS_CSR_MBOX2_CMD_QUEUE_DOORBELL_SET 0xe1000
diff --git a/drivers/edgetpu/abrolhos/config.h b/drivers/edgetpu/abrolhos/config.h
index f6397fd..00ec926 100644
--- a/drivers/edgetpu/abrolhos/config.h
+++ b/drivers/edgetpu/abrolhos/config.h
@@ -13,12 +13,16 @@
#define EDGETPU_DEV_MAX 1
#define EDGETPU_HAS_MULTI_GROUPS
+/* Max number of virtual context IDs that can be allocated for one device. */
+#define EDGETPU_NUM_VCIDS 16
+/* Reserved VCID that uses the extra partition. */
+#define EDGETPU_VCID_EXTRA_PARTITION 0
#define EDGETPU_HAS_WAKELOCK
/*
- * A remapped data region is available. This will be accessible by the R52
- * regardless of active context and is typically used for logging buffer and
+ * A remapped data region is available. This will be accessible by the TPU
+ * CPU regardless of active context and is typically used for logging buffer and
* non-secure mailbox queues.
*/
#define EDGETPU_HAS_REMAPPED_DATA
@@ -47,11 +51,11 @@
/*
* Instruction remap registers make carveout memory appear at address
- * 0x10000000 from the R52 perspective
+ * 0x10000000 from the TPU CPU perspective
*/
#define EDGETPU_INSTRUCTION_REMAP_BASE 0x10000000
-/* Address from which the R52 can access data in the remapped region */
+/* Address from which the TPU CPU can access data in the remapped region */
#define EDGETPU_REMAPPED_DATA_ADDR \
(EDGETPU_INSTRUCTION_REMAP_BASE + EDGETPU_REMAPPED_DATA_OFFSET)
diff --git a/drivers/edgetpu/edgetpu-config.h b/drivers/edgetpu/edgetpu-config.h
index 5a13adb..a76d8e3 100644
--- a/drivers/edgetpu/edgetpu-config.h
+++ b/drivers/edgetpu/edgetpu-config.h
@@ -8,18 +8,6 @@
#ifndef __EDGETPU_CONFIG_H__
#define __EDGETPU_CONFIG_H__
-#ifdef CONFIG_HERMOSA
-
-#include "hermosa/config.h"
-
-#else /* !CONFIG_HERMOSA */
-
-#ifdef CONFIG_JANEIRO
-
-#include "janeiro/config.h"
-
-#else
-
#ifndef CONFIG_ABROLHOS
#define CONFIG_ABROLHOS
#warning "Building default chipset abrolhos"
@@ -27,9 +15,6 @@
#include "abrolhos/config.h"
-#endif /* CONFIG_JANEIRO */
-#endif /* CONFIG_HERMOSA */
-
#define EDGETPU_DEFAULT_FIRMWARE_NAME "google/edgetpu-" DRIVER_NAME ".fw"
#define EDGETPU_TEST_FIRMWARE_NAME "google/edgetpu-" DRIVER_NAME "-test.fw"
diff --git a/drivers/edgetpu/edgetpu-core.c b/drivers/edgetpu/edgetpu-core.c
index 0820e95..86bf7d8 100644
--- a/drivers/edgetpu/edgetpu-core.c
+++ b/drivers/edgetpu/edgetpu-core.c
@@ -370,6 +370,7 @@ int edgetpu_device_add(struct edgetpu_dev *etdev,
INIT_LIST_HEAD(&etdev->groups);
etdev->n_groups = 0;
etdev->group_join_lockout = false;
+ etdev->vcid_pool = (1u << EDGETPU_NUM_VCIDS) - 1;
mutex_init(&etdev->state_lock);
etdev->state = ETDEV_STATE_NOFW;
@@ -582,11 +583,12 @@ void edgetpu_handle_firmware_crash(struct edgetpu_dev *etdev,
if (crash_type == EDGETPU_FW_CRASH_UNRECOV_FAULT) {
etdev_err(etdev, "firmware unrecoverable crash");
etdev->firmware_crash_count++;
- edgetpu_fatal_error_notify(etdev);
+ edgetpu_fatal_error_notify(etdev, EDGETPU_ERROR_FW_CRASH);
/* Restart firmware without chip reset */
edgetpu_watchdog_bite(etdev, false);
} else {
- etdev_err(etdev, "firmware crash event: %u", crash_type);
+ etdev_err(etdev, "firmware non-fatal crash event: %u",
+ crash_type);
}
}
diff --git a/drivers/edgetpu/edgetpu-debug-dump.h b/drivers/edgetpu/edgetpu-debug-dump.h
index ec33668..125ed1a 100644
--- a/drivers/edgetpu/edgetpu-debug-dump.h
+++ b/drivers/edgetpu/edgetpu-debug-dump.h
@@ -44,7 +44,7 @@ struct edgetpu_debug_stats {
struct edgetpu_dump_segment {
u64 type; /* type of the dump */
u64 size; /* size of the dump data */
- u64 src_addr; /* source of the dump on the R52 address map */
+ u64 src_addr; /* source of the dump on the CPU address map */
};
struct edgetpu_debug_dump {
@@ -89,4 +89,4 @@ int edgetpu_get_debug_dump(struct edgetpu_dev *etdev,
*/
void edgetpu_debug_dump_resp_handler(struct edgetpu_dev *etdev);
-#endif /* EDEGETPU_DEBUG_DUMP_H_ */
+#endif /* EDGETPU_DEBUG_DUMP_H_ */
diff --git a/drivers/edgetpu/edgetpu-device-group.c b/drivers/edgetpu/edgetpu-device-group.c
index 3b2e6fc..57c0af5 100644
--- a/drivers/edgetpu/edgetpu-device-group.c
+++ b/drivers/edgetpu/edgetpu-device-group.c
@@ -6,7 +6,7 @@
*/
#include <linux/atomic.h>
-#include <linux/bits.h>
+#include <linux/bitops.h>
#include <linux/dma-direction.h>
#include <linux/dma-mapping.h>
#include <linux/eventfd.h>
@@ -94,7 +94,7 @@ static int edgetpu_kci_join_group_worker(struct kci_worker_param *param)
etdev_dbg(etdev, "%s: join group %u %u/%u", __func__,
group->workload_id, i + 1, group->n_clients);
- return edgetpu_kci_join_group(etdev->kci, etdev, group->n_clients, i);
+ return edgetpu_kci_join_group(etdev->kci, group->n_clients, i);
}
static int edgetpu_kci_leave_group_worker(struct kci_worker_param *param)
@@ -111,7 +111,12 @@ static int edgetpu_kci_leave_group_worker(struct kci_worker_param *param)
#endif /* EDGETPU_HAS_MCP */
-static int edgetpu_group_kci_open_device(struct edgetpu_device_group *group)
+/*
+ * Activates the VII mailbox @group owns.
+ *
+ * Caller holds group->lock.
+ */
+static int edgetpu_group_activate(struct edgetpu_device_group *group)
{
u8 mailbox_id;
int ret;
@@ -119,14 +124,22 @@ static int edgetpu_group_kci_open_device(struct edgetpu_device_group *group)
if (edgetpu_group_mailbox_detached_locked(group))
return 0;
mailbox_id = edgetpu_group_context_id_locked(group);
- ret = edgetpu_mailbox_activate(group->etdev, BIT(mailbox_id));
+ ret = edgetpu_mailbox_activate(group->etdev, mailbox_id, group->vcid, !group->activated);
if (ret)
- etdev_err(group->etdev, "activate mailbox failed with %d", ret);
+ etdev_err(group->etdev, "activate mailbox for VCID %d failed with %d", group->vcid,
+ ret);
+ else
+ group->activated = true;
atomic_inc(&group->etdev->job_count);
return ret;
}
-static void edgetpu_group_kci_close_device(struct edgetpu_device_group *group)
+/*
+ * Deactivates the VII mailbox @group owns.
+ *
+ * Caller holds group->lock.
+ */
+static void edgetpu_group_deactivate(struct edgetpu_device_group *group)
{
u8 mailbox_id;
int ret;
@@ -134,10 +147,10 @@ static void edgetpu_group_kci_close_device(struct edgetpu_device_group *group)
if (edgetpu_group_mailbox_detached_locked(group))
return;
mailbox_id = edgetpu_group_context_id_locked(group);
- ret = edgetpu_mailbox_deactivate(group->etdev, BIT(mailbox_id));
+ ret = edgetpu_mailbox_deactivate(group->etdev, mailbox_id);
if (ret)
- etdev_err(group->etdev, "deactivate mailbox failed with %d",
- ret);
+ etdev_err(group->etdev, "deactivate mailbox for VCID %d failed with %d",
+ group->vcid, ret);
return;
}
@@ -156,7 +169,16 @@ static void edgetpu_device_group_kci_leave(struct edgetpu_device_group *group)
{
#ifdef EDGETPU_HAS_MULTI_GROUPS
edgetpu_kci_update_usage_async(group->etdev);
- return edgetpu_group_kci_close_device(group);
+ /*
+ * Theoretically we don't need to check @dev_inaccessible here.
+ * @dev_inaccessible is true implies the client has wakelock count zero, under such case
+ * edgetpu_mailbox_deactivate() has been called on releasing the wakelock and therefore this
+ * edgetpu_group_deactivate() call won't send any KCI.
+ * Still have a check here in case this function does CSR programming other than calling
+ * edgetpu_mailbox_deactivate() someday.
+ */
+ if (!group->dev_inaccessible)
+ edgetpu_group_deactivate(group);
#else /* !EDGETPU_HAS_MULTI_GROUPS */
struct kci_worker_param *params =
kmalloc_array(group->n_clients, sizeof(*params), GFP_KERNEL);
@@ -198,7 +220,7 @@ static int
edgetpu_device_group_kci_finalized(struct edgetpu_device_group *group)
{
#ifdef EDGETPU_HAS_MULTI_GROUPS
- return edgetpu_group_kci_open_device(group);
+ return edgetpu_group_activate(group);
#else /* !EDGETPU_HAS_MULTI_GROUPS */
struct kci_worker_param *params =
kmalloc_array(group->n_clients, sizeof(*params), GFP_KERNEL);
@@ -537,6 +559,22 @@ static int edgetpu_dev_add_group(struct edgetpu_dev *etdev,
goto error_unlock;
}
#endif /* !EDGETPU_HAS_MULTI_GROUPS */
+ if (group->etdev == etdev) {
+ u32 vcid_pool = etdev->vcid_pool;
+
+#ifdef EDGETPU_VCID_EXTRA_PARTITION
+ if (group->mbox_attr.partition_type != EDGETPU_PARTITION_EXTRA)
+ vcid_pool &= ~BIT(EDGETPU_VCID_EXTRA_PARTITION);
+ else
+ vcid_pool &= BIT(EDGETPU_VCID_EXTRA_PARTITION);
+#endif
+ if (!vcid_pool) {
+ ret = -EBUSY;
+ goto error_unlock;
+ }
+ group->vcid = ffs(vcid_pool) - 1;
+ etdev->vcid_pool &= ~BIT(group->vcid);
+ }
l->grp = edgetpu_device_group_get(group);
list_add_tail(&l->list, &etdev->groups);
etdev->n_groups++;
@@ -611,6 +649,8 @@ void edgetpu_device_group_leave(struct edgetpu_client *client)
mutex_lock(&client->etdev->groups_lock);
list_for_each_entry(l, &client->etdev->groups, list) {
if (l->grp == group) {
+ if (group->etdev == client->etdev)
+ client->etdev->vcid_pool |= BIT(group->vcid);
list_del(&l->list);
edgetpu_device_group_put(l->grp);
kfree(l);
@@ -1630,17 +1670,64 @@ out:
return ret;
}
-void edgetpu_fatal_error_notify(struct edgetpu_dev *etdev)
+/*
+ * For each group active on @etdev: set the group status as errored, set the
+ * error mask, and notify the runtime of the fatal error event.
+ */
+void edgetpu_fatal_error_notify(struct edgetpu_dev *etdev, uint error_mask)
{
- struct edgetpu_list_group *l;
+ size_t i, num_groups = 0;
struct edgetpu_device_group *group;
+ struct edgetpu_device_group **groups;
+ struct edgetpu_list_group *g;
mutex_lock(&etdev->groups_lock);
+ groups = kmalloc_array(etdev->n_groups, sizeof(*groups), GFP_KERNEL);
+ if (unlikely(!groups)) {
+ /*
+ * Just give up setting status in this case, this only happens
+ * when the system is OOM.
+ */
+ mutex_unlock(&etdev->groups_lock);
+ return;
+ }
+ /*
+ * Fetch the groups into an array to set the group status without
+ * holding @etdev->groups_lock. To prevent the potential deadlock that
+ * edgetpu_device_group_add() holds group->lock then etdev->groups_lock.
+ */
+ etdev_for_each_group(etdev, g, group) {
+ if (edgetpu_device_group_is_disbanded(group))
+ continue;
+ groups[num_groups++] = edgetpu_device_group_get(group);
+ }
+ mutex_unlock(&etdev->groups_lock);
+ for (i = 0; i < num_groups; i++) {
+ group = groups[i];
+ mutex_lock(&group->lock);
+ /*
+ * Only finalized groups may have handshake with the FW, mark
+ * them as errored.
+ */
- etdev_for_each_group(etdev, l, group)
+ if (edgetpu_device_group_is_finalized(group))
+ group->status = EDGETPU_DEVICE_GROUP_ERRORED;
+ group->fatal_errors |= error_mask;
+ mutex_unlock(&group->lock);
edgetpu_group_notify(group, EDGETPU_EVENT_FATAL_ERROR);
+ edgetpu_device_group_put(group);
+ }
+ kfree(groups);
+}
- mutex_unlock(&etdev->groups_lock);
+uint edgetpu_group_get_fatal_errors(struct edgetpu_device_group *group)
+{
+ uint fatal_errors;
+
+ mutex_lock(&group->lock);
+ fatal_errors = group->fatal_errors;
+ mutex_unlock(&group->lock);
+ return fatal_errors;
}
void edgetpu_group_detach_mailbox_locked(struct edgetpu_device_group *group)
@@ -1661,7 +1748,7 @@ void edgetpu_group_close_and_detach_mailbox(struct edgetpu_device_group *group)
* Detaching mailbox for an errored group is also fine.
*/
if (is_finalized_or_errored(group)) {
- edgetpu_group_kci_close_device(group);
+ edgetpu_group_deactivate(group);
edgetpu_group_detach_mailbox_locked(group);
}
mutex_unlock(&group->lock);
@@ -1688,7 +1775,7 @@ int edgetpu_group_attach_and_open_mailbox(struct edgetpu_device_group *group)
if (edgetpu_device_group_is_finalized(group)) {
ret = edgetpu_group_attach_mailbox_locked(group);
if (!ret)
- ret = edgetpu_group_kci_open_device(group);
+ ret = edgetpu_group_activate(group);
}
mutex_unlock(&group->lock);
return ret;
diff --git a/drivers/edgetpu/edgetpu-device-group.h b/drivers/edgetpu/edgetpu-device-group.h
index 3a5e252..5e05799 100644
--- a/drivers/edgetpu/edgetpu-device-group.h
+++ b/drivers/edgetpu/edgetpu-device-group.h
@@ -69,6 +69,24 @@ struct edgetpu_device_group {
* creating this group.
*/
bool mailbox_detachable;
+ /*
+ * Whether group->etdev is inaccessible.
+ * Some group operations will access device CSRs. If the device is known to be
+ * inaccessible (typically not powered on) then set this field to true to
+ * prevent HW interactions.
+ *
+ * This field is always false for !EDGETPU_HAS_WAKELOCK chipsets.
+ *
+ * For EDGETPU_HAS_MCP chipsets this field should be replaced with a
+ * boolean array with size @n_clients, but we don't have a chipset with
+ * EDGETPU_HAS_MCP && EDGETPU_HAS_WAKELOCK yet.
+ *
+ * Is not protected by @lock because this is only written when releasing the
+ * leader of this group.
+ */
+ bool dev_inaccessible;
+ /* Virtual context ID to be sent to the firmware. */
+ u16 vcid;
/* protects everything in the following comment block */
struct mutex lock;
@@ -88,6 +106,7 @@ struct edgetpu_device_group {
*/
struct edgetpu_client **members;
enum edgetpu_device_group_status status;
+ bool activated; /* whether this group's VII has ever been activated */
struct edgetpu_vii vii; /* VII mailbox */
/*
* Context ID ranges from EDGETPU_CONTEXT_VII_BASE to
@@ -102,6 +121,9 @@ struct edgetpu_device_group {
/* matrix of P2P mailboxes */
struct edgetpu_p2p_mailbox **p2p_mailbox_matrix;
+ /* Mask of errors set for this group. */
+ uint fatal_errors;
+
/* end of fields protected by @lock */
/* TPU IOVA mapped to host DRAM space */
@@ -357,7 +379,10 @@ bool edgetpu_in_any_group(struct edgetpu_dev *etdev);
bool edgetpu_set_group_join_lockout(struct edgetpu_dev *etdev, bool lockout);
/* Notify all device groups of @etdev about a failure on the die */
-void edgetpu_fatal_error_notify(struct edgetpu_dev *etdev);
+void edgetpu_fatal_error_notify(struct edgetpu_dev *etdev, uint error_mask);
+
+/* Return fatal error signaled bitmask for device group */
+uint edgetpu_group_get_fatal_errors(struct edgetpu_device_group *group);
/*
* Detach and release the mailbox resources of VII from @group.
diff --git a/drivers/edgetpu/edgetpu-firmware.c b/drivers/edgetpu/edgetpu-firmware.c
index d0dc575..2a1e577 100644
--- a/drivers/edgetpu/edgetpu-firmware.c
+++ b/drivers/edgetpu/edgetpu-firmware.c
@@ -14,6 +14,7 @@
#include <linux/string.h>
#include <linux/types.h>
+#include "edgetpu.h"
#include "edgetpu-device-group.h"
#include "edgetpu-firmware.h"
#include "edgetpu-firmware-util.h"
@@ -180,7 +181,7 @@ static int edgetpu_firmware_load_locked(
if (handlers && handlers->alloc_buffer) {
ret = handlers->alloc_buffer(et_fw, &fw_desc->buf);
if (ret) {
- etdev_dbg(etdev, "handler alloc_buffer failed: %d\n",
+ etdev_err(etdev, "handler alloc_buffer failed: %d\n",
ret);
return ret;
}
@@ -188,14 +189,14 @@ static int edgetpu_firmware_load_locked(
ret = edgetpu_firmware_do_load_locked(et_fw, fw_desc, name);
if (ret) {
- etdev_dbg(etdev, "firmware request failed: %d\n", ret);
+ etdev_err(etdev, "firmware request failed: %d\n", ret);
goto out_free_buffer;
}
if (handlers && handlers->setup_buffer) {
ret = handlers->setup_buffer(et_fw, &fw_desc->buf);
if (ret) {
- etdev_dbg(etdev, "handler setup_buffer failed: %d\n",
+ etdev_err(etdev, "handler setup_buffer failed: %d\n",
ret);
goto out_do_unload_locked;
}
@@ -467,7 +468,7 @@ int edgetpu_firmware_run_locked(struct edgetpu_firmware *et_fw,
}
/*
- * Previous firmware buffer is not used anymore when R52 runs on
+ * Previous firmware buffer is not used anymore when the CPU runs on
* new firmware buffer. Unload this before et_fw->p->fw_buf is
* overwritten by new buffer information.
*/
@@ -698,54 +699,6 @@ static const struct attribute_group edgetpu_firmware_attr_group = {
.attrs = dev_attrs,
};
-/*
- * Sets all groups related to @etdev as errored.
- */
-static void edgetpu_set_groups_error(struct edgetpu_dev *etdev)
-{
- size_t i, num_groups = 0;
- struct edgetpu_device_group *group;
- struct edgetpu_device_group **groups;
- struct edgetpu_list_group *g;
-
- mutex_lock(&etdev->groups_lock);
- groups = kmalloc_array(etdev->n_groups, sizeof(*groups), GFP_KERNEL);
- if (unlikely(!groups)) {
- /*
- * Just give up setting status in this case, this only happens
- * when the system is OOM.
- */
- mutex_unlock(&etdev->groups_lock);
- edgetpu_fatal_error_notify(etdev);
- return;
- }
- /*
- * Fetch the groups into an array to set the group status without
- * holding @etdev->groups_lock. To prevent the potential deadlock that
- * edgetpu_device_group_add() holds group->lock then etdev->groups_lock.
- */
- etdev_for_each_group(etdev, g, group) {
- if (edgetpu_device_group_is_disbanded(group))
- continue;
- groups[num_groups++] = edgetpu_device_group_get(group);
- }
- mutex_unlock(&etdev->groups_lock);
- for (i = 0; i < num_groups; i++) {
- group = groups[i];
- mutex_lock(&group->lock);
- /*
- * Only finalized groups may have handshake with the FW, mark
- * them as errored.
- */
- if (edgetpu_device_group_is_finalized(group))
- group->status = EDGETPU_DEVICE_GROUP_ERRORED;
- mutex_unlock(&group->lock);
- edgetpu_device_group_put(group);
- }
- edgetpu_fatal_error_notify(etdev);
- kfree(groups);
-}
-
static void edgetpu_firmware_wdt_timeout_action(void *data)
{
int ret;
@@ -762,7 +715,7 @@ static void edgetpu_firmware_wdt_timeout_action(void *data)
* groups the CLOSE_DEVICE KCIs won't be sent.
*/
edgetpu_handshake_clear_fw_state(&etdev->mailbox_manager->open_devices);
- edgetpu_set_groups_error(etdev);
+ edgetpu_fatal_error_notify(etdev, EDGETPU_ERROR_WATCHDOG_TIMEOUT);
/* Another procedure is loading the firmware, let it do the work. */
if (edgetpu_firmware_is_loading(etdev))
diff --git a/drivers/edgetpu/edgetpu-firmware.h b/drivers/edgetpu/edgetpu-firmware.h
index e41543d..3b784c5 100644
--- a/drivers/edgetpu/edgetpu-firmware.h
+++ b/drivers/edgetpu/edgetpu-firmware.h
@@ -39,7 +39,7 @@ enum edgetpu_fw_flavor {
FW_FLAVOR_BL1 = 1,
/* systest app image */
FW_FLAVOR_SYSTEST = 2,
- /* default production app image from DarwiNN team */
+ /* default production app image */
FW_FLAVOR_PROD_DEFAULT = 3,
/* custom image produced by other teams */
FW_FLAVOR_CUSTOM = 4,
@@ -140,7 +140,7 @@ struct edgetpu_firmware_handlers {
struct edgetpu_firmware_buffer *fw_buf);
/*
* Platform-specific handling after firmware loaded, before running
- * the firmware, such as validating the firmware or resetting the R52
+ * the firmware, such as validating the firmware or resetting the
* processor.
*/
int (*prepare_run)(struct edgetpu_firmware *et_fw,
diff --git a/drivers/edgetpu/edgetpu-fs.c b/drivers/edgetpu/edgetpu-fs.c
index 1fd9106..311ea86 100644
--- a/drivers/edgetpu/edgetpu-fs.c
+++ b/drivers/edgetpu/edgetpu-fs.c
@@ -107,16 +107,21 @@ static int edgetpu_fs_release(struct inode *inode, struct file *file)
wakelock_count = edgetpu_wakelock_lock(client->wakelock);
mutex_lock(&client->group_lock);
/*
- * @wakelock = 0 means the device might be powered off. And for group with a non-detachable
- * mailbox, its mailbox is removed when the group is released, in such case we need to
- * ensure the device is powered to prevent kernel panic on programming VII mailbox CSRs.
+ * @wakelock_count = 0 means the device might be powered off. And for group with a
+ * non-detachable mailbox, its mailbox is removed when the group is released, in such case
+ * we need to ensure the device is powered to prevent kernel panic on programming VII
+ * mailbox CSRs.
*
* For mailbox-detachable groups the mailbox had been removed when the wakelock was
* released, edgetpu_device_group_release() doesn't need the device be powered in this case.
*/
if (!wakelock_count && client->group && !client->group->mailbox_detachable) {
- wakelock_count = 1;
- edgetpu_pm_get(etdev->pm);
+ /* assumes @group->etdev == @client->etdev, i.e. @client is the leader of @group */
+ if (!edgetpu_pm_get(etdev->pm))
+ wakelock_count = 1;
+ else
+ /* failed to power on - prevent group releasing from accessing the device */
+ client->group->dev_inaccessible = true;
}
mutex_unlock(&client->group_lock);
edgetpu_wakelock_unlock(client->wakelock);
@@ -626,9 +631,9 @@ edgetpu_ioctl_dram_usage(struct edgetpu_dev *etdev,
static int
edgetpu_ioctl_acquire_ext_mailbox(struct edgetpu_client *client,
- struct edgetpu_ext_mailbox __user *argp)
+ struct edgetpu_ext_mailbox_ioctl __user *argp)
{
- struct edgetpu_ext_mailbox ext_mailbox;
+ struct edgetpu_ext_mailbox_ioctl ext_mailbox;
if (copy_from_user(&ext_mailbox, argp, sizeof(ext_mailbox)))
return -EFAULT;
@@ -638,9 +643,9 @@ edgetpu_ioctl_acquire_ext_mailbox(struct edgetpu_client *client,
static int
edgetpu_ioctl_release_ext_mailbox(struct edgetpu_client *client,
- struct edgetpu_ext_mailbox __user *argp)
+ struct edgetpu_ext_mailbox_ioctl __user *argp)
{
- struct edgetpu_ext_mailbox ext_mailbox;
+ struct edgetpu_ext_mailbox_ioctl ext_mailbox;
if (copy_from_user(&ext_mailbox, argp, sizeof(ext_mailbox)))
return -EFAULT;
@@ -648,6 +653,21 @@ edgetpu_ioctl_release_ext_mailbox(struct edgetpu_client *client,
return edgetpu_chip_release_ext_mailbox(client, &ext_mailbox);
}
+static int edgetpu_ioctl_get_fatal_errors(struct edgetpu_client *client,
+ __u32 __user *argp)
+{
+ u32 fatal_errors = 0;
+ int ret = 0;
+
+ mutex_lock(&client->group_lock);
+ if (client->group)
+ fatal_errors = edgetpu_group_get_fatal_errors(client->group);
+ mutex_unlock(&client->group_lock);
+ if (copy_to_user(argp, &fatal_errors, sizeof(fatal_errors)))
+ ret = -EFAULT;
+ return ret;
+}
+
long edgetpu_ioctl(struct file *file, uint cmd, ulong arg)
{
struct edgetpu_client *client = file->private_data;
@@ -736,6 +756,9 @@ long edgetpu_ioctl(struct file *file, uint cmd, ulong arg)
case EDGETPU_RELEASE_EXT_MAILBOX:
ret = edgetpu_ioctl_release_ext_mailbox(client, argp);
break;
+ case EDGETPU_GET_FATAL_ERRORS:
+ ret = edgetpu_ioctl_get_fatal_errors(client, argp);
+ break;
default:
return -ENOTTY; /* unknown command */
diff --git a/drivers/edgetpu/edgetpu-google-iommu.c b/drivers/edgetpu/edgetpu-google-iommu.c
index 3496df0..9d28949 100644
--- a/drivers/edgetpu/edgetpu-google-iommu.c
+++ b/drivers/edgetpu/edgetpu-google-iommu.c
@@ -71,6 +71,9 @@ get_domain_by_context_id(struct edgetpu_dev *etdev,
struct edgetpu_iommu *etiommu = etdev->mmu_cookie;
uint pasid;
+ /* always return the default domain when AUX is not supported */
+ if (!etiommu->aux_enabled)
+ return iommu_get_domain_for_dev(dev);
if (ctx_id == EDGETPU_CONTEXT_INVALID)
return NULL;
if (ctx_id & EDGETPU_CONTEXT_DOMAIN_TOKEN)
@@ -627,14 +630,11 @@ static struct edgetpu_iommu_domain invalid_etdomain = {
struct edgetpu_iommu_domain *edgetpu_mmu_alloc_domain(struct edgetpu_dev *etdev)
{
- struct edgetpu_iommu_domain *etdomain =
- kzalloc(sizeof(*etdomain), GFP_KERNEL);
+ struct edgetpu_iommu_domain *etdomain;
struct edgetpu_iommu *etiommu = etdev->mmu_cookie;
struct iommu_domain *domain;
int token;
- if (!etdomain)
- return NULL;
if (!etiommu->aux_enabled)
return &invalid_etdomain;
domain = iommu_domain_alloc(etdev->dev->bus);
@@ -643,15 +643,23 @@ struct edgetpu_iommu_domain *edgetpu_mmu_alloc_domain(struct edgetpu_dev *etdev)
return NULL;
}
+ etdomain = kzalloc(sizeof(*etdomain), GFP_KERNEL);
+ if (!etdomain) {
+ iommu_domain_free(domain);
+ return NULL;
+ }
+
mutex_lock(&etiommu->pool_lock);
token = idr_alloc(&etiommu->domain_pool, domain, 0,
EDGETPU_DOMAIN_TOKEN_END, GFP_KERNEL);
mutex_unlock(&etiommu->pool_lock);
if (token < 0) {
etdev_warn(etdev, "alloc iommu domain token failed: %d", token);
+ kfree(etdomain);
iommu_domain_free(domain);
return NULL;
}
+
edgetpu_init_etdomain(etdomain, domain, token);
return etdomain;
}
diff --git a/drivers/edgetpu/edgetpu-internal.h b/drivers/edgetpu/edgetpu-internal.h
index 1258cf0..f5c6853 100644
--- a/drivers/edgetpu/edgetpu-internal.h
+++ b/drivers/edgetpu/edgetpu-internal.h
@@ -167,10 +167,16 @@ struct edgetpu_dev {
struct dentry *d_entry; /* debugfs dir for this device */
struct mutex state_lock; /* protects state of this device */
enum edgetpu_dev_state state;
- struct mutex groups_lock; /* protects groups, n_groups, and lockout */
+ struct mutex groups_lock;
+ /* fields protected by @groups_lock */
+
struct list_head groups;
uint n_groups; /* number of entries in @groups */
bool group_join_lockout; /* disable group join while reinit */
+ u32 vcid_pool; /* bitmask of VCID to be allocated */
+
+ /* end of fields protected by @groups_lock */
+
void *mmu_cookie; /* mmu driver private data */
void *dram_cookie; /* on-device DRAM private data */
struct edgetpu_mailbox_manager *mailbox_manager;
@@ -416,7 +422,7 @@ int edgetpu_get_state_errno_locked(struct edgetpu_dev *etdev);
/*
* "External mailboxes" below refers to mailboxes that are not handled
- * directly by the DarwiNN runtime, such as secure or device-to-device.
+ * directly by the runtime, such as secure or device-to-device.
*
* Chip specific code will typically keep track of state and inform the firmware
* that a mailbox has become active/inactive.
@@ -424,10 +430,10 @@ int edgetpu_get_state_errno_locked(struct edgetpu_dev *etdev);
/* Chip-specific code to acquire external mailboxes */
int edgetpu_chip_acquire_ext_mailbox(struct edgetpu_client *client,
- struct edgetpu_ext_mailbox *ext_mbox);
+ struct edgetpu_ext_mailbox_ioctl *args);
/* Chip-specific code to release external mailboxes */
int edgetpu_chip_release_ext_mailbox(struct edgetpu_client *client,
- struct edgetpu_ext_mailbox *ext_mbox);
+ struct edgetpu_ext_mailbox_ioctl *args);
#endif /* __EDGETPU_INTERNAL_H__ */
diff --git a/drivers/edgetpu/edgetpu-kci.c b/drivers/edgetpu/edgetpu-kci.c
index c62ac73..0adf9a5 100644
--- a/drivers/edgetpu/edgetpu-kci.c
+++ b/drivers/edgetpu/edgetpu-kci.c
@@ -6,6 +6,7 @@
* Copyright (C) 2019 Google, Inc.
*/
+#include <linux/bits.h>
#include <linux/circ_buf.h>
#include <linux/device.h>
#include <linux/dma-mapping.h> /* dmam_alloc_coherent */
@@ -690,6 +691,41 @@ static int edgetpu_kci_send_cmd_return_resp(
return resp->code;
}
+static int edgetpu_kci_send_cmd_with_data(struct edgetpu_kci *kci,
+ struct edgetpu_command_element *cmd, const void *data,
+ size_t size)
+{
+ struct edgetpu_dev *etdev = kci->mailbox->etdev;
+ dma_addr_t dma_addr;
+ tpu_addr_t tpu_addr;
+ int ret;
+ void *ptr = dma_alloc_coherent(etdev->dev, size, &dma_addr, GFP_KERNEL);
+ const u32 flags = EDGETPU_MMU_DIE | EDGETPU_MMU_32 | EDGETPU_MMU_HOST;
+
+ if (!ptr)
+ return -ENOMEM;
+ memcpy(ptr, data, size);
+
+ tpu_addr = edgetpu_mmu_tpu_map(etdev, dma_addr, size, DMA_TO_DEVICE, EDGETPU_CONTEXT_KCI,
+ flags);
+ if (!tpu_addr) {
+ etdev_err(etdev, "%s: failed to map to TPU", __func__);
+ dma_free_coherent(etdev->dev, size, ptr, dma_addr);
+ return -ENOSPC;
+ }
+ etdev_dbg(etdev, "%s: map kva=%pK iova=0x%llx dma=%pad", __func__, ptr, tpu_addr,
+ &dma_addr);
+
+ cmd->dma.address = tpu_addr;
+ cmd->dma.size = size;
+ ret = edgetpu_kci_send_cmd(kci, cmd);
+ edgetpu_mmu_tpu_unmap(etdev, tpu_addr, size, EDGETPU_CONTEXT_KCI);
+ dma_free_coherent(etdev->dev, size, ptr, dma_addr);
+ etdev_dbg(etdev, "%s: unmap kva=%pK iova=0x%llx dma=%pad", __func__, ptr, tpu_addr,
+ &dma_addr);
+ return ret;
+}
+
int edgetpu_kci_send_cmd(struct edgetpu_kci *kci,
struct edgetpu_command_element *cmd)
{
@@ -741,51 +777,19 @@ int edgetpu_kci_map_trace_buffer(struct edgetpu_kci *kci, tpu_addr_t tpu_addr,
return edgetpu_kci_send_cmd(kci, &cmd);
}
-int edgetpu_kci_join_group(struct edgetpu_kci *kci, struct edgetpu_dev *etdev,
- u8 n_dies, u8 vid)
+int edgetpu_kci_join_group(struct edgetpu_kci *kci, u8 n_dies, u8 vid)
{
- struct edgetpu_kci_device_group_detail *detail;
- const u32 size = sizeof(*detail);
- dma_addr_t dma_addr;
- tpu_addr_t tpu_addr;
struct edgetpu_command_element cmd = {
.code = KCI_CODE_JOIN_GROUP,
- .dma = {
- .size = size,
- },
};
- const u32 flags = EDGETPU_MMU_DIE | EDGETPU_MMU_32 | EDGETPU_MMU_HOST;
- int ret;
+ const struct edgetpu_kci_device_group_detail detail = {
+ .n_dies = n_dies,
+ .vid = vid,
+ };
if (!kci)
return -ENODEV;
- detail = dma_alloc_coherent(etdev->dev, sizeof(*detail), &dma_addr,
- GFP_KERNEL);
- if (!detail)
- return -ENOMEM;
- detail->n_dies = n_dies;
- detail->vid = vid;
-
- tpu_addr = edgetpu_mmu_tpu_map(etdev, dma_addr, size, DMA_TO_DEVICE,
- EDGETPU_CONTEXT_KCI, flags);
- if (!tpu_addr) {
- etdev_err(etdev, "%s: failed to map group detail to TPU",
- __func__);
- dma_free_coherent(etdev->dev, size, detail, dma_addr);
- return -EINVAL;
- }
-
- cmd.dma.address = tpu_addr;
- etdev_dbg(etdev, "%s: map kva=%pK iova=0x%llx dma=%pad", __func__,
- detail, tpu_addr, &dma_addr);
-
- ret = edgetpu_kci_send_cmd(kci, &cmd);
- edgetpu_mmu_tpu_unmap(etdev, tpu_addr, size, EDGETPU_CONTEXT_KCI);
- dma_free_coherent(etdev->dev, size, detail, dma_addr);
- etdev_dbg(etdev, "%s: unmap kva=%pK iova=0x%llx dma=%pad", __func__,
- detail, tpu_addr, &dma_addr);
-
- return ret;
+ return edgetpu_kci_send_cmd_with_data(kci, &cmd, &detail, sizeof(detail));
}
int edgetpu_kci_leave_group(struct edgetpu_kci *kci)
@@ -989,26 +993,33 @@ int edgetpu_kci_get_debug_dump(struct edgetpu_kci *kci, tpu_addr_t tpu_addr,
return edgetpu_kci_send_cmd(kci, &cmd);
}
-int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_ids)
+int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_id, s16 vcid, bool first_open)
{
+ const struct edgetpu_kci_open_device_detail detail = {
+ .mailbox_id = mailbox_id,
+ .vcid = vcid,
+ .flags = first_open,
+ };
struct edgetpu_command_element cmd = {
.code = KCI_CODE_OPEN_DEVICE,
.dma = {
- .flags = mailbox_ids,
+ .flags = BIT(mailbox_id),
},
};
if (!kci)
return -ENODEV;
- return edgetpu_kci_send_cmd(kci, &cmd);
+ if (vcid < 0)
+ return edgetpu_kci_send_cmd(kci, &cmd);
+ return edgetpu_kci_send_cmd_with_data(kci, &cmd, &detail, sizeof(detail));
}
-int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_ids)
+int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_id)
{
struct edgetpu_command_element cmd = {
.code = KCI_CODE_CLOSE_DEVICE,
.dma = {
- .flags = mailbox_ids,
+ .flags = BIT(mailbox_id),
},
};
diff --git a/drivers/edgetpu/edgetpu-kci.h b/drivers/edgetpu/edgetpu-kci.h
index 05f87c8..97e4079 100644
--- a/drivers/edgetpu/edgetpu-kci.h
+++ b/drivers/edgetpu/edgetpu-kci.h
@@ -204,6 +204,29 @@ struct edgetpu_kci_device_group_detail {
u8 reserved[6]; /* padding */
};
+struct edgetpu_kci_open_device_detail {
+ /* The ID of mailbox to be opened. */
+ u16 mailbox_id;
+ /*
+ * Virtual context ID @mailbox_id is associated to.
+ * For device groups with @mailbox_detachable attribute the mailbox attached to the group
+ * can be different after wakelock re-acquired. Firmware uses this VCID to identify the
+ * device group.
+ */
+ u16 vcid;
+ /*
+ * Extra flags for the attributes of this request.
+ * Set RESERVED bits to 0 to ensure backwards compatibility.
+ *
+ * Bitfields:
+ * [0:0] - first_open: Specifies if this is the first time we are calling mailbox open
+ * KCI for this VCID after it has been allocated to a device group. This allows
+ * firmware to clean up/reset the memory allocator for that partition.
+ * [31:1] - RESERVED
+ */
+ u32 flags;
+};
+
/*
* Initializes a KCI object.
*
@@ -328,8 +351,7 @@ int edgetpu_kci_map_trace_buffer(struct edgetpu_kci *kci, tpu_addr_t tpu_addr,
*
* Returns the code of response, or a negative errno on error.
*/
-int edgetpu_kci_join_group(struct edgetpu_kci *kci, struct edgetpu_dev *etdev,
- u8 n_dies, u8 vid);
+int edgetpu_kci_join_group(struct edgetpu_kci *kci, u8 n_dies, u8 vid);
/* Informs the TPU to leave the group it currently belongs to. */
int edgetpu_kci_leave_group(struct edgetpu_kci *kci);
@@ -344,20 +366,20 @@ int edgetpu_kci_get_debug_dump(struct edgetpu_kci *kci, tpu_addr_t tpu_addr,
size_t size);
/*
- * Inform the firmware to prepare to serve the VII of @mailbox_ids.
+ * Inform the firmware to prepare to serve the VII of @mailbox_id.
*
* You usually shouldn't call this directly - consider using
* edgetpu_mailbox_activate() instead.
*/
-int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_ids);
+int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_id, s16 vcid, bool first_open);
/*
- * Inform the firmware the VII with @mailbox_ids are closed.
+ * Inform the firmware the VII with @mailbox_id is closed.
*
* You usually shouldn't call this directly - consider using
* edgetpu_mailbox_deactivate() instead.
*/
-int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_ids);
+int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_id);
/* Cancel work queues or wait until they're done */
void edgetpu_kci_cancel_work_queues(struct edgetpu_kci *kci);
diff --git a/drivers/edgetpu/edgetpu-mailbox.c b/drivers/edgetpu/edgetpu-mailbox.c
index 606b246..ba33eab 100644
--- a/drivers/edgetpu/edgetpu-mailbox.c
+++ b/drivers/edgetpu/edgetpu-mailbox.c
@@ -473,7 +473,8 @@ void edgetpu_mailbox_remove_vii(struct edgetpu_vii *vii)
edgetpu_mailbox_free_queue(etdev, vii->mailbox, &vii->cmd_queue_mem);
edgetpu_mailbox_free_queue(etdev, vii->mailbox, &vii->resp_queue_mem);
if (vii->mailbox) {
- edgetpu_mailbox_disable(vii->mailbox);
+ if (!vii->mailbox->internal.group->dev_inaccessible)
+ edgetpu_mailbox_disable(vii->mailbox);
edgetpu_device_group_put(vii->mailbox->internal.group);
edgetpu_mailbox_remove(etdev->mailbox_manager, vii->mailbox);
vii->mailbox = NULL;
@@ -768,14 +769,14 @@ void edgetpu_mailbox_restore_active_vii_queues(struct edgetpu_dev *etdev)
kfree(groups);
}
-int edgetpu_mailbox_enable_ext(struct edgetpu_client *client, u32 mailbox_ids)
+int edgetpu_mailbox_enable_ext(struct edgetpu_client *client, u32 mailbox_id)
{
int ret;
if (!edgetpu_wakelock_lock(client->wakelock)) {
etdev_err(client->etdev,
- "Enabling mailboxes %08x needs wakelock acquired\n",
- mailbox_ids);
+ "Enabling mailbox %d needs wakelock acquired\n",
+ mailbox_id);
edgetpu_wakelock_unlock(client->wakelock);
return -EAGAIN;
}
@@ -783,24 +784,24 @@ int edgetpu_mailbox_enable_ext(struct edgetpu_client *client, u32 mailbox_ids)
edgetpu_wakelock_inc_event_locked(client->wakelock,
EDGETPU_WAKELOCK_EVENT_EXT_MAILBOX);
- etdev_dbg(client->etdev, "Enabling mailboxes: %08X\n", mailbox_ids);
+ etdev_dbg(client->etdev, "Enabling mailbox: %d\n", mailbox_id);
- ret = edgetpu_mailbox_activate(client->etdev, mailbox_ids);
+ ret = edgetpu_mailbox_activate(client->etdev, mailbox_id, -1, false);
if (ret)
- etdev_err(client->etdev, "Activate mailboxes %08x failed: %d",
- mailbox_ids, ret);
+ etdev_err(client->etdev, "Activate mailbox %d failed: %d",
+ mailbox_id, ret);
edgetpu_wakelock_unlock(client->wakelock);
return ret;
}
-int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, u32 mailbox_ids)
+int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, u32 mailbox_id)
{
int ret;
if (!edgetpu_wakelock_lock(client->wakelock)) {
etdev_err(client->etdev,
- "Disabling mailboxes %08x needs wakelock acquired\n",
- mailbox_ids);
+ "Disabling mailbox %d needs wakelock acquired\n",
+ mailbox_id);
edgetpu_wakelock_unlock(client->wakelock);
return -EAGAIN;
}
@@ -808,47 +809,45 @@ int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, u32 mailbox_ids)
edgetpu_wakelock_dec_event_locked(client->wakelock,
EDGETPU_WAKELOCK_EVENT_EXT_MAILBOX);
- etdev_dbg(client->etdev, "Disabling mailbox: %08X\n", mailbox_ids);
- ret = edgetpu_mailbox_deactivate(client->etdev, mailbox_ids);
+ etdev_dbg(client->etdev, "Disabling mailbox: %d\n", mailbox_id);
+ ret = edgetpu_mailbox_deactivate(client->etdev, mailbox_id);
if (ret)
- etdev_err(client->etdev, "Deactivate mailboxes %08x failed: %d",
- mailbox_ids, ret);
+ etdev_err(client->etdev, "Deactivate mailbox %d failed: %d",
+ mailbox_id, ret);
edgetpu_wakelock_unlock(client->wakelock);
return ret;
}
-int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_ids)
+int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_id, s16 vcid, bool first_open)
{
struct edgetpu_handshake *eh = &etdev->mailbox_manager->open_devices;
- u32 to_send;
+ const u32 bit = BIT(mailbox_id);
int ret = 0;
mutex_lock(&eh->lock);
- to_send = mailbox_ids & ~eh->fw_state;
- if (to_send)
- ret = edgetpu_kci_open_device(etdev->kci, to_send);
+ if (bit & ~eh->fw_state)
+ ret = edgetpu_kci_open_device(etdev->kci, mailbox_id, vcid, first_open);
if (!ret) {
- eh->state |= mailbox_ids;
- eh->fw_state |= mailbox_ids;
+ eh->state |= bit;
+ eh->fw_state |= bit;
}
mutex_unlock(&eh->lock);
return ret;
}
-int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_ids)
+int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id)
{
struct edgetpu_handshake *eh = &etdev->mailbox_manager->open_devices;
- u32 to_send;
+ const u32 bit = BIT(mailbox_id);
int ret = 0;
mutex_lock(&eh->lock);
- to_send = mailbox_ids & eh->fw_state;
- if (to_send)
- ret = edgetpu_kci_close_device(etdev->kci, to_send);
+ if (bit & eh->fw_state)
+ ret = edgetpu_kci_close_device(etdev->kci, mailbox_id);
if (!ret) {
- eh->state &= ~mailbox_ids;
- eh->fw_state &= ~mailbox_ids;
+ eh->state &= ~bit;
+ eh->fw_state &= ~bit;
}
mutex_unlock(&eh->lock);
return ret;
diff --git a/drivers/edgetpu/edgetpu-mailbox.h b/drivers/edgetpu/edgetpu-mailbox.h
index 8425807..d0fb9ae 100644
--- a/drivers/edgetpu/edgetpu-mailbox.h
+++ b/drivers/edgetpu/edgetpu-mailbox.h
@@ -302,26 +302,26 @@ void edgetpu_mailbox_restore_active_vii_queues(struct edgetpu_dev *etdev);
int edgetpu_mailbox_p2p_batch(struct edgetpu_mailbox_manager *mgr, uint n,
uint skip_i, struct edgetpu_mailbox **mailboxes);
-/* Notify firmware of external mailboxes becoming active */
-int edgetpu_mailbox_enable_ext(struct edgetpu_client *client, u32 mailbox_ids);
+/* Notify firmware of an external mailbox becoming active */
+int edgetpu_mailbox_enable_ext(struct edgetpu_client *client, u32 mailbox_id);
-/* Notify firmware of external mailboxes becoming inactive */
-int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, u32 mailbox_ids);
+/* Notify firmware of an external mailbox becoming inactive */
+int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, u32 mailbox_id);
/*
- * Activates @mailbox_ids, OPEN_DEVICE KCI will be sent.
+ * Activates @mailbox_id, OPEN_DEVICE KCI will be sent.
*
- * If @mailbox_ids are known to be activated, KCI is not sent and this function
+ * If @mailbox_id is known to be activated, KCI is not sent and this function
* returns 0.
*
* Returns what edgetpu_kci_open_device() returned.
* Caller ensures device is powered on.
*/
-int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_ids);
+int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_id, s16 vcid, bool first_open);
/*
* Similar to edgetpu_mailbox_activate() but sends CLOSE_DEVICE KCI instead.
*/
-int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_ids);
+int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id);
/* Sets @eh->fw_state to 0. */
void edgetpu_handshake_clear_fw_state(struct edgetpu_handshake *eh);
diff --git a/drivers/edgetpu/edgetpu-mmu.h b/drivers/edgetpu/edgetpu-mmu.h
index 8c5ae3c..094f14d 100644
--- a/drivers/edgetpu/edgetpu-mmu.h
+++ b/drivers/edgetpu/edgetpu-mmu.h
@@ -199,9 +199,7 @@ void edgetpu_mmu_free(struct edgetpu_dev *etdev, tpu_addr_t tpu_addr,
*
* Description: Add a mapping from iova -> paddr to the MMU for the chip.
* paddr can be considered a physical address from the TPU's viewpoint, but
- * may actually be another IOVA for another IOMMU downstream of the chip MMU
- * (as on Hermosa, where the SMMU translates TPU VAs to IOVAs sent to the IOMMU
- * downstream of the TPU).
+ * may actually be another IOVA for another IOMMU downstream of the chip MMU.
*
* Note: for chipsets with edgetpu_mmu_alloc() support, @iova passed to this
* function must be either allocated from edgetpu_mmu_alloc() or reserved by
@@ -230,12 +228,12 @@ void edgetpu_mmu_remove_translation(struct edgetpu_dev *etdev,
* @context_id: context ID for the mapping
* @mmu_flags: the flag or'ed with EDGETPU_MMU_* macros
*
- * Description: For chips with internal MMUs (e.g., Hermosa SMMU), add the
- * required internal MMU mapping for the TPU to access @downstream_addr, the
- * DMA or physical address of the buffer as returned by the Linux DMA API when
- * the DMA mapping was created. This can be used with, for example, buffers
- * allocated using dma_alloc_coherent(), which are mapped appropriately for
- * any downstream IOMMU and must be mapped to the TPU internal MMU as well.
+ * Description: For chips with internal MMUs, add the required internal MMU
+ * mapping for the TPU to access @down_addr, the DMA or physical address of the
+ * buffer as returned by the Linux DMA API when the DMA mapping was created.
+ * This can be used with, for example, buffers allocated using
+ * dma_alloc_coherent(), which are mapped appropriately for any downstream IOMMU
+ * and must be mapped to the TPU internal MMU as well.
*
* For a chip that doesn't have an internal MMU but has the IOMMU domain AUX
* feature, perform the necessary mapping to @context_id and return the
@@ -261,9 +259,8 @@ void edgetpu_mmu_tpu_unmap(struct edgetpu_dev *etdev,
* @context_id: context ID for the mapping
* @mmu_flags: the flag or'ed with EDGETPU_MMU_* macros
*
- * Description: For chips with internal MMUs (e.g., Hermosa SMMU), add the
- * required internal MMU mapping for the TPU to access the DMA addresses of
- * @sgt.
+ * Description: For chips with internal MMUs, add the required internal MMU
+ * mapping for the TPU to access the DMA addresses of @sgt.
*
* For a chip that doesn't have an internal MMU but has the IOMMU domain AUX
* feature, perform the necessary mapping to @context_id and return the
diff --git a/drivers/edgetpu/edgetpu-pm.c b/drivers/edgetpu/edgetpu-pm.c
index 1e28141..df1c179 100644
--- a/drivers/edgetpu/edgetpu-pm.c
+++ b/drivers/edgetpu/edgetpu-pm.c
@@ -242,7 +242,7 @@ static int pchannel_state_change_request(struct edgetpu_dev *etdev, int state)
if (state == STATE_RUN)
return 0;
- /* Phase 3: R52 acknowledgment */
+ /* Phase 3: CPU acknowledgment */
ret = etdev_poll_power_state(etdev, val,
(val & PACCEPT) || (val & PDENY));
if (val & PDENY) {
diff --git a/drivers/edgetpu/edgetpu-sw-watchdog.c b/drivers/edgetpu/edgetpu-sw-watchdog.c
index 5d96e4d..4e7f681 100644
--- a/drivers/edgetpu/edgetpu-sw-watchdog.c
+++ b/drivers/edgetpu/edgetpu-sw-watchdog.c
@@ -5,6 +5,7 @@
* Copyright (C) 2020 Google, Inc.
*/
+#include <asm/barrier.h>
#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/slab.h>
@@ -124,21 +125,29 @@ int edgetpu_sw_wdt_create(struct edgetpu_dev *etdev, unsigned long active_ms,
int edgetpu_sw_wdt_start(struct edgetpu_dev *etdev)
{
- struct edgetpu_sw_wdt *etdev_sw_wdt = etdev->etdev_sw_wdt;
+ struct edgetpu_sw_wdt *wdt;
- if (!etdev_sw_wdt)
+ /* to match edgetpu_sw_wdt_destroy() */
+ smp_mb();
+ wdt = etdev->etdev_sw_wdt;
+ if (!wdt)
return -EINVAL;
- if (!etdev_sw_wdt->et_action_work.edgetpu_sw_wdt_handler)
+ if (!wdt->et_action_work.edgetpu_sw_wdt_handler)
etdev_err(etdev, "sw wdt handler not set\n");
- sw_wdt_start(etdev_sw_wdt);
+ sw_wdt_start(wdt);
return 0;
}
void edgetpu_sw_wdt_stop(struct edgetpu_dev *etdev)
{
- if (!etdev->etdev_sw_wdt)
+ struct edgetpu_sw_wdt *wdt;
+
+ /* to match edgetpu_sw_wdt_destroy() */
+ smp_mb();
+ wdt = etdev->etdev_sw_wdt;
+ if (!wdt)
return;
- sw_wdt_stop(etdev->etdev_sw_wdt);
+ sw_wdt_stop(wdt);
}
void edgetpu_sw_wdt_destroy(struct edgetpu_dev *etdev)
@@ -149,9 +158,14 @@ void edgetpu_sw_wdt_destroy(struct edgetpu_dev *etdev)
if (!wdt)
return;
etdev->etdev_sw_wdt = NULL;
+ /*
+ * To ensure that etdev->etdev_sw_wdt is NULL so wdt_start() calls from other processes
+ * won't start the watchdog again.
+ */
+ smp_mb();
+ sw_wdt_stop(wdt);
/* cancel and sync work due to watchdog bite to prevent UAF */
cancel_work_sync(&wdt->et_action_work.work);
- sw_wdt_stop(wdt);
counter = atomic_read(&wdt->active_counter);
if (counter)
etdev_warn(etdev, "Unbalanced WDT active counter: %d", counter);
diff --git a/drivers/edgetpu/edgetpu-usage-stats.h b/drivers/edgetpu/edgetpu-usage-stats.h
index 7ea3e9d..20d5ad7 100644
--- a/drivers/edgetpu/edgetpu-usage-stats.h
+++ b/drivers/edgetpu/edgetpu-usage-stats.h
@@ -36,9 +36,9 @@ struct tpu_usage {
* Must be kept in sync with firmware struct Component.
*/
enum edgetpu_usage_component {
- /* The device as a whole (TPU, R52, DMA330, etc.) */
+ /* The device as a whole */
EDGETPU_USAGE_COMPONENT_DEVICE = 0,
- /* Just the TPU core (scalar core and tiles) */
+ /* Just the TPU core */
EDGETPU_USAGE_COMPONENT_TPU = 1,
EDGETPU_USAGE_COMPONENT_COUNT = 2, /* number of components above */
};
@@ -112,20 +112,9 @@ struct __packed edgetpu_usage_max_watermark {
/* An enum to identify the tracked firmware threads. */
/* Must be kept in sync with firmware enum class UsageTrackerThreadId. */
enum edgetpu_usage_threadid {
- EDGETPU_FW_THREAD_MAIN = 0,
- EDGETPU_FW_THREAD_KCI_HANDLER = 1,
- EDGETPU_FW_THREAD_POWER_ADMIN = 2,
- EDGETPU_FW_THREAD_VII_SCHEDULER = 3,
- EDGETPU_FW_THREAD_VII_HANDLER = 4,
- EDGETPU_FW_THREAD_MCP_GRAPH_DRIVER = 5,
- EDGETPU_FW_THREAD_SCP_GRAPH_DRIVER = 6,
- EDGETPU_FW_THREAD_TPU_DRIVER = 7,
- EDGETPU_FW_THREAD_RESTART_HANDLER = 8,
- EDGETPU_FW_THREAD_POLL_SERVICE = 9,
- EDGETPU_FW_THREAD_DMA_DRIVER = 10,
- EDGETPU_FW_THREAD_GRAPH_DMA_DRIVER = 11,
-
- /* Number of task identifiers above. */
+ /* Individual thread IDs are not tracked. */
+
+ /* Number of task identifiers. */
EDGETPU_FW_THREAD_COUNT = 12,
};
diff --git a/drivers/edgetpu/edgetpu.h b/drivers/edgetpu/edgetpu.h
index 93d7afa..b5a52c2 100644
--- a/drivers/edgetpu/edgetpu.h
+++ b/drivers/edgetpu/edgetpu.h
@@ -154,6 +154,9 @@ struct edgetpu_event_register {
* released.
*/
#define EDGETPU_PRIORITY_DETACHABLE (1u << 3)
+/* For @partition_type. */
+#define EDGETPU_PARTITION_NORMAL 0
+#define EDGETPU_PARTITION_EXTRA 1
struct edgetpu_mailbox_attr {
/*
* There are limitations on these size fields, see the error cases in
@@ -166,6 +169,8 @@ struct edgetpu_mailbox_attr {
__u32 sizeof_resp; /* size of response element in bytes */
__u32 priority : 4; /* mailbox service priority */
__u32 cmdq_tail_doorbell: 1; /* auto doorbell on cmd queue tail move */
+ /* Type of memory partitions to be used for this group, exact meaning is chip-dependent. */
+ __u32 partition_type : 1;
};
/*
@@ -517,14 +522,14 @@ struct edgetpu_device_dram_usage {
_IOR(EDGETPU_IOCTL_BASE, 29, struct edgetpu_device_dram_usage)
/*
- * struct edgetpu_ext_mailbox
+ * struct edgetpu_ext_mailbox_ioctl
* @client_id: Client identifier (may not be needed depending on type)
* @attrs: Array of mailbox attributes (pointer to
* edgetpu_mailbox_attr, may be NULL depending on type)
* @type: One of the EDGETPU_EXT_MAILBOX_xxx values
* @count: Number of mailboxes to acquire
*/
-struct edgetpu_ext_mailbox {
+struct edgetpu_ext_mailbox_ioctl {
__u64 client_id;
__u64 attrs;
__u32 type;
@@ -536,13 +541,53 @@ struct edgetpu_ext_mailbox {
* runtime. This can be a secure mailbox or a device-to-device mailbox.
*/
#define EDGETPU_ACQUIRE_EXT_MAILBOX \
- _IOW(EDGETPU_IOCTL_BASE, 30, struct edgetpu_ext_mailbox)
+ _IOW(EDGETPU_IOCTL_BASE, 30, struct edgetpu_ext_mailbox_ioctl)
/*
* Release a chip-specific mailbox that is not directly managed by the TPU
* runtime. This can be a secure mailbox or a device-to-device mailbox.
*/
#define EDGETPU_RELEASE_EXT_MAILBOX \
- _IOW(EDGETPU_IOCTL_BASE, 31, struct edgetpu_ext_mailbox)
+ _IOW(EDGETPU_IOCTL_BASE, 31, struct edgetpu_ext_mailbox_ioctl)
+
+/* Fatal error event bitmasks... */
+/* Firmware crash in non-restartable thread */
+#define EDGETPU_ERROR_FW_CRASH 0x1
+/* Host or device watchdog timeout */
+#define EDGETPU_ERROR_WATCHDOG_TIMEOUT 0x2
+/* Thermal shutdown */
+#define EDGETPU_ERROR_THERMAL_STOP 0x4
+/* TPU hardware inaccessible: link fail, memory protection unit blocking... */
+#define EDGETPU_ERROR_HW_NO_ACCESS 0x8
+/* Various hardware failures */
+#define EDGETPU_ERROR_HW_FAIL 0x10
+
+/*
+ * Return fatal errors raised for the client's device group, as a bitmask of
+ * the above fatal error event codes, or zero if no errors encountered or
+ * client is not part of a device group.
+ */
+#define EDGETPU_GET_FATAL_ERRORS \
+ _IOR(EDGETPU_IOCTL_BASE, 32, __u32)
+
#endif /* __EDGETPU_H__ */
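
For reference, a minimal userspace sketch of the EDGETPU_GET_FATAL_ERRORS ioctl added above. This is illustrative only and not part of the commit; the device node path is an assumption, and the result is zero unless the client has joined a device group that reported an error.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/types.h>
#include "edgetpu.h"	/* UAPI header changed by this commit */

int main(void)
{
	__u32 errors = 0;
	/* Hypothetical device node name for the abrolhos chipset. */
	int fd = open("/dev/abrolhos", O_RDWR);

	if (fd < 0)
		return 1;
	/* Reads back the bitmask of fatal error event codes for the client's group. */
	if (ioctl(fd, EDGETPU_GET_FATAL_ERRORS, &errors) == 0) {
		if (errors & EDGETPU_ERROR_FW_CRASH)
			printf("firmware crash reported\n");
		if (errors & EDGETPU_ERROR_WATCHDOG_TIMEOUT)
			printf("watchdog timeout reported\n");
	}
	close(fd);
	return 0;
}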