diff options
author | Zuma copybara merger <zuma-automerger@google.com> | 2023-03-02 17:04:37 +0800 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2023-03-02 03:45:49 -0800 |
commit | 7f2d22eee75783f5def662dc34636e15648a7bf6 (patch) | |
tree | fc2ec4a2c07cab1433daa7770f026ad5747229c4 | |
parent | c01d777cb9feb30c3ccec179bf5098484294d0eb (diff) | |
download | rio-7f2d22eee75783f5def662dc34636e15648a7bf6.tar.gz |
[Copybara Auto Merge] Merge branch zuma into android14-gs-pixel-5.15
edgetpu: Add edgetpu_soc_thermal_exit
Bug: 264729080
edgetpu: Add edgetpu_thermal_destroy
Bug: 264729080 (repeat)
edgetpu: Add missing thermal node_name
Bug: 264729080 (repeat)
edgetpu: Add const to gcip_thermal_args
Bug: 264729080 (repeat)
gcip: Add gcip_thermal_destroy
Bug: 264729080 (repeat)
gcip: Add thermal votes
Bug: 271194361
Bug: 264729080 (repeat)
gcip: Cleanup abandoned domains on domain-pool destroy
gcip: Prefix MAX_NUM_THERMAL_STATES
Bug: 264729080 (repeat)
gcip: Add const to thermal_cooling_device_ops
Bug: 264729080 (repeat)
gcip: Add gcip_thermal_destroy
Bug: 264729080 (repeat)
gcip: Add thermal votes
Bug: 271194361 (repeat)
Bug: 264729080 (repeat)
gcip: Add missing includes to gcip-domain-pool.h
gcip: Add list of dynamic domains to domain-pool
gcip: Prefix MAX_NUM_THERMAL_STATES
Bug: 264729080 (repeat)
gcip: add watchdog timeout crash type
Bug: 255416846
gcip: Add thermal header
Bug: 264729080 (repeat)
edgetpu: Adopt GCIP thermal
Bug: 264729080 (repeat)
edgetpu: Kbuild: Expand objs list
Bug: None
gcip: Add thermal support
Bug: 264729080 (repeat)
gcip: remove redundant else in pm.c
Signed-off-by: Zuma copybara merger <zuma-automerger@google.com>
GitOrigin-RevId: 2b37ee7805825c4d527e2c90392e97abde7d5cc1
Change-Id: I0a40e0cd98e47c23251a57e67906559747e12565
20 files changed, 865 insertions, 735 deletions
diff --git a/drivers/edgetpu/Kbuild b/drivers/edgetpu/Kbuild index 5c20205..92554c6 100644 --- a/drivers/edgetpu/Kbuild +++ b/drivers/edgetpu/Kbuild @@ -18,20 +18,37 @@ obj-$(CONFIG_RIO) += rio.o GCIP_DIR=gcip-kernel-driver/drivers/gcip -edgetpu-objs := edgetpu-async.o edgetpu-dmabuf.o edgetpu-iremap-pool.o \ - edgetpu-kci.o edgetpu-mailbox.o edgetpu-mapping.o \ - edgetpu-sw-watchdog.o edgetpu-telemetry.o \ - edgetpu-firmware-util.o edgetpu-firmware.o +edgetpu-objs := edgetpu-async.o \ + edgetpu-dmabuf.o \ + edgetpu-firmware-util.o \ + edgetpu-firmware.o \ + edgetpu-iremap-pool.o \ + edgetpu-kci.o \ + edgetpu-mailbox.o \ + edgetpu-mapping.o \ + edgetpu-sw-watchdog.o \ + edgetpu-telemetry.o \ + edgetpu-thermal.o ifndef CONFIG_EDGETPU_TEST # Unit testing doesn't need this because GCIP is compiled as a built-in there. edgetpu-objs += $(GCIP_DIR)/gcip.o endif -rio-objs := rio-core.o rio-debug-dump.o rio-device-group.o rio-device.o \ - rio-firmware.o rio-fs.o rio-iommu.o rio-platform.o rio-pm.o \ - rio-thermal.o rio-usage-stats.o rio-wakelock.o rio-external.o \ - rio-soc.o $(edgetpu-objs) +rio-objs := rio-core.o \ + rio-debug-dump.o \ + rio-device-group.o \ + rio-device.o \ + rio-external.o \ + rio-firmware.o \ + rio-fs.o \ + rio-iommu.o \ + rio-platform.o \ + rio-pm.o \ + rio-soc.o \ + rio-usage-stats.o \ + rio-wakelock.o \ + $(edgetpu-objs) CFLAGS_rio-fs.o := -DCONFIG_RIO=1 CFLAGS_rio-core.o := -DCONFIG_RIO=1 @@ -41,7 +58,6 @@ CFLAGS_rio-firmware.o := -DCONFIG_RIO=1 CFLAGS_rio-iommu.o := -DCONFIG_RIO=1 CFLAGS_rio-platform.o := -DCONFIG_RIO=1 CFLAGS_rio-pm.o := -DCONFIG_RIO=1 -CFLAGS_rio-thermal.o := -DCONFIG_RIO=1 CFLAGS_rio-debug-dump.o := -DCONFIG_RIO=1 CFLAGS_rio-usage-stats.o := -DCONFIG_RIO=1 CFLAGS_rio-wakelock.o := -DCONFIG_RIO=1 diff --git a/drivers/edgetpu/edgetpu-firmware.c b/drivers/edgetpu/edgetpu-firmware.c index 6bfc199..f920a59 100644 --- a/drivers/edgetpu/edgetpu-firmware.c +++ b/drivers/edgetpu/edgetpu-firmware.c @@ -16,6 +16,7 @@ #include 
<linux/types.h> #include <gcip/gcip-pm.h> +#include <gcip/gcip-thermal.h> #include "edgetpu.h" #include "edgetpu-debug-dump.h" @@ -182,7 +183,7 @@ static int edgetpu_firmware_handshake(struct edgetpu_firmware *et_fw) if (ret) etdev_warn(etdev, "firmware tracing restore error: %d", ret); - ret = edgetpu_thermal_restore(etdev); + ret = gcip_thermal_restore_on_powering(etdev->thermal); if (ret) etdev_warn(etdev, "thermal restore error: %d", ret); diff --git a/drivers/edgetpu/edgetpu-fs.c b/drivers/edgetpu/edgetpu-fs.c index 4cc1e70..e57dddb 100644 --- a/drivers/edgetpu/edgetpu-fs.c +++ b/drivers/edgetpu/edgetpu-fs.c @@ -500,7 +500,7 @@ static int edgetpu_ioctl_acquire_wakelock(struct edgetpu_client *client) { int count = 0; int ret = 0; - struct edgetpu_thermal *thermal = client->etdev->thermal; + struct gcip_thermal *thermal = client->etdev->thermal; trace_edgetpu_acquire_wakelock_start(current->pid); @@ -513,11 +513,9 @@ static int edgetpu_ioctl_acquire_wakelock(struct edgetpu_client *client) */ client->pid = current->pid; client->tgid = current->tgid; - edgetpu_thermal_lock(thermal); - if (edgetpu_thermal_is_suspended(thermal)) + if (gcip_thermal_is_device_suspended(thermal)) /* TPU is thermal suspended, so fail acquiring wakelock */ ret = -EAGAIN; - edgetpu_thermal_unlock(thermal); if (ret) { etdev_warn_ratelimited(client->etdev, diff --git a/drivers/edgetpu/edgetpu-internal.h b/drivers/edgetpu/edgetpu-internal.h index fe72da6..bac105a 100644 --- a/drivers/edgetpu/edgetpu-internal.h +++ b/drivers/edgetpu/edgetpu-internal.h @@ -25,7 +25,6 @@ #include <linux/irqreturn.h> #include <linux/mm_types.h> #include <linux/mutex.h> -#include <linux/notifier.h> #include <linux/refcount.h> #include <linux/scatterlist.h> #include <linux/types.h> @@ -33,10 +32,9 @@ #include <gcip/gcip-firmware.h> #include <gcip/gcip-pm.h> +#include <gcip/gcip-thermal.h> #include "edgetpu.h" -#include "edgetpu-thermal.h" -#include "edgetpu-usage-stats.h" #define get_dev_for_logging(etdev) \ 
((etdev)->etiface && (etdev)->etiface->etcdev ? (etdev)->etiface->etcdev : (etdev)->dev) @@ -201,7 +199,7 @@ struct edgetpu_dev { struct edgetpu_firmware *firmware; /* firmware management */ struct gcip_fw_tracing *fw_tracing; /* firmware tracing */ struct edgetpu_telemetry_ctx *telemetry; - struct edgetpu_thermal *thermal; + struct gcip_thermal *thermal; struct edgetpu_usage_stats *usage_stats; /* usage stats private data */ struct gcip_pm *pm; /* Power management interface */ /* Memory pool in instruction remap region */ @@ -222,7 +220,6 @@ struct edgetpu_dev { struct work_struct debug_dump_work; struct mutex freq_lock; /* protects below freq_* variables */ - struct notifier_block pmqos_nb; /* PMQoS notifier struct */ uint32_t *freq_table; /* Array to record reported frequencies by f/w */ uint32_t freq_count; /* Number of entries in freq_table */ }; diff --git a/drivers/edgetpu/edgetpu-mobile-platform.c b/drivers/edgetpu/edgetpu-mobile-platform.c index 3478367..0f38c29 100644 --- a/drivers/edgetpu/edgetpu-mobile-platform.c +++ b/drivers/edgetpu/edgetpu-mobile-platform.c @@ -22,6 +22,7 @@ #include "edgetpu-mobile-platform.h" #include "edgetpu-soc.h" #include "edgetpu-telemetry.h" +#include "edgetpu-thermal.h" #include "mobile-firmware.h" #include "mobile-pm.h" @@ -410,19 +411,21 @@ static int edgetpu_mobile_platform_probe(struct platform_device *pdev, goto out_tel_exit; } - etdev_dbg(etdev, "Creating thermal device"); - etdev->thermal = devm_tpu_thermal_create(etdev->dev, etdev); + ret = edgetpu_thermal_create(etdev); + if (ret) + etdev_warn(etdev, "Failed to create thermal device: %d", ret); + ret = edgetpu_mobile_platform_set_fw_ctx_memory(etmdev); if (ret) { etdev_err(etdev, "Failed to initialize fw context memory: %d", ret); - goto out_destroy_fw; + goto out_destroy_thermal; } if (etmdev->after_probe) { ret = etmdev->after_probe(etmdev); if (ret) { dev_err(dev, "after_probe callback failed: %d", ret); - goto out_destroy_fw; + goto out_destroy_thermal; } } @@ 
-433,7 +436,9 @@ static int edgetpu_mobile_platform_probe(struct platform_device *pdev, edgetpu_debug_pointer = etdev; return 0; -out_destroy_fw: + +out_destroy_thermal: + edgetpu_thermal_destroy(etdev); edgetpu_mobile_firmware_destroy(etdev); out_tel_exit: edgetpu_telemetry_exit(etdev); @@ -456,6 +461,7 @@ static int edgetpu_mobile_platform_remove(struct platform_device *pdev) struct edgetpu_dev *etdev = platform_get_drvdata(pdev); struct edgetpu_mobile_platform_dev *etmdev = to_mobile_dev(etdev); + edgetpu_thermal_destroy(etdev); edgetpu_mobile_firmware_destroy(etdev); edgetpu_platform_remove_irq(etmdev); gcip_pm_get(etdev->pm); diff --git a/drivers/edgetpu/edgetpu-soc.h b/drivers/edgetpu/edgetpu-soc.h index 98b1f3d..1318726 100644 --- a/drivers/edgetpu/edgetpu-soc.h +++ b/drivers/edgetpu/edgetpu-soc.h @@ -12,7 +12,6 @@ #include "edgetpu-internal.h" #include "edgetpu-kci.h" -#include "edgetpu-thermal.h" /* SoC-specific calls for the following functions. */ @@ -50,6 +49,9 @@ void edgetpu_soc_handle_reverse_kci(struct edgetpu_dev *etdev, struct gcip_kci_response_element *resp); /* Init thermal subsystem SoC specifics for TPU */ -void edgetpu_soc_thermal_init(struct edgetpu_thermal *thermal); +void edgetpu_soc_thermal_init(struct edgetpu_dev *etdev); + +/* De-init thermal subsystem SoC specifics for TPU */ +void edgetpu_soc_thermal_exit(struct edgetpu_dev *etdev); #endif /* __EDGETPU_SOC_H__ */ diff --git a/drivers/edgetpu/edgetpu-thermal.c b/drivers/edgetpu/edgetpu-thermal.c new file mode 100644 index 0000000..7ea03c7 --- /dev/null +++ b/drivers/edgetpu/edgetpu-thermal.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * EdgeTPU thermal management. 
+ * + * Copyright (C) 2023 Google LLC + */ + +#include <linux/device.h> + +#include <gcip/gcip-pm.h> +#include <gcip/gcip-thermal.h> + +#include "edgetpu-internal.h" +#include "edgetpu-kci.h" +#include "edgetpu-soc.h" +#include "edgetpu-thermal.h" + +static int __edgetpu_thermal_get_rate(void *data, unsigned long *rate) +{ + struct edgetpu_dev *etdev = data; + long ret = edgetpu_soc_pm_get_rate(etdev, 0); + + if (ret < 0) + return ret; + + *rate = ret; + + return 0; +} + +static int __edgetpu_thermal_set_rate(void *data, unsigned long rate) +{ + struct edgetpu_dev *etdev = data; + int ret; + + edgetpu_kci_block_bus_speed_control(etdev, true); + + ret = edgetpu_kci_notify_throttling(etdev, rate); + if (ret) + etdev_err_ratelimited(etdev, "Failed to notify FW about power rate %lu, error:%d", + rate, ret); + + edgetpu_kci_block_bus_speed_control(etdev, false); + + return ret; +} + +int edgetpu_thermal_set_rate(struct edgetpu_dev *etdev, unsigned long rate) +{ + return __edgetpu_thermal_set_rate(etdev, rate); +} + +static int __edgetpu_thermal_control(void *data, bool enable) +{ + return edgetpu_kci_thermal_control(data, enable); +} + +int edgetpu_thermal_create(struct edgetpu_dev *etdev) +{ + const struct gcip_thermal_args args = { + .dev = etdev->dev, + .pm = etdev->pm, + .dentry = edgetpu_fs_debugfs_dir(), + .node_name = EDGETPU_COOLING_NAME, + .type = EDGETPU_COOLING_NAME, + .data = etdev, + .get_rate = __edgetpu_thermal_get_rate, + .set_rate = __edgetpu_thermal_set_rate, + .control = __edgetpu_thermal_control, + }; + struct gcip_thermal *thermal = gcip_thermal_create(&args); + + if (IS_ERR(thermal)) + return PTR_ERR(thermal); + + etdev->thermal = thermal; + edgetpu_soc_thermal_init(etdev); + + return 0; +} + +void edgetpu_thermal_destroy(struct edgetpu_dev *etdev) +{ + edgetpu_soc_thermal_exit(etdev); + gcip_thermal_destroy(etdev->thermal); + etdev->thermal = NULL; +} diff --git a/drivers/edgetpu/edgetpu-thermal.h b/drivers/edgetpu/edgetpu-thermal.h index 
f1280a0..588a7c7 100644 --- a/drivers/edgetpu/edgetpu-thermal.h +++ b/drivers/edgetpu/edgetpu-thermal.h @@ -1,126 +1,18 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * EdgeTPU thermal driver header. + * EdgeTPU thermal management header. * - * Copyright (C) 2020 Google, Inc. + * Copyright (C) 2020-2023 Google LLC */ #ifndef __EDGETPU_THERMAL_H__ #define __EDGETPU_THERMAL_H__ -#include <linux/debugfs.h> -#include <linux/device.h> -#include <linux/mutex.h> -#include <linux/thermal.h> - #include "edgetpu-internal.h" -#define EDGETPU_COOLING_NAME "tpu_cooling" - -struct edgetpu_thermal { - struct device *dev; - struct dentry *cooling_root; - struct thermal_cooling_device *cdev; - struct mutex lock; - void *op_data; - unsigned long cooling_state; - unsigned long sysfs_req; - unsigned int tpu_num_states; - struct edgetpu_dev *etdev; - bool thermal_suspended; /* TPU thermal suspended state */ - unsigned long thermal_vote[2]; /* Thermal vote array, idx0: Tskin; idx1: BCL */ - bool enabled; -}; - -struct edgetpu_state_pwr { - unsigned long state; - u32 power; -}; - -/* - * Creates a managed edgetpu_thermal object. - * - * Returns -errno on error. - */ -struct edgetpu_thermal *devm_tpu_thermal_create(struct device *dev, - struct edgetpu_dev *etdev); - -/* - * Marks the TPU is suspended and informs TPU device if it's powered. - * - * Returns 0 on success. - */ -int edgetpu_thermal_suspend(struct device *dev); -/* - * Resumes the TPU from the suspend state and informs TPU CPU if it's powered. - * - * Returns 0 on success. - */ -int edgetpu_thermal_resume(struct device *dev); - -/* - * Sends the thermal throttling KCI if the device is powered. - * - * Returns the return value of KCI if the device is powered, otherwise -EAGAIN. - */ -int edgetpu_thermal_kci_if_powered(struct edgetpu_dev *etdev, u32 state); - -/* - * Sends thermal throttling KCI to restore the last thermal state. 
- * - * The caller must guarantee the device stays powered up, typically by calling gcip_pm_get() or - * by calling this function from the power management functions themselves. - * - * Returns 0 if no thermal throttling required; otherwise the return value of KCI. - */ -int edgetpu_thermal_restore(struct edgetpu_dev *etdev); - -/* - * Callback for BCL vote for throttling - * - * This goes through the same path as regular Tskin throttling - * - * Returns 0 if successful, otherwise negative error. - */ -int edgetpu_set_cur_state_bcl(struct thermal_cooling_device *cdev, unsigned long state_original); - -/* - * API to map frequency to cooling state - * - * Returns state if successful. On an invalid input it returns lowest state. - */ -int edgetpu_state_to_cooling(struct edgetpu_dev *etdev, unsigned long state); +#define EDGETPU_COOLING_NAME "tpu-cooling" -/* - * Holds thermal->lock. - * - * Does nothing if the thermal management is not supported. - */ -static inline void edgetpu_thermal_lock(struct edgetpu_thermal *thermal) -{ - if (!IS_ERR_OR_NULL(thermal)) - mutex_lock(&thermal->lock); -} - -/* - * Checks whether device is thermal suspended. - * Returns false if the thermal management is not supported. - */ -static inline bool edgetpu_thermal_is_suspended(struct edgetpu_thermal *thermal) -{ - if (!IS_ERR_OR_NULL(thermal)) - return thermal->thermal_suspended; - return false; -} - -/* - * Releases thermal->lock. - * - * Does nothing if the thermal management is not supported. 
- */ -static inline void edgetpu_thermal_unlock(struct edgetpu_thermal *thermal) -{ - if (!IS_ERR_OR_NULL(thermal)) - mutex_unlock(&thermal->lock); -} +int edgetpu_thermal_create(struct edgetpu_dev *etdev); +void edgetpu_thermal_destroy(struct edgetpu_dev *etdev); +int edgetpu_thermal_set_rate(struct edgetpu_dev *etdev, unsigned long rate); #endif /* __EDGETPU_THERMAL_H__ */ diff --git a/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/Makefile b/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/Makefile index bc370e5..c47f1c5 100644 --- a/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/Makefile +++ b/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/Makefile @@ -15,7 +15,8 @@ gcip-objs := gcip-alloc-helper.o \ gcip-mailbox.o \ gcip-mem-pool.o \ gcip-pm.o \ - gcip-telemetry.o + gcip-telemetry.o \ + gcip-thermal.o CURRENT_DIR=$(dir $(abspath $(lastword $(MAKEFILE_LIST)))) diff --git a/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-domain-pool.c b/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-domain-pool.c index 2341b52..c3c41ea 100644 --- a/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-domain-pool.c +++ b/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-domain-pool.c @@ -12,6 +12,11 @@ #include <gcip/gcip-domain-pool.h> +struct dynamic_domain { + struct list_head list_entry; + struct iommu_domain *domain; +}; + int gcip_domain_pool_init(struct device *dev, struct gcip_domain_pool *pool, unsigned int size) { unsigned int i; @@ -19,6 +24,8 @@ int gcip_domain_pool_init(struct device *dev, struct gcip_domain_pool *pool, uns pool->size = size; pool->dev = dev; + INIT_LIST_HEAD(&pool->dynamic_domains); + mutex_init(&pool->lock); if (!size) return 0; @@ -48,9 +55,23 @@ int gcip_domain_pool_init(struct device *dev, struct gcip_domain_pool *pool, uns struct iommu_domain *gcip_domain_pool_alloc(struct gcip_domain_pool *pool) { int id; + struct dynamic_domain *ddomain; - if (!pool->size) - return iommu_domain_alloc(pool->dev->bus); + if (!pool->size) { + 
ddomain = vzalloc(sizeof(*ddomain)); + if (!ddomain) + return ERR_PTR(-ENOMEM); + + ddomain->domain = iommu_domain_alloc(pool->dev->bus); + if (!ddomain->domain) { + vfree(ddomain); + return NULL; + } + mutex_lock(&pool->lock); + list_add_tail(&ddomain->list_entry, &pool->dynamic_domains); + mutex_unlock(&pool->lock); + return ddomain->domain; + } id = ida_alloc_max(&pool->idp, pool->size - 1, GFP_KERNEL); @@ -67,11 +88,25 @@ struct iommu_domain *gcip_domain_pool_alloc(struct gcip_domain_pool *pool) void gcip_domain_pool_free(struct gcip_domain_pool *pool, struct iommu_domain *domain) { int id; + struct dynamic_domain *ddomain; + struct list_head *cur, *nxt; if (!pool->size) { - iommu_domain_free(domain); + mutex_lock(&pool->lock); + list_for_each_safe(cur, nxt, &pool->dynamic_domains) { + ddomain = container_of(cur, struct dynamic_domain, list_entry); + if (ddomain->domain == domain) { + list_del(&ddomain->list_entry); + mutex_unlock(&pool->lock); + iommu_domain_free(domain); + vfree(ddomain); + return; + } + } + mutex_unlock(&pool->lock); return; } + for (id = 0; id < pool->size; id++) { if (pool->array[id] == domain) { dev_dbg(pool->dev, "Released domain from pool with id = %d\n", id); @@ -85,9 +120,20 @@ void gcip_domain_pool_free(struct gcip_domain_pool *pool, struct iommu_domain *d void gcip_domain_pool_destroy(struct gcip_domain_pool *pool) { int i; + struct dynamic_domain *ddomain; + struct list_head *cur, *nxt; - if (!pool->size) + if (!pool->size) { + mutex_lock(&pool->lock); + list_for_each_safe(cur, nxt, &pool->dynamic_domains) { + ddomain = container_of(cur, struct dynamic_domain, list_entry); + list_del(&ddomain->list_entry); + iommu_domain_free(ddomain->domain); + vfree(ddomain); + } + mutex_unlock(&pool->lock); return; + } dev_dbg(pool->dev, "Destroying domain pool with %u domains\n", pool->size); diff --git a/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-pm.c b/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-pm.c index 43d9654..54589e0 
100644 --- a/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-pm.c +++ b/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-pm.c @@ -228,8 +228,7 @@ void gcip_pm_shutdown(struct gcip_pm *pm, bool force) if (pm->count) { if (!force) goto unlock; - else - dev_warn(pm->dev, "Force shutdown with power up count: %d", pm->count); + dev_warn(pm->dev, "Force shutdown with power up count: %d", pm->count); } gcip_pm_try_power_down(pm); diff --git a/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-thermal.c b/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-thermal.c new file mode 100644 index 0000000..5afa65e --- /dev/null +++ b/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-thermal.c @@ -0,0 +1,517 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Thermal management support for GCIP devices. + * + * Copyright (C) 2023 Google LLC + */ + +#include <linux/debugfs.h> +#include <linux/device.h> +#include <linux/minmax.h> +#include <linux/mutex.h> +#include <linux/notifier.h> +#include <linux/thermal.h> +#include <linux/version.h> + +#include <gcip/gcip-pm.h> +#include <gcip/gcip-thermal.h> + +#define OF_DATA_NUM_MAX (GCIP_THERMAL_MAX_NUM_STATES * 2) + +#define to_cdev(dev) container_of(dev, struct thermal_cooling_device, device) +#define to_gcip_thermal(dev) ((struct gcip_thermal *)to_cdev(dev)->devdata) + +/* Struct for state to rate and state to power mappings. 
*/ +struct gcip_rate_pwr { + unsigned long rate; + u32 power; +}; + +static struct gcip_rate_pwr state_map[GCIP_THERMAL_MAX_NUM_STATES] = { 0 }; + +static int gcip_thermal_get_max_state(struct thermal_cooling_device *cdev, unsigned long *state) +{ + struct gcip_thermal *thermal = cdev->devdata; + + if (!thermal->num_states) + return -ENODEV; + + *state = thermal->num_states - 1; + + return 0; +} + +static int gcip_thermal_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *state) +{ + struct gcip_thermal *thermal = cdev->devdata; + + mutex_lock(&thermal->lock); + *state = thermal->state; + mutex_unlock(&thermal->lock); + + return 0; +} + +static int gcip_thermal_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state) +{ + struct gcip_thermal *thermal = cdev->devdata; + int i, ret = 0; + + if (state >= thermal->num_states) { + dev_err(thermal->dev, "Invalid thermal cooling state %lu\n", state); + return -EINVAL; + } + + mutex_lock(&thermal->lock); + + thermal->vote[GCIP_THERMAL_COOLING_DEVICE] = state; + for (i = 0; i < GCIP_THERMAL_MAX_NUM_VOTERS; i++) + state = max(state, thermal->vote[i]); + + if (state == thermal->state) + goto out; + + if (!gcip_pm_get_if_powered(thermal->pm, false)) { + ret = thermal->set_rate(thermal->data, state_map[state].rate); + gcip_pm_put(thermal->pm); + } + + if (ret) + dev_err(thermal->dev, "Failed to set thermal cooling state: %d\n", ret); + else + thermal->state = state; +out: + mutex_unlock(&thermal->lock); + + return ret; +} + +static int gcip_thermal_rate2power_internal(struct gcip_thermal *thermal, unsigned long rate, + u32 *power) +{ + int i; + + for (i = 0; i < thermal->num_states; i++) { + if (rate == state_map[i].rate) { + *power = state_map[i].power; + return 0; + } + } + + dev_err(thermal->dev, "Unknown rate for: %lu\n", rate); + *power = 0; + + return -EINVAL; +} + +static int gcip_thermal_get_requested_power(struct thermal_cooling_device *cdev, u32 *power) +{ + struct gcip_thermal *thermal = 
cdev->devdata; + unsigned long rate; + int ret; + + if (gcip_pm_get_if_powered(thermal->pm, false)) { + *power = 0; + return 0; + } + + mutex_lock(&thermal->lock); + + ret = thermal->get_rate(thermal->data, &rate); + + mutex_unlock(&thermal->lock); + gcip_pm_put(thermal->pm); + + if (ret) + return ret; + + return gcip_thermal_rate2power_internal(thermal, rate, power); +} + +static int gcip_thermal_state2power(struct thermal_cooling_device *cdev, unsigned long state, + u32 *power) +{ + struct gcip_thermal *thermal = cdev->devdata; + + if (state >= thermal->num_states) { + dev_err(thermal->dev, "Invalid state: %lu\n", state); + return -EINVAL; + } + + return gcip_thermal_rate2power_internal(thermal, state_map[state].rate, power); +} + +static int gcip_thermal_power2state(struct thermal_cooling_device *cdev, u32 power, + unsigned long *state) +{ + struct gcip_thermal *thermal = cdev->devdata; + + if (!thermal->num_states) + return -ENODEV; + + /* + * Argument "power" is the maximum allowed power consumption in mW as defined by the PID + * control loop. Checks for the first state that is less than or equal to the current + * allowed power. state_map is descending, so lowest power consumption is last value in the + * array. Returns lowest state even if it consumes more power than allowed as not all + * platforms can handle throttling below an active state. + */ + for (*state = 0; *state < thermal->num_states; (*state)++) + if (power >= state_map[*state].power) + return 0; + + *state = thermal->num_states - 1; + + return 0; +} + +static const struct thermal_cooling_device_ops gcip_thermal_ops = { + .get_max_state = gcip_thermal_get_max_state, + .get_cur_state = gcip_thermal_get_cur_state, + .set_cur_state = gcip_thermal_set_cur_state, + .get_requested_power = gcip_thermal_get_requested_power, + .state2power = gcip_thermal_state2power, + .power2state = gcip_thermal_power2state, +}; + +/* This API was removed, but Android still uses it to update thermal request. 
*/ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 12, 0) && IS_ENABLED(CONFIG_ANDROID) +void thermal_cdev_update(struct thermal_cooling_device *cdev); +#endif + +static void gcip_thermal_update(struct gcip_thermal *thermal) +{ + struct thermal_cooling_device *cdev = thermal->cdev; + + cdev->updated = false; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) || IS_ENABLED(CONFIG_ANDROID) + thermal_cdev_update(cdev); +#elif IS_ENABLED(CONFIG_THERMAL) + dev_err_once(dev, "Thermal update not implemented"); +#endif +} + +static ssize_t user_vote_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct gcip_thermal *thermal = to_gcip_thermal(dev); + ssize_t ret; + + if (!thermal) + return -ENODEV; + + mutex_lock(&thermal->lock); + ret = sysfs_emit(buf, "%lu\n", thermal->vote[GCIP_THERMAL_SYSFS]); + mutex_unlock(&thermal->lock); + + return ret; +} + +static ssize_t user_vote_store(struct device *dev, struct device_attribute *attr, const char *buf, + size_t count) +{ + struct gcip_thermal *thermal = to_gcip_thermal(dev); + unsigned long state; + int ret; + + if (!thermal) + return -ENODEV; + + ret = kstrtoul(buf, 0, &state); + if (ret) + return ret; + + if (state >= thermal->num_states) + return -EINVAL; + + mutex_lock(&thermal->lock); + thermal->vote[GCIP_THERMAL_SYSFS] = state; + mutex_unlock(&thermal->lock); + + gcip_thermal_update(thermal); + + return count; +} + +static DEVICE_ATTR_RW(user_vote); + +static int gcip_thermal_rate2state(struct gcip_thermal *thermal, unsigned long rate) +{ + int i; + + for (i = 0; i < thermal->num_states; i++) { + if (state_map[i].rate <= rate) + return i; + } + + /* Returns lowest state on an invalid input. 
*/ + return thermal->num_states - 1; +} + +static int gcip_thermal_notifier(struct notifier_block *nb, unsigned long rate, void *nb_data) +{ + struct gcip_thermal *thermal = container_of(nb, struct gcip_thermal, nb); + unsigned long state = gcip_thermal_rate2state(thermal, rate); + + dev_dbg(thermal->dev, "Thermal notifier req original: %lu, state: %lu\n", rate, state); + + mutex_lock(&thermal->lock); + thermal->vote[GCIP_THERMAL_NOTIFIER_BLOCK] = state; + mutex_unlock(&thermal->lock); + + gcip_thermal_update(thermal); + + return NOTIFY_OK; +} + +struct notifier_block *gcip_thermal_get_notifier_block(struct gcip_thermal *thermal) +{ + if (IS_ERR_OR_NULL(thermal)) + return NULL; + + return &thermal->nb; +} + +void gcip_thermal_destroy(struct gcip_thermal *thermal) +{ + if (IS_ERR_OR_NULL(thermal)) + return; + + debugfs_remove_recursive(thermal->dentry); + thermal_cooling_device_unregister(thermal->cdev); + devm_kfree(thermal->dev, thermal); +} + +static int gcip_thermal_enable_get(void *data, u64 *val) +{ + struct gcip_thermal *thermal = (struct gcip_thermal *)data; + + mutex_lock(&thermal->lock); + *val = thermal->enabled; + mutex_unlock(&thermal->lock); + + return 0; +} + +static int gcip_thermal_enable_set(void *data, u64 val) +{ + struct gcip_thermal *thermal = (struct gcip_thermal *)data; + int ret = 0; + + mutex_lock(&thermal->lock); + + if (thermal->enabled != (bool)val) { + /* + * If the device is not powered, the value will be restored by + * gcip_thermal_restore_on_powering in next fw boot. + */ + if (!gcip_pm_get_if_powered(thermal->pm, false)) { + ret = thermal->control(thermal->data, val); + gcip_pm_put(thermal->pm); + } + + if (!ret) { + thermal->enabled = val; + dev_info_ratelimited(thermal->dev, "%s thermal control", + thermal->enabled ? "Enable" : "Disable"); + } else { + dev_err(thermal->dev, "Failed to %s thermal control: %d ", + val ? 
"enable" : "disable", ret); + } + } + + mutex_unlock(&thermal->lock); + + return ret; +} + +DEFINE_DEBUGFS_ATTRIBUTE(fops_gcip_thermal_enable, gcip_thermal_enable_get, gcip_thermal_enable_set, + "%llu\n"); + +static int gcip_thermal_parse_dvfs_table(struct gcip_thermal *thermal) +{ + int row_size, col_size, tbl_size, i; + int of_data_int_array[OF_DATA_NUM_MAX]; + + if (of_property_read_u32_array(thermal->dev->of_node, GCIP_THERMAL_TABLE_SIZE_NAME, + of_data_int_array, 2)) + goto error; + + row_size = of_data_int_array[0]; + col_size = of_data_int_array[1]; + tbl_size = row_size * col_size; + if (row_size > GCIP_THERMAL_MAX_NUM_STATES) { + dev_err(thermal->dev, "Too many states\n"); + goto error; + } + + if (tbl_size > OF_DATA_NUM_MAX) + goto error; + + if (of_property_read_u32_array(thermal->dev->of_node, GCIP_THERMAL_TABLE_NAME, + of_data_int_array, tbl_size)) + goto error; + + thermal->num_states = row_size; + for (i = 0; i < row_size; ++i) { + int idx = col_size * i; + + state_map[i].rate = of_data_int_array[idx]; + state_map[i].power = of_data_int_array[idx + 1]; + } + + return 0; + +error: + dev_err(thermal->dev, "Failed to parse DVFS table\n"); + + return -EINVAL; +} + +static int gcip_thermal_cooling_register(struct gcip_thermal *thermal, const char *type, + const char *node_name) +{ + struct device_node *node = NULL; + int ret; + + ret = gcip_thermal_parse_dvfs_table(thermal); + if (ret) + return ret; + + if (node_name) + node = of_find_node_by_name(NULL, node_name); + if (!node) + dev_warn(thermal->dev, "Failed to find thermal cooling node\n"); + + thermal->cdev = thermal_of_cooling_device_register(node, type, thermal, &gcip_thermal_ops); + if (IS_ERR(thermal->cdev)) + return PTR_ERR(thermal->cdev); + + ret = device_create_file(&thermal->cdev->device, &dev_attr_user_vote); + if (ret) + thermal_cooling_device_unregister(thermal->cdev); + + return ret; +} + +struct gcip_thermal *gcip_thermal_create(const struct gcip_thermal_args *args) +{ + struct 
gcip_thermal *thermal; + int ret; + + if (!args->dev || !args->get_rate || !args->set_rate || !args->control) + return ERR_PTR(-EINVAL); + + thermal = devm_kzalloc(args->dev, sizeof(*thermal), GFP_KERNEL); + if (!thermal) + return ERR_PTR(-ENOMEM); + + thermal->dev = args->dev; + thermal->nb.notifier_call = gcip_thermal_notifier; + thermal->pm = args->pm; + thermal->enabled = true; + thermal->data = args->data; + thermal->get_rate = args->get_rate; + thermal->set_rate = args->set_rate; + thermal->control = args->control; + + mutex_init(&thermal->lock); + + ret = gcip_thermal_cooling_register(thermal, args->type, args->node_name); + if (ret) { + dev_err(args->dev, "Failed to initialize external thermal cooling\n"); + devm_kfree(args->dev, thermal); + return ERR_PTR(ret); + } + + thermal->dentry = debugfs_create_dir("cooling", args->dentry); + /* Don't let debugfs creation failure abort the init procedure. */ + if (IS_ERR_OR_NULL(thermal->dentry)) + dev_warn(args->dev, "Failed to create debugfs for thermal cooling"); + else + debugfs_create_file("enable", 0660, thermal->dentry, thermal, + &fops_gcip_thermal_enable); + + return thermal; +} + +int gcip_thermal_suspend_device(struct gcip_thermal *thermal) +{ + int ret = 0; + + if (IS_ERR_OR_NULL(thermal)) + return 0; + + mutex_lock(&thermal->lock); + + /* + * Always sets as suspended even when the request cannot be handled for unknown reasons + * because we still want to prevent the client from using device. 
+ */ + thermal->device_suspended = true; + if (!gcip_pm_get_if_powered(thermal->pm, false)) { + ret = thermal->set_rate(thermal->data, 0); + gcip_pm_put(thermal->pm); + } + + mutex_unlock(&thermal->lock); + + return ret; +} + +int gcip_thermal_resume_device(struct gcip_thermal *thermal) +{ + int ret = 0; + + if (IS_ERR_OR_NULL(thermal)) + return 0; + + mutex_lock(&thermal->lock); + + if (!gcip_pm_get_if_powered(thermal->pm, false)) { + ret = thermal->set_rate(thermal->data, state_map[thermal->state].rate); + gcip_pm_put(thermal->pm); + } + + /* + * Unlike gcip_thermal_suspend_device(), only sets the device as resumed if the request is + * fulfilled. + */ + if (!ret) + thermal->device_suspended = false; + + mutex_unlock(&thermal->lock); + + return ret; +} + +bool gcip_thermal_is_device_suspended(struct gcip_thermal *thermal) +{ + if (IS_ERR_OR_NULL(thermal)) + return false; + + return thermal->device_suspended; +} + +int gcip_thermal_restore_on_powering(struct gcip_thermal *thermal) +{ + int ret = 0; + + if (IS_ERR_OR_NULL(thermal)) + return 0; + + gcip_pm_lockdep_assert_held(thermal->pm); + mutex_lock(&thermal->lock); + + if (!thermal->enabled) + ret = thermal->control(thermal->data, thermal->enabled); + else if (thermal->device_suspended) + ret = thermal->set_rate(thermal->data, 0); + else if (thermal->state) + /* Skips state 0 since it's the default thermal state. 
*/ + ret = thermal->set_rate(thermal->data, state_map[thermal->state].rate); + + mutex_unlock(&thermal->lock); + + return ret; +} diff --git a/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-domain-pool.h b/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-domain-pool.h index b740bf9..a5441a9 100644 --- a/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-domain-pool.h +++ b/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-domain-pool.h @@ -8,8 +8,11 @@ #ifndef __GCIP_DOMAIN_POOL_H__ #define __GCIP_DOMAIN_POOL_H__ +#include <linux/device.h> #include <linux/idr.h> #include <linux/iommu.h> +#include <linux/mutex.h> +#include <linux/types.h> struct gcip_domain_pool { struct ida idp; /* ID allocator to keep track of used domains. */ @@ -20,6 +23,8 @@ struct gcip_domain_pool { unsigned int size; struct iommu_domain **array; /* Array holding the pointers to pre-allocated domains. */ struct device *dev; /* The device used for logging warnings/errors. */ + struct list_head dynamic_domains; /* Tracks dynamically allocated domains for cleanup. */ + struct mutex lock; /* Protects dynamic_domains. */ }; /* diff --git a/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-firmware.h b/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-firmware.h index 8cf4353..b48317b 100644 --- a/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-firmware.h +++ b/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-firmware.h @@ -44,9 +44,10 @@ enum gcip_fw_flavor { GCIP_FW_FLAVOR_CUSTOM = 4, }; -/* Type of firmware crash which will be sent by GCIP_RKCI_FIRMWARE_CRASH RKCI command. */ +/* Type of firmware crash. */ enum gcip_fw_crash_type { - /* Assert happened. */ + /* Type which will be sent by GCIP_RKCI_FIRMWARE_CRASH reverse KCI. */ + /*Assert happened. */ GCIP_FW_CRASH_ASSERT_FAIL = 0, /* Data abort exception. */ GCIP_FW_CRASH_DATA_ABORT = 1, @@ -58,6 +59,9 @@ enum gcip_fw_crash_type { GCIP_FW_CRASH_UNRECOVERABLE_FAULT = 4, /* Used in debug dump. 
*/ GCIP_FW_CRASH_DUMMY_CRASH_TYPE = 0xFF, + + /* HW watchdog timeout. */ + GCIP_FW_CRASH_HW_WDG_TIMEOUT = 0x100, }; /* Firmware info filled out via KCI FIRMWARE_INFO command. */ diff --git a/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-thermal.h b/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-thermal.h new file mode 100644 index 0000000..f742705 --- /dev/null +++ b/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-thermal.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Thermal management support for GCIP devices. + * + * Copyright (C) 2023 Google LLC + */ + +#ifndef __GCIP_THERMAL_H__ +#define __GCIP_THERMAL_H__ + +#include <linux/device.h> +#include <linux/mutex.h> +#include <linux/notifier.h> +#include <linux/thermal.h> + +#define GCIP_THERMAL_TABLE_SIZE_NAME "gcip-dvfs-table-size" +#define GCIP_THERMAL_TABLE_NAME "gcip-dvfs-table" +#define GCIP_THERMAL_MAX_NUM_STATES 10 + +enum gcip_thermal_voter { + GCIP_THERMAL_COOLING_DEVICE, + GCIP_THERMAL_SYSFS, + GCIP_THERMAL_NOTIFIER_BLOCK, + + /* Keeps as the last entry for the total number of voters. */ + GCIP_THERMAL_MAX_NUM_VOTERS, +}; + +struct gcip_thermal { + struct device *dev; + struct thermal_cooling_device *cdev; + struct notifier_block nb; + struct dentry *dentry; + struct gcip_pm *pm; + + /* + * Lock to protect the struct members listed below. + * + * Note that since the request of thermal state adjusting might happen during power state + * transitions (i.e., another thread calling gcip_thermal_restore() with pm lock held), one + * must either use the non-blocking gcip_pm_get_if_powered() or make sure there won't be any + * new power transition after holding this thermal lock to prevent deadlock. + */ + struct mutex lock; + unsigned long num_states; + unsigned long state; + unsigned long vote[GCIP_THERMAL_MAX_NUM_VOTERS]; + bool device_suspended; + bool enabled; + + /* Private data. See struct gcip_thermal_args.*/ + void *data; + + /* Callbacks. 
See struct gcip_thermal_args. */ + int (*get_rate)(void *data, unsigned long *rate); + int (*set_rate)(void *data, unsigned long rate); + int (*control)(void *data, bool enable); +}; + +/* Arguments for devm_gcip_thermal_create. */ +struct gcip_thermal_args { + /* Device struct of GCIP device. */ + struct device *dev; + /* GCIP power management. */ + struct gcip_pm *pm; + /* Top-level debugfs directory for the device. */ + struct dentry *dentry; + /* Name of the thermal cooling-device node in device tree. */ + const char *node_name; + /* Thermal cooling device type for thermal_of_cooling_device_register() . */ + const char *type; + /* Private data for callbacks listed below. */ + void *data; + /* + * Callbacks listed below are called only if the device is powered and with the guarantee + * that there won't be any new power transition during the call (i.e., after + * gcip_pm_get_if_powered() succeeds or during the power up triggered by gcip_pm_get()) + * to prevent deadlock since they are called with thermal lock held. See the note about + * thermal lock in struct gcip_thermal. + */ + /* Callback to get the device clock rate. */ + int (*get_rate)(void *data, unsigned long *rate); + /* + * Callback to set the device clock rate. + * Might be called with pm lock held in gcip_thermal_restore_on_powering(). + */ + int (*set_rate)(void *data, unsigned long rate); + /* + * Callback to enable/disable the thermal control. + * Might be called with pm lock held in gcip_thermal_restore_on_powering(). + */ + int (*control)(void *data, bool enable); +}; + +/* Gets the notifier_block struct for thermal throttling requests. */ +struct notifier_block *gcip_thermal_get_notifier_block(struct gcip_thermal *thermal); +/* Allocates and initializes GCIP thermal struct. */ +struct gcip_thermal *gcip_thermal_create(const struct gcip_thermal_args *args); +/* Destroys and frees GCIP thermal struct. 
*/ +void gcip_thermal_destroy(struct gcip_thermal *thermal); +/* Suspends the device due to thermal request. */ +int gcip_thermal_suspend_device(struct gcip_thermal *thermal); +/* Resumes the device and restores previous thermal state. */ +int gcip_thermal_resume_device(struct gcip_thermal *thermal); +/* + * Checks whether the device is suspended by thermal. + * Note that it's checked without thermal lock and state might change subsequently. + */ +bool gcip_thermal_is_device_suspended(struct gcip_thermal *thermal); +/* + * Restores the previous thermal state. + * + * This function is designed to restore the thermal state during power management calls and thus it + * assumes the caller holds the pm lock. + */ +int gcip_thermal_restore_on_powering(struct gcip_thermal *thermal); + +#endif /* __GCIP_THERMAL_H__ */ diff --git a/drivers/edgetpu/mobile-pm.c b/drivers/edgetpu/mobile-pm.c index 0a6b194..c4283f8 100644 --- a/drivers/edgetpu/mobile-pm.c +++ b/drivers/edgetpu/mobile-pm.c @@ -147,10 +147,14 @@ static int mobile_pwr_policy_set(void *data, u64 val) struct edgetpu_dev *etdev = (typeof(etdev))data; struct edgetpu_mobile_platform_dev *etmdev = to_mobile_dev(etdev); struct edgetpu_mobile_platform_pwr *platform_pwr = &etmdev->platform_pwr; - int ret; + int ret = -EAGAIN; mutex_lock(&platform_pwr->policy_lock); - ret = edgetpu_thermal_kci_if_powered(etdev, val); + + if (!gcip_pm_get_if_powered(etdev->pm, false)) { + ret = edgetpu_thermal_set_rate(etdev, val); + gcip_pm_put(etdev->pm); + } if (ret) { dev_err(etmdev->edgetpu_dev.dev, diff --git a/drivers/edgetpu/mobile-soc-gsx01.c b/drivers/edgetpu/mobile-soc-gsx01.c index d4163ba..143841d 100644 --- a/drivers/edgetpu/mobile-soc-gsx01.c +++ b/drivers/edgetpu/mobile-soc-gsx01.c @@ -2,13 +2,14 @@ /* * Edge TPU functions for GSX01 SoCs. 
* - * Copyright (C) 2022 Google LLC + * Copyright (C) 2022-2023 Google LLC */ #include <linux/acpm_dvfs.h> #include <linux/device.h> #include <linux/errno.h> #include <linux/gsa/gsa_tpu.h> +#include <linux/notifier.h> #include <linux/platform_device.h> #include <linux/thermal.h> #include <linux/types.h> @@ -17,13 +18,13 @@ #include <soc/google/gs_tmu_v3.h> #include <gcip/gcip-pm.h> +#include <gcip/gcip-thermal.h> #include "edgetpu-internal.h" #include "edgetpu-firmware.h" #include "edgetpu-kci.h" #include "edgetpu-mobile-platform.h" #include "edgetpu-soc.h" -#include "edgetpu-thermal.h" #include "mobile-firmware.h" #include "mobile-soc-gsx01.h" @@ -574,14 +575,29 @@ static int tpu_pause_callback(enum thermal_pause_state action, void *dev) return ret; if (action == THERMAL_SUSPEND) - ret = edgetpu_thermal_suspend(dev); + ret = gcip_thermal_suspend_device(dev); else if (action == THERMAL_RESUME) - ret = edgetpu_thermal_resume(dev); + ret = gcip_thermal_resume_device(dev); return ret; } -void edgetpu_soc_thermal_init(struct edgetpu_thermal *thermal) +void edgetpu_soc_thermal_init(struct edgetpu_dev *etdev) { + struct gcip_thermal *thermal = etdev->thermal; + struct notifier_block *nb = gcip_thermal_get_notifier_block(thermal); + register_tpu_thermal_pause_cb(tpu_pause_callback, thermal->dev); + + if (etdev->soc_data->bcl_dev) + exynos_pm_qos_add_notifier(PM_QOS_TPU_FREQ_MAX, nb); +} + +void edgetpu_soc_thermal_exit(struct edgetpu_dev *etdev) +{ + struct gcip_thermal *thermal = etdev->thermal; + struct notifier_block *nb = gcip_thermal_get_notifier_block(thermal); + + if (etdev->soc_data->bcl_dev) + exynos_pm_qos_remove_notifier(PM_QOS_TPU_FREQ_MAX, nb); } diff --git a/drivers/edgetpu/mobile-thermal.c b/drivers/edgetpu/mobile-thermal.c deleted file mode 100644 index 34813e9..0000000 --- a/drivers/edgetpu/mobile-thermal.c +++ /dev/null @@ -1,555 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Common EdgeTPU mobile thermal management support - * - * Copyright (C) 
2021 Google, Inc. - */ - -#include <linux/debugfs.h> -#include <linux/device.h> -#include <linux/gfp.h> -#include <linux/kernel.h> -#include <linux/mutex.h> -#include <linux/of.h> -#include <linux/platform_device.h> -#include <linux/pm_runtime.h> -#include <linux/slab.h> -#include <linux/thermal.h> -#include <linux/version.h> - -#include <gcip/gcip-pm.h> - -#include "edgetpu-config.h" -#include "edgetpu-internal.h" -#include "edgetpu-kci.h" -#include "edgetpu-mmu.h" -#include "edgetpu-soc.h" -#include "edgetpu-thermal.h" -#include "mobile-pm.h" - -#define MAX_NUM_TPU_STATES 10 -#define OF_DATA_NUM_MAX (MAX_NUM_TPU_STATES * 2) -static struct edgetpu_state_pwr state_pwr_map[MAX_NUM_TPU_STATES] = {0}; - -static int edgetpu_get_max_state(struct thermal_cooling_device *cdev, unsigned long *state) -{ - struct edgetpu_thermal *thermal = cdev->devdata; - - if (thermal->tpu_num_states <= 0) - return -ENODEV; - - *state = thermal->tpu_num_states - 1; - return 0; -} - -static int __edgetpu_set_cur_state(struct thermal_cooling_device *cdev, - unsigned long state_original, int voter_id) -{ - int ret; - struct edgetpu_thermal *thermal = cdev->devdata; - struct device *dev = thermal->dev; - unsigned long pwr_state; - - if (state_original >= thermal->tpu_num_states) { - dev_err(dev, "%s: invalid cooling state %lu\n", __func__, state_original); - return -EINVAL; - } - - edgetpu_thermal_lock(thermal); - thermal->thermal_vote[voter_id] = state_original; - - state_original = max(thermal->thermal_vote[0], thermal->thermal_vote[1]); - state_original = max(thermal->sysfs_req, state_original); - - pwr_state = state_pwr_map[state_original].state; - if (state_original == thermal->cooling_state) { - ret = -EALREADY; - goto out; - } - - /* - * Set the thermal policy to allow cooling by DVFS. Any states lower - * than UUD should be handled by firmware when it gets the throttling - * notification KCI. 
- */ - if (pwr_state < TPU_ACTIVE_UUD) { - dev_warn_ratelimited(dev, - "Setting lowest DVFS state, waiting for FW to shutdown TPU"); - ret = edgetpu_thermal_kci_if_powered(thermal->etdev, TPU_ACTIVE_UUD); - } else { - ret = edgetpu_thermal_kci_if_powered(thermal->etdev, pwr_state); - } - - if (ret) { - dev_err(dev, "error setting tpu policy: %d\n", ret); - goto out; - } - thermal->cooling_state = state_original; -out: - edgetpu_thermal_unlock(thermal); - return ret; -} - -/* - * Set cooling state. - */ -static int edgetpu_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state_original) -{ - return __edgetpu_set_cur_state(cdev, state_original, 0); -} - -int edgetpu_set_cur_state_bcl(struct thermal_cooling_device *cdev, unsigned long state_original) -{ - return __edgetpu_set_cur_state(cdev, state_original, 1); -} - -static int edgetpu_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *state) -{ - int ret = 0; - struct edgetpu_thermal *thermal = cdev->devdata; - - *state = thermal->cooling_state; - if (*state < thermal->tpu_num_states) - return 0; - - dev_warn(thermal->dev, "Unknown cooling state: %lu, resetting\n", *state); - edgetpu_thermal_lock(thermal); - - ret = edgetpu_thermal_kci_if_powered(thermal->etdev, TPU_ACTIVE_NOM); - if (ret) { - dev_err(thermal->dev, "error setting tpu policy: %d\n", ret); - edgetpu_thermal_unlock(thermal); - return ret; - } - - /* setting back to "no cooling" */ - thermal->cooling_state = 0; - edgetpu_thermal_unlock(thermal); - - return 0; -} - -static int edgetpu_state2power_internal(unsigned long state, u32 *power, - struct edgetpu_thermal *thermal) -{ - int i; - - for (i = 0; i < thermal->tpu_num_states; ++i) { - if (state == state_pwr_map[i].state) { - *power = state_pwr_map[i].power; - return 0; - } - } - dev_err(thermal->dev, "Unknown state req for: %lu\n", state); - *power = 0; - return -EINVAL; -} - -static int edgetpu_get_requested_power(struct thermal_cooling_device *cdev, u32 *power) -{ - 
unsigned long state_original; - struct edgetpu_thermal *thermal = cdev->devdata; - - state_original = edgetpu_soc_pm_get_rate(thermal->etdev, 0); - return edgetpu_state2power_internal(state_original, power, thermal); -} - -static int edgetpu_state2power(struct thermal_cooling_device *cdev, unsigned long state, u32 *power) -{ - struct edgetpu_thermal *thermal = cdev->devdata; - - if (state >= thermal->tpu_num_states) { - dev_err(thermal->dev, "%s: invalid state: %lu\n", __func__, state); - return -EINVAL; - } - - return edgetpu_state2power_internal(state_pwr_map[state].state, power, thermal); -} - -static int edgetpu_power2state(struct thermal_cooling_device *cdev, u32 power, unsigned long *state) -{ - int i, penultimate_throttle_state; - struct edgetpu_thermal *thermal = cdev->devdata; - - *state = 0; - if (thermal->tpu_num_states < 2) - return thermal->tpu_num_states == 1 ? 0 : -ENODEV; - - penultimate_throttle_state = thermal->tpu_num_states - 2; - /* - * argument "power" is the maximum allowed power consumption in mW as - * defined by the PID control loop. Check for the first state that is - * less than or equal to the current allowed power. 
state_pwr_map is - * descending, so lowest power consumption is last value in the array - * return lowest state even if it consumes more power than allowed as - * not all platforms can handle throttling below an active state - */ - for (i = penultimate_throttle_state; i >= 0; --i) { - if (power < state_pwr_map[i].power) { - *state = i + 1; - break; - } - } - return 0; -} - -static struct thermal_cooling_device_ops edgetpu_cooling_ops = { - .get_max_state = edgetpu_get_max_state, - .get_cur_state = edgetpu_get_cur_state, - .set_cur_state = edgetpu_set_cur_state, - .get_requested_power = edgetpu_get_requested_power, - .state2power = edgetpu_state2power, - .power2state = edgetpu_power2state, -}; - -static void tpu_thermal_exit_cooling(struct edgetpu_thermal *thermal) -{ - if (!IS_ERR_OR_NULL(thermal->cdev)) - thermal_cooling_device_unregister(thermal->cdev); -} - -static void tpu_thermal_exit(struct edgetpu_thermal *thermal) -{ - tpu_thermal_exit_cooling(thermal); - debugfs_remove_recursive(thermal->cooling_root); -} - -static void devm_tpu_thermal_release(struct device *dev, void *res) -{ - struct edgetpu_thermal *thermal = res; - - tpu_thermal_exit(thermal); -} - -static int tpu_thermal_parse_dvfs_table(struct edgetpu_thermal *thermal) -{ - int row_size, col_size, tbl_size, i; - int of_data_int_array[OF_DATA_NUM_MAX]; - - if (of_property_read_u32_array(thermal->dev->of_node, "tpu_dvfs_table_size", - of_data_int_array, 2)) - goto error; - - row_size = of_data_int_array[0]; - col_size = of_data_int_array[1]; - tbl_size = row_size * col_size; - if (row_size > MAX_NUM_TPU_STATES) { - dev_err(thermal->dev, "too many TPU states\n"); - goto error; - } - - if (tbl_size > OF_DATA_NUM_MAX) - goto error; - - if (of_property_read_u32_array(thermal->dev->of_node, "tpu_dvfs_table", of_data_int_array, - tbl_size)) - goto error; - - thermal->tpu_num_states = row_size; - for (i = 0; i < row_size; ++i) { - int idx = col_size * i; - - state_pwr_map[i].state = of_data_int_array[idx]; - 
state_pwr_map[i].power = of_data_int_array[idx + 1]; - } - - return 0; - -error: - dev_err(thermal->dev, "failed to parse DVFS table\n"); - return -EINVAL; -} - -static ssize_t user_vote_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct thermal_cooling_device *cdev = - container_of(dev, struct thermal_cooling_device, device); - struct edgetpu_thermal *thermal = cdev->devdata; - - if (!thermal) - return -ENODEV; - - return sysfs_emit(buf, "%lu\n", thermal->sysfs_req); -} - -static ssize_t user_vote_store(struct device *dev, struct device_attribute *attr, const char *buf, - size_t count) -{ - struct thermal_cooling_device *cdev = - container_of(dev, struct thermal_cooling_device, device); - struct edgetpu_thermal *thermal = cdev->devdata; - int ret; - unsigned long state; - - if (!thermal) - return -ENODEV; - - ret = kstrtoul(buf, 0, &state); - if (ret) - return ret; - - if (state >= thermal->tpu_num_states) - return -EINVAL; - - mutex_lock(&cdev->lock); - thermal->sysfs_req = state; - cdev->updated = false; - mutex_unlock(&cdev->lock); - -#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 12, 0) - thermal_cdev_update(cdev); -#elif IS_ENABLED(CONFIG_THERMAL) - dev_err(dev, "Thermal update not implemented"); -#endif - - return count; -} - -static DEVICE_ATTR_RW(user_vote); - -static int edgetpu_thermal_control_kci(struct edgetpu_dev *etdev, bool enable) -{ - int ret; - - ret = edgetpu_kci_thermal_control(etdev, enable); - if (ret) - etdev_err_ratelimited(etdev, "Failed to %s the thermal, error:%d", - enable ? 
"enable" : "disable", ret); - - return ret; -} - -static int edgetpu_thermal_control_kci_if_powered(struct edgetpu_dev *etdev, bool enable) -{ - int ret; - - if (!gcip_pm_get_if_powered(etdev->pm, false)) - return -EAGAIN; - - ret = edgetpu_thermal_control_kci(etdev, enable); - - gcip_pm_put(etdev->pm); - - return ret; -} - -static int thermal_enable_get(void *data, u64 *val) -{ - struct edgetpu_thermal *thermal = (struct edgetpu_thermal *)data; - - edgetpu_thermal_lock(thermal); - *val = thermal->enabled; - edgetpu_thermal_unlock(thermal); - - return 0; -} - -static int thermal_enable_set(void *data, u64 val) -{ - struct edgetpu_thermal *thermal = (struct edgetpu_thermal *)data; - int ret = 0; - - edgetpu_thermal_lock(thermal); - - if (thermal->enabled != (bool)val) { - ret = edgetpu_thermal_control_kci_if_powered(thermal->etdev, val); - /* - * -EAGAIN means the fw is not powered and the value will be restored by - * edgetpu_thermal_restore in next fw boot. - */ - if (!ret || ret == -EAGAIN) { - ret = 0; - thermal->enabled = val; - } - } - - edgetpu_thermal_unlock(thermal); - - return ret; -} - -DEFINE_DEBUGFS_ATTRIBUTE(fops_thermal_enable, thermal_enable_get, thermal_enable_set, "%llu\n"); - -static int tpu_thermal_cooling_register(struct edgetpu_thermal *thermal, char *type) -{ - struct device_node *cooling_node = NULL; - int err = 0; - - thermal->op_data = NULL; - thermal->tpu_num_states = 0; - - err = tpu_thermal_parse_dvfs_table(thermal); - if (err) - return err; - - mutex_init(&thermal->lock); - cooling_node = of_find_node_by_name(NULL, "tpu-cooling"); - if (!cooling_node) - dev_warn(thermal->dev, "failed to find cooling node\n"); - /* Initialize the cooling state as 0, means "no cooling" */ - thermal->cooling_state = 0; - thermal->cdev = thermal_of_cooling_device_register(cooling_node, type, thermal, - &edgetpu_cooling_ops); - if (IS_ERR(thermal->cdev)) - return PTR_ERR(thermal->cdev); - - return device_create_file(&thermal->cdev->device, 
&dev_attr_user_vote); -} - -static int tpu_thermal_init(struct edgetpu_thermal *thermal, struct device *dev) -{ - int err; - struct dentry *d; - - thermal->dev = dev; - thermal->enabled = true; - - d = debugfs_create_dir("cooling", edgetpu_fs_debugfs_dir()); - /* don't let debugfs creation failure abort the init procedure */ - if (IS_ERR_OR_NULL(d)) { - dev_warn(dev, "failed to create debug fs for cooling"); - thermal->cooling_root = NULL; - } else { - thermal->cooling_root = d; - debugfs_create_file("enable", 0660, thermal->cooling_root, thermal, - &fops_thermal_enable); - } - - err = tpu_thermal_cooling_register(thermal, EDGETPU_COOLING_NAME); - if (err) { - dev_err(dev, "failed to initialize external cooling\n"); - tpu_thermal_exit(thermal); - return err; - } - - edgetpu_soc_thermal_init(thermal); - return 0; -} - -struct edgetpu_thermal *devm_tpu_thermal_create(struct device *dev, struct edgetpu_dev *etdev) -{ - struct edgetpu_thermal *thermal; - int err; - - thermal = devres_alloc(devm_tpu_thermal_release, sizeof(*thermal), GFP_KERNEL); - if (!thermal) - return ERR_PTR(-ENOMEM); - - thermal->etdev = etdev; - err = tpu_thermal_init(thermal, dev); - if (err) { - devres_free(thermal); - return ERR_PTR(err); - } - - devres_add(dev, thermal); - return thermal; -} - -int edgetpu_thermal_suspend(struct device *dev) -{ - struct platform_device *pdev = to_platform_device(dev); - struct edgetpu_dev *etdev = platform_get_drvdata(pdev); - struct edgetpu_thermal *thermal = etdev->thermal; - int ret = 0; - - if (IS_ERR(thermal)) - return PTR_ERR(thermal); - edgetpu_thermal_lock(thermal); - /* - * Always set as suspended even when the FW cannot handle the KCI (it's dead for some - * unknown reasons) because we still want to prevent the runtime from using TPU. 
- */ - thermal->thermal_suspended = true; - ret = edgetpu_thermal_kci_if_powered(etdev, TPU_OFF); - edgetpu_thermal_unlock(thermal); - return ret; -} - -int edgetpu_thermal_resume(struct device *dev) -{ - struct platform_device *pdev = to_platform_device(dev); - struct edgetpu_dev *etdev = platform_get_drvdata(pdev); - struct edgetpu_thermal *thermal = etdev->thermal; - int ret = 0; - - if (IS_ERR(thermal)) - return PTR_ERR(thermal); - edgetpu_thermal_lock(thermal); - - if (thermal->cooling_state >= thermal->tpu_num_states) - thermal->cooling_state = 0; - - ret = edgetpu_thermal_kci_if_powered(etdev, state_pwr_map[thermal->cooling_state].state); - /* - * Unlike edgetpu_thermal_suspend(), only set the device is resumed if the FW handled the - * KCI request. - */ - if (!ret) - thermal->thermal_suspended = false; - edgetpu_thermal_unlock(thermal); - return ret; -} - -static int edgetpu_thermal_kci(struct edgetpu_dev *etdev, u32 state) -{ - int ret; - - edgetpu_kci_block_bus_speed_control(etdev, true); - - ret = edgetpu_kci_notify_throttling(etdev, state); - if (ret) - etdev_err_ratelimited(etdev, "Failed to notify FW about power state %u, error:%d", - state, ret); - - edgetpu_kci_block_bus_speed_control(etdev, false); - - return ret; -} - -int edgetpu_thermal_kci_if_powered(struct edgetpu_dev *etdev, u32 state) -{ - int ret; - - if (gcip_pm_get_if_powered(etdev->pm, false)) - return -EAGAIN; - - ret = edgetpu_thermal_kci(etdev, state); - gcip_pm_put(etdev->pm); - - return ret; -} - -int edgetpu_thermal_restore(struct edgetpu_dev *etdev) -{ - struct edgetpu_thermal *thermal = etdev->thermal; - int ret = 0; - - if (IS_ERR_OR_NULL(thermal)) - return 0; - - edgetpu_thermal_lock(thermal); - - if (!thermal->enabled) - ret = edgetpu_thermal_control_kci(etdev, thermal->enabled); - else if (edgetpu_thermal_is_suspended(thermal)) - ret = edgetpu_thermal_kci(etdev, TPU_OFF); - else if (thermal->cooling_state) - ret = edgetpu_thermal_kci(etdev, 
state_pwr_map[thermal->cooling_state].state); - - edgetpu_thermal_unlock(thermal); - - return ret; -} - -int edgetpu_state_to_cooling(struct edgetpu_dev *etdev, unsigned long state) -{ - struct edgetpu_thermal *thermal = etdev->thermal; - int i = 0; - - for (i = 0; i < thermal->tpu_num_states; i++) { - if (state_pwr_map[i].state <= state) - return i; - } - return thermal->tpu_num_states - 1; -} diff --git a/drivers/edgetpu/rio-pm.c b/drivers/edgetpu/rio-pm.c index 81eb133..ac771ec 100644 --- a/drivers/edgetpu/rio-pm.c +++ b/drivers/edgetpu/rio-pm.c @@ -7,9 +7,7 @@ #include <linux/delay.h> #include <linux/iopoll.h> -#include <linux/notifier.h> #include <soc/google/bcl.h> -#include <soc/google/exynos_pm_qos.h> #include "edgetpu-config.h" #include "edgetpu-internal.h" @@ -144,22 +142,6 @@ static int rio_lpm_up(struct edgetpu_dev *etdev) return 0; } -static int rio_pmqos_notifier(struct notifier_block *nb, unsigned long state_freq, - void *nb_data) -{ - struct edgetpu_dev *etdev; - int state_cooling; - int ret; - - etdev = container_of(nb, struct edgetpu_dev, pmqos_nb); - state_cooling = edgetpu_state_to_cooling(etdev, state_freq); - etdev_dbg(etdev, "pmqos req original: %ld, cooling: %d\n", state_freq, state_cooling); - ret = edgetpu_set_cur_state_bcl(etdev->thermal->cdev, state_cooling); - if (ret) - etdev_err_ratelimited(etdev, "Error in BCL throttling %d\n", ret); - return NOTIFY_OK; -} - static bool rio_is_block_down(struct edgetpu_dev *etdev) { struct edgetpu_mobile_platform_dev *etmdev = to_mobile_dev(etdev); @@ -196,10 +178,6 @@ int edgetpu_chip_pm_create(struct edgetpu_dev *etdev) platform_pwr->post_fw_start = rio_post_fw_start; etdev->soc_data->bcl_dev = google_retrieve_bcl_handle(); - if (etdev->soc_data->bcl_dev) { - etdev->pmqos_nb.notifier_call = rio_pmqos_notifier; - exynos_pm_qos_add_notifier(PM_QOS_TPU_FREQ_MAX, &etdev->pmqos_nb); - } return edgetpu_mobile_pm_create(etdev); } diff --git a/drivers/edgetpu/rio-thermal.c b/drivers/edgetpu/rio-thermal.c 
deleted file mode 100644 index deb763f..0000000 --- a/drivers/edgetpu/rio-thermal.c +++ /dev/null @@ -1,2 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "mobile-thermal.c" |