Diffstat (limited to 'bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.c')
-rw-r--r--  bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.c  442
1 file changed, 442 insertions(+), 0 deletions(-)
diff --git a/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.c b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.c
new file mode 100644
index 0000000..188e53b
--- /dev/null
+++ b/bifrost/r25p0/kernel/drivers/gpu/arm/midgard/mali_kbase_dummy_job_wa.c
@@ -0,0 +1,442 @@
+/*
+ *
+ * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU licence.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ */
+
+/*
+ * Implementation of the dummy job execution workaround (for the GPU hang
+ * issue tracked as hardware issue TTRX_3485): a firmware-supplied chain of
+ * dummy jobs is run on the shader cores after they are powered up.
+ */
+
+#include <mali_kbase.h>
+#include <backend/gpu/mali_kbase_device_internal.h>
+#include <mali_kbase_dummy_job_wa.h>
+
+#include <linux/firmware.h>
+#include <linux/delay.h>
+
+#define DUMMY_JOB_WA_BINARY_NAME "valhall-1691526.wa"
+
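+/*
+ * Layout of the WA blob file: a header (which starts with the signature
+ * 0x4157, the bytes "WA" on a little-endian CPU), a v2 info record at
+ * header->info_offset, then a chain of payload blobs linked through
+ * blob_offset, where an offset of 0 terminates the chain.
+ */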
+struct wa_header {
+ u16 signature;
+ u16 version;
+ u32 info_offset;
+} __packed;
+
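+/* v2 info record: GPU VA of the dummy job chain (jc), job slot to submit
+ * on (js), offset of the first payload blob, and WA flags. */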
+struct wa_v2_info {
+ u64 jc;
+ u32 js;
+ u32 blob_offset;
+ u64 flags;
+} __packed;
+
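+/* One payload blob: GPU mapping parameters (base/size/map_flags), the
+ * payload's offset in the file, and the offset of the next blob. */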
+struct wa_blob {
+ u64 base;
+ u32 size;
+ u32 map_flags;
+ u32 payload_offset;
+ u32 blob_offset;
+} __packed;
+
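+/* Bounds check: true iff [off, off + sz) lies within [base, end). */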
+static bool in_range(const u8 *base, const u8 *end, off_t off, size_t sz)
+{
+	/* Avoid the signed-to-unsigned wrap that !(end - base - off < sz)
+	 * hits when off exceeds the buffer size: a negative left-hand side
+	 * converts to a huge size_t and wrongly passes the check. */
+	return off >= 0 && (size_t)off <= (size_t)(end - base) &&
+	       sz <= (size_t)(end - base) - (size_t)off;
+}
+
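+/* Poll a register until any of @bits reads back set; returns the set bits
+ * (zero if the ~1 ms poll budget of 100 x 10 us expires). */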
+static u32 wait_any(struct kbase_device *kbdev, off_t offset, u32 bits)
+{
+ int loop;
+ const int timeout = 100;
+ u32 val;
+
+ for (loop = 0; loop < timeout; loop++) {
+ val = kbase_reg_read(kbdev, offset);
+ if (val & bits)
+ break;
+ udelay(10);
+ }
+
+ if (loop == timeout) {
+ dev_err(kbdev->dev,
+ "Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n",
+ (unsigned long)offset, (unsigned long)bits,
+ (unsigned long)val);
+ }
+
+ return (val & bits);
+}
+
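+/* Poll a register until all of @bits are set (@set) or cleared (!@set);
+ * returns 0 on success, -ETIMEDOUT otherwise. */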
+static int wait(struct kbase_device *kbdev, off_t offset, u32 bits, bool set)
+{
+ int loop;
+ const int timeout = 100;
+ u32 val;
+ u32 target = 0;
+
+ if (set)
+ target = bits;
+
+ for (loop = 0; loop < timeout; loop++) {
+		val = kbase_reg_read(kbdev, offset);
+ if ((val & bits) == target)
+ break;
+
+ udelay(10);
+ }
+
+ if (loop == timeout) {
+ dev_err(kbdev->dev,
+ "Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n",
+ (unsigned long)offset, (unsigned long)bits,
+ (unsigned long)val);
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
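+/* Submit a single dummy job on @slot with shader affinity @cores, then
+ * busy-wait on JOB_IRQ_RAWSTAT for either its done bit or its failed bit. */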
+static inline int run_job(struct kbase_device *kbdev, int as, int slot,
+ u64 cores, u64 jc)
+{
+ u32 done;
+
+ /* setup job */
+ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_LO),
+ jc & U32_MAX);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_HI),
+ jc >> 32);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_LO),
+ cores & U32_MAX);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_HI),
+ cores >> 32);
+ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_CONFIG_NEXT),
+ JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK | as);
+
+ /* go */
+ kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_COMMAND_NEXT),
+ JS_COMMAND_START);
+
+ /* wait for the slot to finish (done, error) */
+ done = wait_any(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT),
+ (1ul << (16+slot)) | (1ul << slot));
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), done);
+
+ if (done != (1ul << slot)) {
+ dev_err(kbdev->dev,
+ "Failed to run WA job on slot %d cores 0x%llx: done 0x%lx\n",
+ slot, (unsigned long long)cores,
+ (unsigned long)done);
+ dev_err(kbdev->dev, "JS_STATUS on failure: 0x%x\n",
+ kbase_reg_read(kbdev, JOB_SLOT_REG(slot, JS_STATUS)));
+
+ return -EFAULT;
+	}
+
+	return 0;
+}
+
+/* To be called after power up & MMU init, but before everything else */
+int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores)
+{
+ int as;
+ int slot;
+ u64 jc;
+ int failed = 0;
+ int runs = 0;
+ u32 old_gpu_mask;
+ u32 old_job_mask;
+
+ if (!kbdev)
+ return -EFAULT;
+
+ if (!kbdev->dummy_job_wa.ctx)
+ return -EFAULT;
+
+ as = kbdev->dummy_job_wa.ctx->as_nr;
+ slot = kbdev->dummy_job_wa.slot;
+ jc = kbdev->dummy_job_wa.jc;
+
+ /* mask off all but MMU IRQs */
+ old_gpu_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
+ old_job_mask = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK));
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0);
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0);
+
+ /* power up requested cores */
+ kbase_reg_write(kbdev, SHADER_PWRON_LO, (cores & U32_MAX));
+ kbase_reg_write(kbdev, SHADER_PWRON_HI, (cores >> 32));
+
+ if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP) {
+ /* wait for power-ups */
+ wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), true);
+ if (cores >> 32)
+ wait(kbdev, SHADER_READY_HI, (cores >> 32), true);
+ }
+
+ if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE) {
+ int i;
+
+ /* do for each requested core */
+ for (i = 0; i < sizeof(cores) * 8; i++) {
+ u64 affinity;
+
+ affinity = 1ull << i;
+
+ if (!(cores & affinity))
+ continue;
+
+ if (run_job(kbdev, as, slot, affinity, jc))
+ failed++;
+ runs++;
+ }
+
+ } else {
+ if (run_job(kbdev, as, slot, cores, jc))
+ failed++;
+ runs++;
+ }
+
+ if (kbdev->dummy_job_wa.flags &
+ KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) {
+ /* power off shader cores (to reduce any dynamic leakage) */
+ kbase_reg_write(kbdev, SHADER_PWROFF_LO, (cores & U32_MAX));
+ kbase_reg_write(kbdev, SHADER_PWROFF_HI, (cores >> 32));
+
+ /* wait for power off complete */
+ wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), false);
+ wait(kbdev, SHADER_PWRTRANS_LO, (cores & U32_MAX), false);
+ if (cores >> 32) {
+ wait(kbdev, SHADER_READY_HI, (cores >> 32), false);
+ wait(kbdev, SHADER_PWRTRANS_HI, (cores >> 32), false);
+ }
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), U32_MAX);
+ }
+
+ /* restore IRQ masks */
+ kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), old_gpu_mask);
+ kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), old_job_mask);
+
+ if (failed)
+ dev_err(kbdev->dev,
+ "WA complete with %d failures out of %d runs\n", failed,
+ runs);
+
+ return failed ? -EFAULT : 0;
+}
+
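+/* sysfs show handler: reports the loaded WA's job slot and flags. */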
+static ssize_t show_dummy_job_wa_info(struct device * const dev,
+ struct device_attribute * const attr, char * const buf)
+{
+ struct kbase_device *const kbdev = dev_get_drvdata(dev);
+ int err;
+
+ if (!kbdev || !kbdev->dummy_job_wa.ctx)
+ return -ENODEV;
+
+ err = scnprintf(buf, PAGE_SIZE, "slot %u flags %llx\n",
+ kbdev->dummy_job_wa.slot, kbdev->dummy_job_wa.flags);
+
+ return err;
+}
+
+static DEVICE_ATTR(dummy_job_wa_info, 0444, show_dummy_job_wa_info, NULL);
+
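+/* The WA blob is only required on GPUs affected by TTRX_3485; "arm,juno"
+ * machines never need it. */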
+static bool wa_blob_load_needed(struct kbase_device *kbdev)
+{
+ if (of_machine_is_compatible("arm,juno"))
+ return false;
+
+ if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3485))
+ return true;
+
+ return false;
+}
+
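+/* Request the WA blob via the firmware API, validate it, map its payloads
+ * into a dedicated privileged context, and record it in kbdev->dummy_job_wa. */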
+int kbase_dummy_job_wa_load(struct kbase_device *kbdev)
+{
+ const struct firmware *firmware;
+ static const char wa_name[] = DUMMY_JOB_WA_BINARY_NAME;
+ const u32 signature = 0x4157;
+ const u32 version = 2;
+ const u8 *fw_end;
+ const u8 *fw;
+ const struct wa_header *header;
+ const struct wa_v2_info *v2_info;
+ u32 blob_offset;
+ int err;
+ struct kbase_context *kctx;
+
+ if (!wa_blob_load_needed(kbdev))
+ return 0;
+
+ /* load the wa */
+ err = request_firmware(&firmware, wa_name, kbdev->dev);
+
+ if (err) {
+		dev_err(kbdev->dev, "WA blob missing. Please refer to the Arm Mali DDK Valhall Release Notes, "
+			"Part number DC-06002 or contact support-mali@arm.com - driver probe will fail\n");
+ return -ENODEV;
+ }
+
+ kctx = kbase_create_context(kbdev, true,
+ BASE_CONTEXT_CREATE_FLAG_NONE, 0,
+ NULL);
+
+ if (!kctx) {
+ dev_err(kbdev->dev, "Failed to create WA context\n");
+ goto no_ctx;
+ }
+
+ fw = firmware->data;
+ fw_end = fw + firmware->size;
+
+ dev_dbg(kbdev->dev, "Loaded firmware of size %zu bytes\n",
+ firmware->size);
+
+ if (!in_range(fw, fw_end, 0, sizeof(*header))) {
+ dev_err(kbdev->dev, "WA too small\n");
+ goto bad_fw;
+ }
+
+ header = (const struct wa_header *)(fw + 0);
+
+ if (header->signature != signature) {
+ dev_err(kbdev->dev, "WA signature failure: 0x%lx\n",
+ (unsigned long)header->signature);
+ goto bad_fw;
+ }
+
+ if (header->version != version) {
+ dev_err(kbdev->dev, "WA version 0x%lx not supported\n",
+ (unsigned long)header->version);
+ goto bad_fw;
+ }
+
+ if (!in_range(fw, fw_end, header->info_offset, sizeof(*v2_info))) {
+ dev_err(kbdev->dev, "WA info offset out of bounds\n");
+ goto bad_fw;
+ }
+
+ v2_info = (const struct wa_v2_info *)(fw + header->info_offset);
+
+ if (v2_info->flags & ~KBASE_DUMMY_JOB_WA_FLAGS) {
+ dev_err(kbdev->dev, "Unsupported WA flag(s): 0x%llx\n",
+ (unsigned long long)v2_info->flags);
+ goto bad_fw;
+ }
+
+ kbdev->dummy_job_wa.slot = v2_info->js;
+ kbdev->dummy_job_wa.jc = v2_info->jc;
+ kbdev->dummy_job_wa.flags = v2_info->flags;
+
+ blob_offset = v2_info->blob_offset;
+
+ while (blob_offset) {
+ const struct wa_blob *blob;
+ size_t nr_pages;
+ u64 flags;
+ u64 gpu_va;
+ struct kbase_va_region *va_region;
+
+ if (!in_range(fw, fw_end, blob_offset, sizeof(*blob))) {
+ dev_err(kbdev->dev, "Blob offset out-of-range: 0x%lx\n",
+ (unsigned long)blob_offset);
+ goto bad_fw;
+ }
+
+ blob = (const struct wa_blob *)(fw + blob_offset);
+ if (!in_range(fw, fw_end, blob->payload_offset, blob->size)) {
+ dev_err(kbdev->dev, "Payload out-of-bounds\n");
+ goto bad_fw;
+ }
+
+ gpu_va = blob->base;
+ if (PAGE_ALIGN(gpu_va) != gpu_va) {
+ dev_err(kbdev->dev, "blob not page aligned\n");
+ goto bad_fw;
+ }
+ nr_pages = PFN_UP(blob->size);
+ flags = blob->map_flags | BASE_MEM_FLAG_MAP_FIXED;
+
+ va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages,
+ 0, &flags, &gpu_va);
+
+ if (!va_region) {
+ dev_err(kbdev->dev, "Failed to allocate for blob\n");
+ } else {
+ struct kbase_vmap_struct vmap = { 0 };
+ const u8 *payload;
+ void *dst;
+
+			/* copy the payload */
+ payload = fw + blob->payload_offset;
+
+ dst = kbase_vmap(kctx,
+ va_region->start_pfn << PAGE_SHIFT,
+ nr_pages << PAGE_SHIFT, &vmap);
+
+ if (dst) {
+ memcpy(dst, payload, blob->size);
+ kbase_vunmap(kctx, &vmap);
+ } else {
+ dev_err(kbdev->dev,
+ "Failed to copy payload\n");
+ }
+		}
+ blob_offset = blob->blob_offset; /* follow chain */
+ }
+
+ release_firmware(firmware);
+
+ kbasep_js_schedule_privileged_ctx(kbdev, kctx);
+
+ kbdev->dummy_job_wa.ctx = kctx;
+
+ err = sysfs_create_file(&kbdev->dev->kobj,
+ &dev_attr_dummy_job_wa_info.attr);
+ if (err)
+ dev_err(kbdev->dev, "SysFS file creation for dummy job wa failed\n");
+
+ return 0;
+
+bad_fw:
+ kbase_destroy_context(kctx);
+no_ctx:
+ release_firmware(firmware);
+ return -EFAULT;
+}
+
+void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev)
+{
+ struct kbase_context *wa_ctx;
+
+ /* Can be safely called even if the file wasn't created on probe */
+ sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_dummy_job_wa_info.attr);
+
+ wa_ctx = READ_ONCE(kbdev->dummy_job_wa.ctx);
+ WRITE_ONCE(kbdev->dummy_job_wa.ctx, NULL);
+ /* make this write visible before we tear down the ctx */
+ smp_mb();
+
+ if (wa_ctx) {
+ kbasep_js_release_privileged_ctx(kbdev, wa_ctx);
+ kbase_destroy_context(wa_ctx);
+ }
+}