gxp: [Copybara Auto Merge] Merge branch 'zuma' into 'android14-gs-pixel-5.15'

gcip: temporary disable gcip-iommu GCIP_MAIN_REV_ID: e1d3d01f0c2bb5da0b7cc986321c70287138c8ef gxp: remove non-carveout scratchpad region support Bug: 265562894 gxp: set default work mode to MCU Bug: 272600756 gxp: Add ftrace events Bug: 271933339 gxp: fix compiler warning in gxp-thermal gcip: implement a function returning default IOMMU domain Bug: 243479562 gcip: implement map/unmap in legacy mode Bug: 243479562 (repeat) gcip: implement gcip_iommu_domain_{map,unmap}_sg Bug: 243479562 (repeat) gcip: implement gcip_iommu_domain_pool_{alloc,free}_domain Bug: 243479562 (repeat) gcip: add granule alignment functions Bug: 243479562 (repeat) gcip: introduce gcip_iommu_domain_type Bug: 243479562 (repeat) gcip: implement funcs of gcip_iommu_domain_pool Bug: 243479562 (repeat) GCIP_MAIN_REV_ID: 848032f4c19ee517b799498630dc9b0f5cf30bf0 gxp: move fw_rw_section into if-guard gxp: fix memory leak with invalid telemetry type Bug: 273254318 gxp: remove unneeded checks in vd.c gxp: skip configuring when missing VD config gxp: remove unused host-dev structs Bug: 265748027 gxp: remove unused range-alloc module gxp: entirely remove app_metadata Bug: 265748027 (repeat) gxp: remove legacy firmware data init Bug: 265748027 (repeat) gxp: remove legacy telemetry descriptor Bug: 265748027 (repeat) gxp: move system cfg population to device probe Bug: 265748027 (repeat) gxp: add gxp_fw_data_system_cfg Bug: 265748027 (repeat) gxp: add gxp_fw_data_resource Bug: 265748027 (repeat) gcip: kci: add usage-stats metrics v1 / v2 commands gcip: pm power_up callback add comments for thermal suspend suggestion gcip: introduce a function returning default IOMMU domain Bug: 243479562 (repeat) gcip: introduce gcip_iommu_domain_{map,unmap}_sg Bug: 243479562 (repeat) gcip: introduce gcip_iommu_domain_pool_{alloc,free}_domain Bug: 243479562 (repeat) gcip: introduce gcip_iommu_domain_ops Bug: 243479562 (repeat) gcip: introduce gcip_iommu_domain Bug: 243479562 (repeat) gcip: introduce gcip_iommu_domain_type 
Bug: 243479562 (repeat) gcip: introduce gcip_iommu_domain_pool and its funcs Bug: 243479562 (repeat) GCIP_HEADERS_REV_ID: b3f05e2139e6215ce8e4ad90d9fa621c8084d53c GitOrigin-RevId: 543bd26edbc76d22cff7e16627955e7a66f78e59 Change-Id: I3dbe4170269ba87c9f411fb1fbf5b6e3c9e55fe5
author: Aurora zuma automerger <aurora-zuma-automerger@google.com> 2023-03-14 18:01:36 +0000
committer: Copybara-Service <copybara-worker@google.com> 2023-03-14 11:28:21 -0700
commit: 7d833613c6706f5fd9c0eec66685ee44b0a664b4 (patch)
tree: 0f489702358518be1091d34c24e9a94b3bbcba59
parent: f12c18aa9d19fb142475e5bdc3cba671491ba033 (diff)
download: zuma-7d833613c6706f5fd9c0eec66685ee44b0a664b4.tar.gz
21 files changed, 1114 insertions, 1495 deletions
diff --git a/Makefile b/Makefile
index c6e4d2b..151a194 100644
--- a/Makefile
+++ b/Makefile
@@ -30,7 +30,6 @@ gxp-objs += \
 		gxp-mapping.o \
 		gxp-mb-notification.o \
 		gxp-pm.o \
-		gxp-range-alloc.o \
 		gxp-ssmt.o \
 		gxp-thermal.o \
 		gxp-vd.o
diff --git a/gcip-kernel-driver/drivers/gcip/gcip-iommu.c b/gcip-kernel-driver/drivers/gcip/gcip-iommu.c
new file mode 100644
index 0000000..2e0dac6
--- /dev/null
+++ b/gcip-kernel-driver/drivers/gcip/gcip-iommu.c
@@ -0,0 +1,432 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Manages GCIP IOMMU domains and allocates/maps IOVAs.
+ *
+ * Copyright (C) 2023 Google LLC
+ */
+
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-iommu.h>
+#include <linux/dma-mapping.h>
+#include <linux/genalloc.h>
+#include <linux/iova.h>
+#include <linux/log2.h>
+#include <linux/of.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+#include <gcip/gcip-domain-pool.h>
+#include <gcip/gcip-iommu.h>
+#include <gcip/gcip-mem-pool.h>
+
+#define HAS_IOVAD_BEST_FIT_ALGO (IS_ENABLED(CONFIG_GCIP_TEST) || IS_ENABLED(CONFIG_ANDROID))
+
+/* Macros for manipulating @gcip_map_flags parameter. */
+#define GCIP_MAP_FLAGS_GET_VALUE(ATTR, flags)                                                      \
+	(((flags) >> (GCIP_MAP_FLAGS_##ATTR##_OFFSET)) &                                           \
+	 (BIT_ULL(GCIP_MAP_FLAGS_##ATTR##_BIT_SIZE) - 1))
+#define GCIP_MAP_FLAGS_GET_DMA_DIRECTION(flags) GCIP_MAP_FLAGS_GET_VALUE(DMA_DIRECTION, flags)
+#define GCIP_MAP_FLAGS_GET_DMA_COHERENT(flags) GCIP_MAP_FLAGS_GET_VALUE(DMA_COHERENT, flags)
+#define GCIP_MAP_FLAGS_GET_DMA_ATTR(flags) GCIP_MAP_FLAGS_GET_VALUE(DMA_ATTR, flags)
+
+/**
+ * dma_info_to_prot - Translate DMA API directions and attributes to IOMMU API
+ *                    page flags.
+ * @dir: Direction of DMA transfer
+ * @coherent: If true, create coherent mappings of the scatterlist.
+ * @attrs: DMA attributes for the mapping
+ *
+ * See v5.15.94/source/drivers/iommu/dma-iommu.c#L418
+ *
+ * Return: corresponding IOMMU API page protection flags
+ */
+static int dma_info_to_prot(enum dma_data_direction dir, bool coherent, unsigned long attrs)
+{
+	int prot = coherent ? IOMMU_CACHE : 0;
+
+	if (attrs & DMA_ATTR_PRIVILEGED)
+		prot |= IOMMU_PRIV;
+
+	switch (dir) {
+	case DMA_BIDIRECTIONAL:
+		return prot | IOMMU_READ | IOMMU_WRITE;
+	case DMA_TO_DEVICE:
+		return prot | IOMMU_READ;
+	case DMA_FROM_DEVICE:
+		return prot | IOMMU_WRITE;
+	default:
+		return 0;
+	}
+}
+
+static inline unsigned long gcip_iommu_domain_shift(struct gcip_iommu_domain *domain)
+{
+	return __ffs(domain->domain_pool->granule);
+}
+
+static inline unsigned long gcip_iommu_domain_pfn(struct gcip_iommu_domain *domain, dma_addr_t iova)
+{
+	return iova >> gcip_iommu_domain_shift(domain);
+}
+
+static inline size_t gcip_iommu_domain_align(struct gcip_iommu_domain *domain, size_t size)
+{
+	return ALIGN(size, domain->domain_pool->granule);
+}
+
+static int iovad_initialize_domain(struct gcip_iommu_domain *domain)
+{
+	struct gcip_iommu_domain_pool *dpool = domain->domain_pool;
+
+	init_iova_domain(&domain->iova_space.iovad, dpool->granule,
+			 max_t(unsigned long, 1, dpool->base_daddr >> ilog2(dpool->granule)));
+
+	return 0;
+}
+
+static void iovad_finalize_domain(struct gcip_iommu_domain *domain)
+{
+	put_iova_domain(&domain->iova_space.iovad);
+}
+
+static void iovad_enable_best_fit_algo(struct gcip_iommu_domain *domain)
+{
+#if HAS_IOVAD_BEST_FIT_ALGO
+	domain->iova_space.iovad.best_fit = true;
+#endif /* HAS_IOVAD_BEST_FIT_ALGO */
+}
+
+static dma_addr_t iovad_alloc_iova_space(struct gcip_iommu_domain *domain, size_t size)
+{
+	unsigned long iova, shift = gcip_iommu_domain_shift(domain);
+
+	iova = alloc_iova_fast(&domain->iova_space.iovad, size >> shift,
+			       domain->domain_pool->last_daddr >> shift, true);
+
+	return (dma_addr_t)iova << shift;
+}
+
+static void iovad_free_iova_space(struct gcip_iommu_domain *domain, dma_addr_t iova, size_t size)
+{
+	free_iova_fast(&domain->iova_space.iovad, gcip_iommu_domain_pfn(domain, iova),
+		       size >> gcip_iommu_domain_shift(domain));
+}
+
+static const struct gcip_iommu_domain_ops iovad_ops = {
+	.initialize_domain = iovad_initialize_domain,
+	.finalize_domain = iovad_finalize_domain,
+	.enable_best_fit_algo = iovad_enable_best_fit_algo,
+	.alloc_iova_space = iovad_alloc_iova_space,
+	.free_iova_space = iovad_free_iova_space,
+};
+
+static int mem_pool_initialize_domain(struct gcip_iommu_domain *domain)
+{
+	struct gcip_iommu_domain_pool *dpool = domain->domain_pool;
+	int ret;
+
+	ret = gcip_mem_pool_init(&domain->iova_space.mem_pool, dpool->dev, dpool->base_daddr,
+				 dpool->size, dpool->granule);
+
+	return ret;
+}
+
+static void mem_pool_finalize_domain(struct gcip_iommu_domain *domain)
+{
+	gcip_mem_pool_exit(&domain->iova_space.mem_pool);
+}
+
+static void mem_pool_enable_best_fit_algo(struct gcip_iommu_domain *domain)
+{
+	gen_pool_set_algo(domain->iova_space.mem_pool.gen_pool, gen_pool_best_fit, NULL);
+}
+
+static dma_addr_t mem_pool_alloc_iova_space(struct gcip_iommu_domain *domain, size_t size)
+{
+	return (dma_addr_t)gcip_mem_pool_alloc(&domain->iova_space.mem_pool, size);
+}
+
+static void mem_pool_free_iova_space(struct gcip_iommu_domain *domain, dma_addr_t iova, size_t size)
+{
+	gcip_mem_pool_free(&domain->iova_space.mem_pool, iova, size);
+}
+
+static const struct gcip_iommu_domain_ops mem_pool_ops = {
+	.initialize_domain = mem_pool_initialize_domain,
+	.finalize_domain = mem_pool_finalize_domain,
+	.enable_best_fit_algo = mem_pool_enable_best_fit_algo,
+	.alloc_iova_space = mem_pool_alloc_iova_space,
+	.free_iova_space = mem_pool_free_iova_space,
+};
+
+static bool enable_best_fit_algo_legacy(struct gcip_iommu_domain_pool *pool)
+{
+	__maybe_unused int ret;
+
+#if HAS_IOVAD_BEST_FIT_ALGO
+	ret = iommu_dma_enable_best_fit_algo(pool->dev);
+	if (!ret)
+		return true;
+	dev_warn(pool->dev, "Failed to enable best-fit IOMMU domain pool (%d)\n", ret);
+#else
+	dev_warn(pool->dev, "This env doesn't support best-fit algorithm in the legacy mode");
+#endif
+	return false;
+}
+
+static ssize_t dma_iommu_map_sg(struct gcip_iommu_domain *domain, struct scatterlist *sgl,
+				int nents, enum dma_data_direction dir, unsigned long attrs,
+				int prot)
+{
+	int nents_mapped;
+	dma_addr_t iova;
+	ssize_t ret;
+
+	nents_mapped = dma_map_sg_attrs(domain->dev, sgl, nents, dir, attrs);
+	if (!nents_mapped)
+		return 0;
+
+	iova = sg_dma_address(sgl);
+
+	ret = (ssize_t)iommu_map_sg(domain->domain, iova, sgl, nents, prot);
+	if (ret <= 0) {
+		dma_unmap_sg_attrs(domain->dev, sgl, nents, dir, attrs);
+		return 0;
+	}
+
+	return nents_mapped;
+}
+
+static void dma_iommu_unmap_sg(struct gcip_iommu_domain *domain, struct scatterlist *sgl, int nents,
+			       enum dma_data_direction dir, unsigned long attrs)
+{
+	struct scatterlist *sg;
+	size_t size = 0;
+	int i;
+
+	for_each_sg (sgl, sg, nents, i)
+		size += sg_dma_len(sg);
+
+	if (!iommu_unmap(domain->domain, sg_dma_address(sgl), size))
+		dev_warn(domain->dev, "Failed to unmap sg");
+	dma_unmap_sg_attrs(domain->dev, sgl, nents, dir, attrs);
+}
+
+int gcip_iommu_domain_pool_init(struct gcip_iommu_domain_pool *pool, struct device *dev,
+				dma_addr_t base_daddr, size_t iova_space_size, size_t granule,
+				unsigned int num_domains, enum gcip_iommu_domain_type domain_type)
+{
+	const __be32 *user_window;
+	int ret;
+
+	ret = gcip_domain_pool_init(dev, &pool->domain_pool, num_domains);
+	if (ret)
+		return ret;
+
+	pool->dev = dev;
+	pool->base_daddr = base_daddr;
+	pool->size = iova_space_size;
+	pool->granule = granule;
+	pool->best_fit = false;
+	pool->domain_type = domain_type;
+
+	if (!base_daddr || !iova_space_size) {
+		user_window = of_get_property(dev->of_node, "gcip-dma-window", NULL);
+		if (!user_window) {
+			dev_warn(dev, "Failed to find gcip-dma-window property");
+		} else {
+			pool->base_daddr = of_read_number(user_window, 1);
+			pool->size = of_read_number(user_window + 1, 1);
+		}
+	}
+
+	if (!pool->base_daddr || !pool->size) {
+		dev_warn(dev, "GCIP IOMMU domain pool is initialized as the legacy mode");
+		pool->size = 0;
+	} else {
+		pool->last_daddr = pool->base_daddr + pool->size - 1;
+	}
+
+	dev_dbg(dev, "Init GCIP IOMMU domain pool, base_daddr=%#llx, size=%#zx", pool->base_daddr,
+		pool->size);
+
+	return 0;
+}
+
+void gcip_iommu_domain_pool_destroy(struct gcip_iommu_domain_pool *pool)
+{
+	gcip_domain_pool_destroy(&pool->domain_pool);
+}
+
+void gcip_iommu_domain_pool_enable_best_fit_algo(struct gcip_iommu_domain_pool *pool)
+{
+	if (gcip_iommu_domain_pool_is_legacy_mode(pool)) {
+		pool->best_fit = enable_best_fit_algo_legacy(pool);
+	} else if (pool->domain_type == GCIP_IOMMU_DOMAIN_TYPE_IOVAD && !HAS_IOVAD_BEST_FIT_ALGO) {
+		dev_warn(pool->dev, "This env doesn't support best-fit algorithm with IOVAD");
+		pool->best_fit = false;
+	} else {
+		pool->best_fit = true;
+	}
+}
+
+void gcip_iommu_domain_pool_enable_legacy_mode(struct gcip_iommu_domain_pool *pool)
+{
+	pool->size = 0;
+	pool->base_daddr = 0;
+
+	if (pool->best_fit)
+		pool->best_fit = enable_best_fit_algo_legacy(pool);
+}
+
+struct gcip_iommu_domain *gcip_iommu_domain_pool_alloc_domain(struct gcip_iommu_domain_pool *pool)
+{
+	struct gcip_iommu_domain *gdomain;
+	int ret;
+
+	gdomain = devm_kzalloc(pool->dev, sizeof(*gdomain), GFP_KERNEL);
+	if (!gdomain)
+		return ERR_PTR(-ENOMEM);
+
+	gdomain->dev = pool->dev;
+	gdomain->domain_pool = pool;
+	gdomain->domain = gcip_domain_pool_alloc(&pool->domain_pool);
+	if (IS_ERR_OR_NULL(gdomain->domain)) {
+		ret = -ENOMEM;
+		goto err_free_gdomain;
+	}
+
+	if (gcip_iommu_domain_pool_is_legacy_mode(pool)) {
+		gdomain->legacy_mode = true;
+		return gdomain;
+	}
+
+	switch (pool->domain_type) {
+	case GCIP_IOMMU_DOMAIN_TYPE_IOVAD:
+		gdomain->ops = &iovad_ops;
+		break;
+	case GCIP_IOMMU_DOMAIN_TYPE_MEM_POOL:
+		gdomain->ops = &mem_pool_ops;
+		break;
+	default:
+		ret = -EINVAL;
+		goto err_free_domain_pool;
+	}
+
+	ret = gdomain->ops->initialize_domain(gdomain);
+	if (ret)
+		goto err_free_domain_pool;
+
+	if (pool->best_fit)
+		gdomain->ops->enable_best_fit_algo(gdomain);
+
+	return gdomain;
+
+err_free_domain_pool:
+	gcip_domain_pool_free(&pool->domain_pool, gdomain->domain);
+err_free_gdomain:
+	devm_kfree(pool->dev, gdomain);
+	return ERR_PTR(ret);
+}
+
+void gcip_iommu_domain_pool_free_domain(struct gcip_iommu_domain_pool *pool,
+					struct gcip_iommu_domain *domain)
+{
+	if (!gcip_iommu_domain_is_legacy_mode(domain))
+		domain->ops->finalize_domain(domain);
+	gcip_domain_pool_free(&pool->domain_pool, domain->domain);
+	devm_kfree(pool->dev, domain);
+}
+
+unsigned int gcip_iommu_domain_map_sg(struct gcip_iommu_domain *domain, struct scatterlist *sgl,
+				      int nents, u64 gcip_map_flags)
+{
+	enum dma_data_direction dir = GCIP_MAP_FLAGS_GET_DMA_DIRECTION(gcip_map_flags);
+	bool coherent = GCIP_MAP_FLAGS_GET_DMA_COHERENT(gcip_map_flags);
+	unsigned long attrs = GCIP_MAP_FLAGS_GET_DMA_ATTR(gcip_map_flags);
+	int i, prot = dma_info_to_prot(dir, coherent, attrs);
+	struct scatterlist *sg;
+	dma_addr_t iova;
+	size_t iova_len = 0;
+	ssize_t ret;
+
+	if (gcip_iommu_domain_is_legacy_mode(domain))
+		return dma_iommu_map_sg(domain, sgl, nents, dir, attrs, prot);
+
+	/* Calculates how much IOVA space we need. */
+	for_each_sg (sgl, sg, nents, i)
+		iova_len += sg->length;
+
+	/* Allocates one continuous IOVA. */
+	iova = domain->ops->alloc_iova_space(domain, gcip_iommu_domain_align(domain, iova_len));
+	if (!iova)
+		return 0;
+
+	/*
+	 * Maps scatterlist to the allocated IOVA.
+	 *
+	 * iommu_map_sg() iterates over the scatterlist segments in order and maps each one to the
+	 * IOMMU domain back to back, advancing the IOVA by the size of each segment.
+	 * Returns an error on failure or the total length of mapped segments on success.
+	 *
+	 * Note: Before Linux 5.15, its return type was `size_t` and it returned 0 on failure.
+	 *       To make it compatible with those old versions, we should cast the return value.
+	 */
+	ret = (ssize_t)iommu_map_sg(domain->domain, iova, sgl, nents, prot);
+	if (ret < 0 || ret < iova_len)
+		goto err_free_iova;
+
+	/* Fills out the mapping information. */
+	sg_dma_address(sgl) = iova;
+	sg_dma_len(sgl) = iova_len;
+
+	/* As the whole mapping information is put into the first segment, it should return 1. */
+	return 1;
+
+err_free_iova:
+	domain->ops->free_iova_space(domain, iova, gcip_iommu_domain_align(domain, iova_len));
+	return 0;
+}
+
+void gcip_iommu_domain_unmap_sg(struct gcip_iommu_domain *domain, struct scatterlist *sgl,
+				int nents, u64 gcip_map_flags)
+{
+	dma_addr_t iova;
+	size_t iova_len;
+
+	if (gcip_iommu_domain_is_legacy_mode(domain)) {
+		enum dma_data_direction dir = GCIP_MAP_FLAGS_GET_DMA_DIRECTION(gcip_map_flags);
+		unsigned long attrs = GCIP_MAP_FLAGS_GET_DMA_ATTR(gcip_map_flags);
+
+		dma_iommu_unmap_sg(domain, sgl, nents, dir, attrs);
+		return;
+	}
+
+	iova = sg_dma_address(sgl);
+	iova_len = sg_dma_len(sgl);
+
+	iommu_unmap(domain->domain, iova, iova_len);
+	domain->ops->free_iova_space(domain, iova, gcip_iommu_domain_align(domain, iova_len));
+}
+
+struct gcip_iommu_domain *gcip_iommu_get_domain_for_dev(struct device *dev)
+{
+	struct gcip_iommu_domain *gdomain;
+
+	gdomain = devm_kzalloc(dev, sizeof(*gdomain), GFP_KERNEL);
+	if (!gdomain)
+		return ERR_PTR(-ENOMEM);
+
+	gdomain->domain = iommu_get_domain_for_dev(dev);
+	if (!gdomain->domain) {
+		devm_kfree(dev, gdomain);
+		return ERR_PTR(-ENODEV);
+	}
+
+	gdomain->dev = dev;
+	gdomain->legacy_mode = true;
+
+	return gdomain;
+}
diff --git a/gcip-kernel-driver/include/gcip/gcip-iommu.h b/gcip-kernel-driver/include/gcip/gcip-iommu.h
new file mode 100644
index 0000000..4e04b7e
--- /dev/null
+++ b/gcip-kernel-driver/include/gcip/gcip-iommu.h
@@ -0,0 +1,267 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Manages GCIP IOMMU domains and allocates/maps IOVAs.
+ *
+ * This framework can replace allocating IOVAs via the Linux DMA interface, which allocates and
+ * maps them to the default IOMMU domain. Instead, this framework allocates and maps IOVAs to a
+ * specific IOMMU domain directly. This has the following two advantages:
+ *
+ * - Reduces the mapping time as buffers are mapped once, directly to the target IOMMU domain.
+ * - IOMMU domains don't have to share the total capacity.
+ *
+ * GCIP IOMMU domain is implemented by utilizing multiple kinds of IOVA space pool:
+ * - struct iova_domain
+ * - struct gcip_mem_pool
+ *
+ * Copyright (C) 2023 Google LLC
+ */
+
+#ifndef __GCIP_IOMMU_H__
+#define __GCIP_IOMMU_H__
+
+#include <linux/device.h>
+#include <linux/iommu.h>
+#include <linux/iova.h>
+#include <linux/scatterlist.h>
+
+#include <gcip/gcip-domain-pool.h>
+#include <gcip/gcip-mem-pool.h>
+
+/*
+ * Helpers for manipulating @gcip_map_flags parameter of the `gcip_iommu_domain_{map,unmap}_sg`
+ * functions.
+ */
+#define GCIP_MAP_FLAGS_DMA_DIRECTION_OFFSET 0
+#define GCIP_MAP_FLAGS_DMA_DIRECTION_BIT_SIZE 2
+#define GCIP_MAP_FLAGS_DMA_DIRECTION_TO_FLAGS(dir)                                                 \
+	((u64)(dir) << GCIP_MAP_FLAGS_DMA_DIRECTION_OFFSET)
+
+#define GCIP_MAP_FLAGS_DMA_COHERENT_OFFSET                                                         \
+	(GCIP_MAP_FLAGS_DMA_DIRECTION_OFFSET + GCIP_MAP_FLAGS_DMA_DIRECTION_BIT_SIZE)
+#define GCIP_MAP_FLAGS_DMA_COHERENT_BIT_SIZE 1
+#define GCIP_MAP_FLAGS_DMA_COHERENT_TO_FLAGS(coherent)                                             \
+	((u64)(coherent) << GCIP_MAP_FLAGS_DMA_COHERENT_OFFSET)
+
+#define GCIP_MAP_FLAGS_DMA_ATTR_OFFSET                                                             \
+	(GCIP_MAP_FLAGS_DMA_COHERENT_OFFSET + GCIP_MAP_FLAGS_DMA_COHERENT_BIT_SIZE)
+#define GCIP_MAP_FLAGS_DMA_ATTR_BIT_SIZE 10
+#define GCIP_MAP_FLAGS_DMA_ATTR_TO_FLAGS(attr) ((u64)(attr) << GCIP_MAP_FLAGS_DMA_ATTR_OFFSET)
+
+struct gcip_iommu_domain_ops;
+
+/*
+ * Type of IOVA space pool that IOMMU domain will utilize.
+ * Regardless of the type, its functionality will be the same. However, its implementation might be
+ * different. For example, iova_domain uses red-black tree for the memory management, but gen_pool
+ * uses bitmap. Therefore, their performance might be different and the kernel drivers can choose
+ * which one to use according to its real use cases and the performance.
+ *
+ * Note: in legacy mode, only iova_domain is available as the Linux implementation utilizes that.
+ */
+enum gcip_iommu_domain_type {
+	/* Uses iova_domain. */
+	GCIP_IOMMU_DOMAIN_TYPE_IOVAD,
+	/* Uses gcip_mem_pool which is based on gen_pool. */
+	GCIP_IOMMU_DOMAIN_TYPE_MEM_POOL,
+};
+
+/*
+ * IOMMU domain pool.
+ *
+ * It manages the pool of IOMMU domains. Also, it specifies the base address and the size of IOMMU
+ * domains. Also, one can choose the data structure and algorithm of IOVA space management.
+ */
+struct gcip_iommu_domain_pool {
+	struct device *dev;
+	struct gcip_domain_pool domain_pool;
+	dma_addr_t base_daddr;
+	/* Holds (base_daddr + size - 1) to avoid recalculating it on every IOVAD mapping. */
+	dma_addr_t last_daddr;
+	size_t size;
+	size_t granule;
+	bool best_fit;
+	enum gcip_iommu_domain_type domain_type;
+};
+
+/*
+ * Wrapper of iommu_domain.
+ * It has its own IOVA space pool based on iova_domain or gcip_mem_pool. One can choose one of them
+ * when calling the `gcip_iommu_domain_pool_init` function. See `enum gcip_iommu_domain_type`
+ * for details.
+ */
+struct gcip_iommu_domain {
+	struct device *dev;
+	struct gcip_iommu_domain_pool *domain_pool;
+	struct iommu_domain *domain;
+	bool legacy_mode;
+	union {
+		struct iova_domain iovad;
+		struct gcip_mem_pool mem_pool;
+	} iova_space;
+	const struct gcip_iommu_domain_ops *ops;
+};
+
+/*
+ * Holds operators which will be set according to the @domain_type.
+ * These callbacks will be filled automatically when a `struct gcip_iommu_domain` is allocated.
+ */
+struct gcip_iommu_domain_ops {
+	/* Initializes pool of @domain. */
+	int (*initialize_domain)(struct gcip_iommu_domain *domain);
+	/* Destroys pool of @domain. */
+	void (*finalize_domain)(struct gcip_iommu_domain *domain);
+	/*
+	 * Enables best-fit algorithm for the memory management.
+	 * Only domains which are allocated after calling this callback will be affected.
+	 */
+	void (*enable_best_fit_algo)(struct gcip_iommu_domain *domain);
+	/* Allocates @size of buffer and returns its IOVA. */
+	dma_addr_t (*alloc_iova_space)(struct gcip_iommu_domain *domain, size_t size);
+	/* Releases @size of buffer which was allocated to @iova. */
+	void (*free_iova_space)(struct gcip_iommu_domain *domain, dma_addr_t iova, size_t size);
+};
+
+/*
+ * Initializes an IOMMU domain pool.
+ *
+ * One can specify the base DMA address and IOVA space size via @base_daddr and @iova_space_size
+ * parameters. If any of them is 0, it will try to parse "gcip-dma-window" property from the device
+ * tree of @dev.
+ *
+ * If the base DMA address and IOVA space size are set successfully (i.e., larger than 0), IOMMU
+ * domains allocated by this domain pool will have their own IOVA space pool and will map buffers
+ * to their own IOMMU domain directly.
+ *
+ * Otherwise, it will fall into the legacy mode which will utilize the native DMA-IOMMU APIs.
+ * In this mode, it will map the buffer to the default IOMMU domain first and then remap it to the
+ * target domain.
+ *
+ * @pool: IOMMU domain pool to be initialized.
+ * @dev: Device where to parse "gcip-dma-window" property.
+ * @base_daddr: The base address of IOVA space. Must be greater than 0 and a multiple of @granule.
+ * @iova_space_size: The size of the IOVA space. Must be a multiple of @granule.
+ * @granule: The granule when invoking the IOMMU domain pool. Must be a power of 2.
+ * @num_domains: The number of IOMMU domains.
+ * @domain_type: Type of the IOMMU domain.
+ *
+ * Returns 0 on success or negative error value.
+ */
+int gcip_iommu_domain_pool_init(struct gcip_iommu_domain_pool *pool, struct device *dev,
+				dma_addr_t base_daddr, size_t iova_space_size, size_t granule,
+				unsigned int num_domains, enum gcip_iommu_domain_type domain_type);
+
+/*
+ * Destroys an IOMMU domain pool.
+ *
+ * @pool: IOMMU domain pool to be destroyed.
+ */
+void gcip_iommu_domain_pool_destroy(struct gcip_iommu_domain_pool *pool);
+
+/*
+ * Enables the best fit algorithm for allocating an IOVA space.
+ * It affects domains which are allocated after calling this function only.
+ *
+ * @pool: IOMMU domain pool to be enabled.
+ */
+void gcip_iommu_domain_pool_enable_best_fit_algo(struct gcip_iommu_domain_pool *pool);
+
+/*
+ * Enables the legacy mode of allocating and mapping IOVA logic which utilizes native DMA-IOMMU
+ * APIs of the Linux kernel.
+ * It affects domains which are allocated after calling this function only.
+ *
+ * @pool: IOMMU domain pool to be enabled.
+ */
+void gcip_iommu_domain_pool_enable_legacy_mode(struct gcip_iommu_domain_pool *pool);
+
+/*
+ * Returns whether @pool is using legacy mode or not.
+ *
+ * @pool: IOMMU domain pool to be checked.
+ */
+static inline bool gcip_iommu_domain_pool_is_legacy_mode(struct gcip_iommu_domain_pool *pool)
+{
+	return !(pool && pool->size);
+}
+
+/*
+ * Allocates a GCIP IOMMU domain.
+ *
+ * @pool: IOMMU domain pool.
+ *
+ * Returns a pointer of allocated domain on success or an error pointer on failure.
+ */
+struct gcip_iommu_domain *gcip_iommu_domain_pool_alloc_domain(struct gcip_iommu_domain_pool *pool);
+
+/*
+ * Releases a GCIP IOMMU domain.
+ *
+ * Before calling this function, you must unmap all IOVAs by calling `gcip_iommu_domain_unmap{_sg}`
+ * functions.
+ *
+ * @pool: IOMMU domain pool.
+ * @domain: GCIP IOMMU domain to be released.
+ */
+void gcip_iommu_domain_pool_free_domain(struct gcip_iommu_domain_pool *pool,
+					struct gcip_iommu_domain *domain);
+
+/*
+ * Returns whether @domain is using legacy mode or not.
+ *
+ * @domain: GCIP IOMMU domain to be checked.
+ */
+static inline bool gcip_iommu_domain_is_legacy_mode(struct gcip_iommu_domain *domain)
+{
+	return domain->legacy_mode;
+}
+
+/*
+ * Allocates an IOVA for the scatterlist and maps it to @domain.
+ *
+ * @domain: GCIP IOMMU domain which manages IOVA addresses.
+ * @sgl: Scatterlist to be mapped.
+ * @nents: The number of entries in @sgl.
+ * @gcip_map_flags: Flags indicating mapping attributes.
+ *
+ * Bitfields:
+ *   [1:0]   - DMA_DIRECTION:
+ *               00 = DMA_BIDIRECTIONAL (host/device can write buffer)
+ *               01 = DMA_TO_DEVICE     (host can write buffer)
+ *               10 = DMA_FROM_DEVICE   (device can write buffer)
+ *               (See https://docs.kernel.org/core-api/dma-api-howto.html#dma-direction)
+ *   [2:2]   - Coherent Mapping:
+ *               0 = Create non-coherent mappings of the buffer.
+ *               1 = Create coherent mappings of the buffer.
+ *   [12:3]  - DMA_ATTR:
+ *               Not used in the non-legacy mode.
+ *               (See https://www.kernel.org/doc/Documentation/core-api/dma-attributes.rst)
+ *   [63:13] - RESERVED
+ *               Set RESERVED bits to 0 to ensure backwards compatibility.
+ *
+ * One can use `GCIP_MAP_FLAGS_DMA_*_TO_FLAGS` macros to generate a flag.
+ *
+ * Returns the number of entries which are mapped to @domain. Returns 0 if it fails.
+ */
+unsigned int gcip_iommu_domain_map_sg(struct gcip_iommu_domain *domain, struct scatterlist *sgl,
+				      int nents, u64 gcip_map_flags);
+
+/*
+ * Unmaps an IOVA which was mapped for the scatterlist.
+ *
+ * @domain: GCIP IOMMU domain which manages IOVA addresses.
+ * @sgl: Scatterlist to be unmapped.
+ * @nents: The number of entries in @sgl. Only used in the legacy mode.
+ * @gcip_map_flags: The same as in `gcip_iommu_domain_map_sg`. Ignored in the non-legacy mode.
+ */
+void gcip_iommu_domain_unmap_sg(struct gcip_iommu_domain *domain, struct scatterlist *sgl,
+				int nents, u64 gcip_map_flags);
+
+/*
+ * Returns a default GCIP IOMMU domain.
+ * This domain works with the legacy mode only.
+ *
+ * @dev: Device where to fetch the default IOMMU domain.
+ */
+struct gcip_iommu_domain *gcip_iommu_get_domain_for_dev(struct device *dev);
+
+#endif /* __GCIP_IOMMU_H__ */
diff --git a/gcip-kernel-driver/include/gcip/gcip-kci.h b/gcip-kernel-driver/include/gcip/gcip-kci.h
index eb83550..2aa721b 100644
--- a/gcip-kernel-driver/include/gcip/gcip-kci.h
+++ b/gcip-kernel-driver/include/gcip/gcip-kci.h
@@ -90,6 +90,9 @@ enum gcip_kci_code {
 	GCIP_KCI_CODE_OPEN_DEVICE = 9,
 	GCIP_KCI_CODE_CLOSE_DEVICE = 10,
 	GCIP_KCI_CODE_FIRMWARE_INFO = 11,
+	/* TODO(b/271372136): remove v1 when v1 firmware no longer in use. */
+	GCIP_KCI_CODE_GET_USAGE_V1 = 12,
+	/* Backward compatible define, also update when v1 firmware no longer in use. */
 	GCIP_KCI_CODE_GET_USAGE = 12,
 	GCIP_KCI_CODE_NOTIFY_THROTTLING = 13,
 	GCIP_KCI_CODE_BLOCK_BUS_SPEED_CONTROL = 14,
@@ -99,6 +102,7 @@ enum gcip_kci_code {
 	GCIP_KCI_CODE_UNLINK_OFFLOAD_VMBOX = 18,
 	GCIP_KCI_CODE_FIRMWARE_TRACING_LEVEL = 19,
 	GCIP_KCI_CODE_THERMAL_CONTROL = 20,
+	GCIP_KCI_CODE_GET_USAGE_V2 = 21,
 
 	GCIP_KCI_CODE_RKCI_ACK = 256,
 };
diff --git a/gcip-kernel-driver/include/gcip/gcip-pm.h b/gcip-kernel-driver/include/gcip/gcip-pm.h
index 4842598..1e6ce05 100644
--- a/gcip-kernel-driver/include/gcip/gcip-pm.h
+++ b/gcip-kernel-driver/include/gcip/gcip-pm.h
@@ -40,12 +40,14 @@ struct gcip_pm_args {
 	void *data;
 	/*
 	 * Device-specific power up.
-	 * Called with @pm->lock hold and nesting is handled at generic layer.
+	 * Called with @pm->lock held and nesting is handled at generic layer.
+	 * The IP driver may reject power on for such conditions as thermal suspend in this
+	 * callback.
 	 */
 	int (*power_up)(void *data);
 	/*
 	 * Device-specific power down.
-	 * Called with @pm->lock hold and nesting is handled at generic layer.
+	 * Called with @pm->lock held and nesting is handled at generic layer.
 	 * Returning -EAGAIN will trigger a retry after GCIP_ASYNC_POWER_DOWN_RETRY_DELAY ms.
 	 */
 	int (*power_down)(void *data);
@@ -106,7 +108,7 @@ bool gcip_pm_is_powered(struct gcip_pm *pm);
 /* Shuts down the device if @pm->count equals to 0 or @force is true. */
 void gcip_pm_shutdown(struct gcip_pm *pm, bool force);
 
-/* Make sure @pm->lock is hold. */
+/* Make sure @pm->lock is held. */
 static inline void gcip_pm_lockdep_assert_held(struct gcip_pm *pm)
 {
 	if (!pm)
diff --git a/gxp-common-platform.c b/gxp-common-platform.c
index baa8707..7514b11 100644
--- a/gxp-common-platform.c
+++ b/gxp-common-platform.c
@@ -2109,6 +2109,11 @@ static int gxp_common_platform_probe(struct platform_device *pdev, struct gxp_de
 		if (ret)
 			goto err_dma_fence_destroy;
 	}
+	/*
+	 * We only know where the system config region is after after_probe is
+	 * done so this can't be called earlier.
+	 */
+	gxp_fw_data_populate_system_config(gxp);
 
 	gxp->misc_dev.minor = MISC_DYNAMIC_MINOR;
 	gxp->misc_dev.name = GXP_NAME;
diff --git a/gxp-core-telemetry.c b/gxp-core-telemetry.c
index 2b76ef2..bfa9264 100644
--- a/gxp-core-telemetry.c
+++ b/gxp-core-telemetry.c
@@ -839,6 +839,7 @@ int gxp_core_telemetry_register_eventfd(struct gxp_dev *gxp, u8 type, int fd)
 		break;
 	default:
 		ret = -EINVAL;
+		eventfd_ctx_put(new_ctx);
 		goto out;
 	}
 
diff --git a/gxp-debug-dump.c b/gxp-debug-dump.c
index 9d1ea21..148fa25 100644
--- a/gxp-debug-dump.c
+++ b/gxp-debug-dump.c
@@ -14,10 +14,6 @@
 #include <linux/string.h>
 #include <linux/workqueue.h>
 
-#if IS_ENABLED(CONFIG_GXP_TEST) || IS_ENABLED(CONFIG_SUBSYSTEM_COREDUMP)
-#include <linux/platform_data/sscoredump.h>
-#endif
-
 #include <gcip/gcip-pm.h>
 #include <gcip/gcip-alloc-helper.h>
 
@@ -35,6 +31,10 @@
 #include "gxp-pm.h"
 #include "gxp-vd.h"
 
+#if HAS_COREDUMP
+#include <linux/platform_data/sscoredump.h>
+#endif
+
 #define SSCD_MSG_LENGTH 64
 
 #define SYNC_BARRIER_BLOCK 0x00100000
@@ -311,7 +311,7 @@ static int gxp_get_common_dump(struct gxp_dev *gxp)
 	return ret;
 }
 
-#if IS_ENABLED(CONFIG_GXP_TEST) || IS_ENABLED(CONFIG_SUBSYSTEM_COREDUMP)
+#if HAS_COREDUMP
 static void gxp_send_to_sscd(struct gxp_dev *gxp, void *segs, int seg_cnt,
 			     const char *info)
 {
@@ -464,42 +464,6 @@ static int gxp_user_buffers_vmap(struct gxp_dev *gxp,
 out:
 	return cnt;
 }
-#endif
-
-void gxp_debug_dump_invalidate_segments(struct gxp_dev *gxp, uint32_t core_id)
-{
-	int i;
-	struct gxp_debug_dump_manager *mgr = gxp->debug_dump_mgr;
-	struct gxp_core_dump *core_dump;
-	struct gxp_common_dump *common_dump;
-	struct gxp_core_dump_header *core_dump_header;
-
-	core_dump = mgr->core_dump;
-	common_dump = mgr->common_dump;
-	if (!core_dump || !common_dump) {
-		dev_dbg(gxp->dev,
-			"Failed to get core_dump or common_dump for invalidating segments\n");
-		return;
-	}
-
-	core_dump_header = &core_dump->core_dump_header[core_id];
-	if (!core_dump_header) {
-		dev_dbg(gxp->dev,
-			"Failed to get core_dump_header for invalidating segments\n");
-		return;
-	}
-
-	for (i = 0; i < GXP_NUM_COMMON_SEGMENTS; i++)
-		common_dump->seg_header[i].valid = 0;
-
-	for (i = 0; i < GXP_NUM_CORE_SEGMENTS; i++)
-		core_dump_header->seg_header[i].valid = 0;
-
-	for (i = 0; i < GXP_NUM_BUFFER_MAPPINGS; i++)
-		core_dump_header->core_header.user_bufs[i].size = 0;
-
-	core_dump_header->core_header.dump_available = 0;
-}
 
 /**
  * gxp_map_fw_rw_section() - Maps the fw rw section address and size to be
@@ -555,6 +519,43 @@ static int gxp_map_fw_rw_section(struct gxp_dev *gxp,
 	return -ENXIO;
 }
 
+#endif /* HAS_COREDUMP */
+
+void gxp_debug_dump_invalidate_segments(struct gxp_dev *gxp, uint32_t core_id)
+{
+	int i;
+	struct gxp_debug_dump_manager *mgr = gxp->debug_dump_mgr;
+	struct gxp_core_dump *core_dump;
+	struct gxp_common_dump *common_dump;
+	struct gxp_core_dump_header *core_dump_header;
+
+	core_dump = mgr->core_dump;
+	common_dump = mgr->common_dump;
+	if (!core_dump || !common_dump) {
+		dev_dbg(gxp->dev,
+			"Failed to get core_dump or common_dump for invalidating segments\n");
+		return;
+	}
+
+	core_dump_header = &core_dump->core_dump_header[core_id];
+	if (!core_dump_header) {
+		dev_dbg(gxp->dev,
+			"Failed to get core_dump_header for invalidating segments\n");
+		return;
+	}
+
+	for (i = 0; i < GXP_NUM_COMMON_SEGMENTS; i++)
+		common_dump->seg_header[i].valid = 0;
+
+	for (i = 0; i < GXP_NUM_CORE_SEGMENTS; i++)
+		core_dump_header->seg_header[i].valid = 0;
+
+	for (i = 0; i < GXP_NUM_BUFFER_MAPPINGS; i++)
+		core_dump_header->core_header.user_bufs[i].size = 0;
+
+	core_dump_header->core_header.dump_available = 0;
+}
+
 /*
  * Caller must make sure that gxp->debug_dump_mgr->common_dump and
  * gxp->debug_dump_mgr->core_dump are not NULL.
@@ -569,7 +570,7 @@ static int gxp_handle_debug_dump(struct gxp_dev *gxp,
 		&core_dump->core_dump_header[core_id];
 	struct gxp_core_header *core_header = &core_dump_header->core_header;
 	int ret = 0;
-#if IS_ENABLED(CONFIG_GXP_TEST) || IS_ENABLED(CONFIG_SUBSYSTEM_COREDUMP)
+#if HAS_COREDUMP
 	struct gxp_common_dump *common_dump = mgr->common_dump;
 	int i;
 	int seg_idx = 0;
@@ -577,7 +578,7 @@ static int gxp_handle_debug_dump(struct gxp_dev *gxp,
 	char sscd_msg[SSCD_MSG_LENGTH];
 	void *user_buf_vaddrs[GXP_NUM_BUFFER_MAPPINGS];
 	int user_buf_cnt;
-#endif
+#endif /* HAS_COREDUMP */
 
 	/* Core */
 	if (!core_header->dump_available) {
@@ -586,7 +587,7 @@ static int gxp_handle_debug_dump(struct gxp_dev *gxp,
 		goto out;
 	}
 
-#if IS_ENABLED(CONFIG_GXP_TEST) || IS_ENABLED(CONFIG_SUBSYSTEM_COREDUMP)
+#if HAS_COREDUMP
 	/* Common */
 	data_addr = &common_dump->common_dump_data.common_regs;
 	for (i = 0; i < GXP_NUM_COMMON_SEGMENTS; i++) {
@@ -637,7 +638,7 @@ static int gxp_handle_debug_dump(struct gxp_dev *gxp,
 	}
 	/* fw ro section */
 	mgr->segs[core_id][seg_idx].addr = gxp->fwbufs[core_id].vaddr;
-	mgr->segs[core_id][seg_idx].size = vd->fw_ro_size;
+	mgr->segs[core_id][seg_idx].size = gxp->fwbufs[core_id].size;
 	seg_idx++;
 
 	/* fw rw section */
@@ -671,7 +672,7 @@ out_efault:
 
 		gxp_user_buffers_vunmap(gxp, vd, core_header);
 	}
-#endif
+#endif /* HAS_COREDUMP */
 
 out:
 	gxp_debug_dump_invalidate_segments(gxp, core_id);
@@ -681,7 +682,7 @@ out:
 
 static int gxp_init_segments(struct gxp_dev *gxp)
 {
-#if !(IS_ENABLED(CONFIG_GXP_TEST) || IS_ENABLED(CONFIG_SUBSYSTEM_COREDUMP))
+#if !HAS_COREDUMP
 	return 0;
 #else
 	struct gxp_debug_dump_manager *mgr = gxp->debug_dump_mgr;
@@ -691,7 +692,7 @@ static int gxp_init_segments(struct gxp_dev *gxp)
 		return -ENOMEM;
 
 	return 0;
-#endif
+#endif /* HAS_COREDUMP */
 }
 
 /*
diff --git a/gxp-debug-dump.h b/gxp-debug-dump.h
index 1cf9219..aeb8229 100644
--- a/gxp-debug-dump.h
+++ b/gxp-debug-dump.h
@@ -12,13 +12,16 @@
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
-#if IS_ENABLED(CONFIG_GXP_TEST) || IS_ENABLED(CONFIG_SUBSYSTEM_COREDUMP)
-#include <linux/platform_data/sscoredump.h>
-#endif
-
 #include "gxp-dma.h"
 #include "gxp-internal.h"
 
+#define HAS_COREDUMP                                                           \
+	(IS_ENABLED(CONFIG_GXP_TEST) || IS_ENABLED(CONFIG_SUBSYSTEM_COREDUMP))
+
+#if HAS_COREDUMP
+#include <linux/platform_data/sscoredump.h>
+#endif
+
 #define GXP_NUM_COMMON_SEGMENTS 2
 #define GXP_NUM_CORE_SEGMENTS 8
 #define GXP_NUM_BUFFER_MAPPINGS 32
@@ -188,7 +191,7 @@ struct gxp_debug_dump_manager {
 	 * time
 	 */
 	struct mutex debug_dump_lock;
-#if IS_ENABLED(CONFIG_GXP_TEST) || IS_ENABLED(CONFIG_SUBSYSTEM_COREDUMP)
+#if HAS_COREDUMP
 	struct sscd_segment segs[GXP_NUM_CORES][GXP_NUM_SEGMENTS_PER_CORE];
 #endif
 };
diff --git a/gxp-dma-iommu.c b/gxp-dma-iommu.c
index ad1111b..3312bb1 100644
--- a/gxp-dma-iommu.c
+++ b/gxp-dma-iommu.c
@@ -22,6 +22,9 @@
 #include "gxp-ssmt.h"
 #include "gxp.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/gxp.h>
+
 struct gxp_dma_iommu_manager {
 	struct gxp_dma_manager dma_mgr;
 	struct gxp_iommu_domain *default_domain;
@@ -639,6 +642,8 @@ int gxp_dma_map_sg(struct gxp_dev *gxp, struct gxp_iommu_domain *gdomain,
 	int prot = map_flags_to_iommu_prot(direction, attrs, gxp_dma_flags);
 	ssize_t size_mapped;
 
+	trace_gxp_dma_map_sg_start(nents);
+
 	nents_mapped = dma_map_sg_attrs(gxp->dev, sg, nents, direction, attrs);
 	if (!nents_mapped)
 		return 0;
@@ -655,6 +660,8 @@ int gxp_dma_map_sg(struct gxp_dev *gxp, struct gxp_iommu_domain *gdomain,
 	if (size_mapped <= 0)
 		goto err;
 
+	trace_gxp_dma_map_sg_end(nents_mapped, size_mapped);
+
 	return nents_mapped;
 
 err:
@@ -670,6 +677,8 @@ void gxp_dma_unmap_sg(struct gxp_dev *gxp, struct gxp_iommu_domain *gdomain,
 	int i;
 	size_t size = 0;
 
+	trace_gxp_dma_unmap_sg_start(nents);
+
 	for_each_sg (sg, s, nents, i)
 		size += sg_dma_len(s);
 
@@ -677,6 +686,8 @@ void gxp_dma_unmap_sg(struct gxp_dev *gxp, struct gxp_iommu_domain *gdomain,
 		dev_warn(gxp->dev, "Failed to unmap sg\n");
 
 	dma_unmap_sg_attrs(gxp->dev, sg, nents, direction, attrs);
+
+	trace_gxp_dma_unmap_sg_end(size);
 }
 
 int gxp_dma_map_iova_sgt(struct gxp_dev *gxp, struct gxp_iommu_domain *gdomain,
diff --git a/gxp-firmware-data.c b/gxp-firmware-data.c
index 09c1326..841e80e 100644
--- a/gxp-firmware-data.c
+++ b/gxp-firmware-data.c
@@ -5,94 +5,28 @@
  * Copyright (C) 2021 Google LLC
  */
 
-#include <linux/bitops.h>
-#include <linux/dma-mapping.h>
-#include <linux/genalloc.h>
+#include <linux/slab.h>
 
+#include "gxp-config.h"
 #include "gxp-debug-dump.h"
 #include "gxp-firmware-data.h"
-#include "gxp-firmware-loader.h"
 #include "gxp-firmware.h" /* gxp_core_boot */
 #include "gxp-host-device-structs.h"
 #include "gxp-internal.h"
-#include "gxp-range-alloc.h"
 #include "gxp-vd.h"
 #include "gxp.h"
 
-/*
- * The minimum alignment order (power of 2) of allocations in the firmware data
- * region.
- */
-#define FW_DATA_STORAGE_ORDER 3
-
 /* A byte pattern to pre-populate the FW region with */
 #define FW_DATA_DEBUG_PATTERN 0x66
 
-/* IDs for dedicated doorbells used by some system components */
-#define DOORBELL_ID_CORE_WAKEUP(__core__) (0 + __core__)
-
-/* IDs for dedicated sync barriers used by some system components */
-#define SYNC_BARRIER_ID_UART 1
-
 /* Default application parameters */
 #define DEFAULT_APP_ID 1
-#define DEFAULT_APP_USER_MEM_SIZE (120 * 1024)
-#define DEFAULT_APP_USER_MEM_ALIGNMENT 8
-#define DEFAULT_APP_THREAD_COUNT 2
-#define DEFAULT_APP_TCM_PER_BANK (100 * 1024)
-#define DEFAULT_APP_USER_DOORBELL_COUNT 2
-#define DEFAULT_APP_USER_BARRIER_COUNT 2
-
-/* Core-to-core mailbox communication constants */
-#define CORE_TO_CORE_MBX_CMD_COUNT 10
-#define CORE_TO_CORE_MBX_RSP_COUNT 10
-
-/* A block allocator managing and partitioning a memory region for device use */
-struct fw_memory_allocator {
-	struct gen_pool *pool;
-	struct gxp_dev *gxp;
-	void *base_host_addr;
-	uint32_t base_device_addr;
-};
-
-/* A memory region allocated for device use */
-struct fw_memory {
-	void *host_addr;
-	uint32_t device_addr;
-	size_t sz;
-};
 
 /*
  * Holds information about system-wide HW and memory resources given to the FWs
  * of GXP devices.
  */
 struct gxp_fw_data_manager {
-	/* Host-side pointers for book keeping */
-	void *fw_data_virt;
-	struct gxp_system_descriptor *system_desc;
-
-	/* Doorbells allocator and reserved doorbell IDs */
-	struct range_alloc *doorbell_allocator;
-	int core_wakeup_doorbells[GXP_NUM_WAKEUP_DOORBELLS];
-	int semaphore_doorbells[GXP_NUM_CORES];
-
-	/* Sync barriers allocator and reserved sync barrier IDs */
-	struct range_alloc *sync_barrier_allocator;
-	int uart_sync_barrier;
-	int timer_regions_barrier;
-	int watchdog_region_barrier;
-	int uart_region_barrier;
-	int doorbell_regions_barrier;
-	int sync_barrier_regions_barrier;
-	int semaphores_regions_barrier;
-
-	/* System-wide device memory resources */
-	struct fw_memory_allocator *allocator;
-	struct fw_memory sys_desc_mem;
-	struct fw_memory wdog_mem;
-	struct fw_memory core_telemetry_mem;
-	struct fw_memory debug_dump_mem;
-
 	/* Cached core telemetry descriptors. */
 	struct gxp_core_telemetry_descriptor core_telemetry_desc;
 	/*
@@ -107,477 +41,6 @@ struct gxp_fw_data_manager {
 	struct gxp_system_descriptor_rw *sys_desc_rw;
 };
 
-/* A container holding information for a single GXP application. */
-struct app_metadata {
-	struct gxp_fw_data_manager *mgr;
-	struct gxp_virtual_device *vd;
-	uint application_id;
-	uint core_count;
-	uint core_list; /* bitmap of cores allocated to this app */
-
-	/* Per-app doorbell IDs */
-	int user_doorbells_count;
-	int *user_doorbells;
-
-	/* Per-app sync barrier IDs */
-	int user_barriers_count;
-	int *user_barriers;
-
-	/* Per-app memory regions */
-	struct fw_memory user_mem;
-	struct fw_memory doorbells_mem;
-	struct fw_memory sync_barriers_mem;
-	struct fw_memory semaphores_mem;
-	struct fw_memory cores_mem;
-	struct fw_memory core_cmd_queues_mem[GXP_NUM_CORES];
-	struct fw_memory core_rsp_queues_mem[GXP_NUM_CORES];
-	struct fw_memory app_mem;
-};
-
-static struct fw_memory_allocator *mem_alloc_create(struct gxp_dev *gxp,
-						    void *host_base,
-						    uint32_t device_base,
-						    size_t size)
-{
-	struct fw_memory_allocator *allocator;
-	int ret = 0;
-
-	allocator = kzalloc(sizeof(*allocator), GFP_KERNEL);
-	if (!allocator)
-		return ERR_PTR(-ENOMEM);
-
-	/*
-	 * Use a genpool to allocate and free chunks of the virtual address
-	 * space reserved for FW data. The genpool doesn't use the passed
-	 * addresses internally to access any data, thus it is safe to use it to
-	 * manage memory that the host may not be able to access directly.
-	 * The allocator also records the host-side address so that the code
-	 * here can access and populate data in this region.
-	 */
-	allocator->gxp = gxp;
-	allocator->pool = gen_pool_create(FW_DATA_STORAGE_ORDER, /*nid=*/-1);
-	if (!allocator->pool) {
-		dev_err(gxp->dev, "Failed to create memory pool\n");
-		kfree(allocator);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	ret = gen_pool_add(allocator->pool, device_base, size, /*nid=*/-1);
-	if (ret) {
-		dev_err(gxp->dev, "Failed to add memory to pool (ret = %d)\n",
-			ret);
-		gen_pool_destroy(allocator->pool);
-		kfree(allocator);
-		return ERR_PTR(ret);
-	}
-	allocator->base_host_addr = host_base;
-	allocator->base_device_addr = device_base;
-
-	return allocator;
-}
-
-static int mem_alloc_allocate(struct fw_memory_allocator *allocator,
-			      struct fw_memory *mem, size_t size,
-			      uint8_t alignment)
-{
-	struct genpool_data_align data = { .align = alignment };
-	uint32_t dev_addr;
-
-	dev_addr = gen_pool_alloc_algo(allocator->pool, size,
-				       gen_pool_first_fit_align, &data);
-	if (!dev_addr)
-		return -ENOMEM;
-
-	mem->host_addr = allocator->base_host_addr +
-			 (dev_addr - allocator->base_device_addr);
-	mem->device_addr = dev_addr;
-	mem->sz = size;
-
-	return 0;
-}
-
-static void mem_alloc_free(struct fw_memory_allocator *allocator,
-			   struct fw_memory *mem)
-{
-	gen_pool_free(allocator->pool, mem->device_addr, mem->sz);
-}
-
-static void mem_alloc_destroy(struct fw_memory_allocator *allocator)
-{
-	WARN_ON(gen_pool_avail(allocator->pool) !=
-		gen_pool_size(allocator->pool));
-	gen_pool_destroy(allocator->pool);
-	kfree(allocator);
-}
-
-static struct fw_memory init_doorbells(struct app_metadata *app)
-{
-	struct gxp_doorbells_descriptor *db_region;
-	struct fw_memory mem;
-	uint32_t mem_size;
-	uint32_t doorbell_count;
-	int i;
-
-	doorbell_count = app->user_doorbells_count;
-	mem_size = sizeof(*db_region) +
-		   doorbell_count * sizeof(db_region->doorbells[0]);
-
-	mem_alloc_allocate(app->mgr->allocator, &mem, mem_size,
-			   __alignof__(struct gxp_doorbells_descriptor));
-
-	db_region = mem.host_addr;
-	db_region->application_id = app->application_id;
-	db_region->protection_barrier = app->mgr->doorbell_regions_barrier;
-	db_region->num_items = doorbell_count;
-	for (i = 0; i < doorbell_count; i++) {
-		db_region->doorbells[i].users_count = 0;
-		db_region->doorbells[i].hw_doorbell_idx =
-			app->user_doorbells[i];
-	}
-
-	return mem;
-}
-
-static struct fw_memory init_sync_barriers(struct app_metadata *app)
-{
-	struct gxp_sync_barriers_descriptor *sb_region;
-	struct fw_memory mem;
-	uint32_t mem_size;
-	uint32_t barrier_count;
-	int i;
-
-	barrier_count = app->user_barriers_count;
-	mem_size = sizeof(*sb_region) +
-		   barrier_count * sizeof(sb_region->barriers[0]);
-
-	mem_alloc_allocate(app->mgr->allocator, &mem, mem_size,
-			   __alignof__(struct gxp_sync_barriers_descriptor));
-
-	sb_region = mem.host_addr;
-	sb_region->application_id = app->application_id;
-	sb_region->protection_barrier = app->mgr->sync_barrier_regions_barrier;
-	sb_region->num_items = barrier_count;
-	for (i = 0; i < barrier_count; i++) {
-		sb_region->barriers[i].users_count = 0;
-		sb_region->barriers[i].hw_barrier_idx = app->user_barriers[i];
-	}
-
-	return mem;
-}
-
-static struct fw_memory init_watchdog(struct gxp_fw_data_manager *mgr)
-{
-	struct gxp_watchdog_descriptor *wd_region;
-	struct fw_memory mem;
-
-	mem_alloc_allocate(mgr->allocator, &mem, sizeof(*wd_region),
-			   __alignof__(struct gxp_watchdog_descriptor));
-
-	wd_region = mem.host_addr;
-	wd_region->protection_barrier = mgr->watchdog_region_barrier;
-	wd_region->target_value = 0;
-	wd_region->participating_cores = 0;
-	wd_region->responded_cores = 0;
-	wd_region->tripped = 0;
-
-	return mem;
-}
-
-static struct fw_memory init_core_telemetry(struct gxp_fw_data_manager *mgr)
-{
-	struct gxp_core_telemetry_descriptor *tel_region;
-	struct fw_memory mem;
-
-	mem_alloc_allocate(mgr->allocator, &mem, sizeof(*tel_region),
-			   __alignof__(struct gxp_core_telemetry_descriptor));
-
-	tel_region = mem.host_addr;
-
-	/*
-	 * Core telemetry is disabled for now.
-	 * Subsuequent calls to the FW data module can be used to populate or
-	 * depopulate the descriptor pointers on demand.
-	 */
-	memset(tel_region, 0x00, sizeof(*tel_region));
-
-	return mem;
-}
-
-static struct fw_memory init_debug_dump(struct gxp_dev *gxp)
-{
-	struct fw_memory mem;
-
-	if (gxp->debug_dump_mgr) {
-		mem.host_addr = gxp->debug_dump_mgr->buf.vaddr;
-		mem.device_addr = gxp->debug_dump_mgr->buf.dsp_addr;
-		mem.sz = gxp->debug_dump_mgr->buf.size;
-	} else {
-		mem.host_addr = 0;
-		mem.device_addr = 0;
-		mem.sz = 0;
-	}
-
-	return mem;
-}
-
-static struct fw_memory init_app_user_memory(struct app_metadata *app,
-					     int memory_size)
-{
-	struct fw_memory mem;
-
-	mem_alloc_allocate(app->mgr->allocator, &mem, memory_size,
-			   DEFAULT_APP_USER_MEM_ALIGNMENT);
-
-	return mem;
-}
-
-static struct fw_memory init_app_semaphores(struct app_metadata *app)
-{
-	struct gxp_semaphores_descriptor *sm_region;
-	struct fw_memory mem;
-	uint32_t mem_size;
-	uint32_t semaphore_count;
-	int core;
-	int i;
-
-	semaphore_count = NUM_SYSTEM_SEMAPHORES;
-	mem_size = sizeof(*sm_region) +
-		   semaphore_count * sizeof(sm_region->semaphores[0]);
-
-	mem_alloc_allocate(app->mgr->allocator, &mem, mem_size,
-			   __alignof__(struct gxp_semaphores_descriptor));
-
-	sm_region = mem.host_addr;
-	sm_region->application_id = app->application_id;
-	sm_region->protection_barrier = app->mgr->semaphores_regions_barrier;
-
-	core = 0;
-	for (i = 0; i < GXP_NUM_CORES; i++) {
-		if (app->core_list & BIT(i))
-			sm_region->wakeup_doorbells[core++] =
-				app->mgr->semaphore_doorbells[i];
-		sm_region->woken_pending_semaphores[i] = 0;
-	}
-
-	sm_region->num_items = semaphore_count;
-	for (i = 0; i < semaphore_count; i++) {
-		sm_region->semaphores[i].users_count = 0;
-		sm_region->semaphores[i].count = 0;
-		sm_region->semaphores[i].waiters = 0;
-	}
-
-	return mem;
-}
-
-static struct fw_memory init_app_cores(struct app_metadata *app)
-{
-	struct gxp_cores_descriptor *cd_region;
-	struct gxp_queue_info *q_info;
-	struct fw_memory mem;
-	uint32_t mem_size;
-	int semaphore_id;
-	int core_count;
-	int i;
-	const int cmd_queue_items = CORE_TO_CORE_MBX_CMD_COUNT;
-	const int resp_queue_items = CORE_TO_CORE_MBX_RSP_COUNT;
-
-	/* Core info structures. */
-	core_count = app->core_count;
-	mem_size =
-		sizeof(*cd_region) + core_count * sizeof(cd_region->cores[0]);
-
-	mem_alloc_allocate(app->mgr->allocator, &mem, mem_size,
-			   __alignof__(struct gxp_cores_descriptor));
-
-	cd_region = mem.host_addr;
-	cd_region->num_items = core_count;
-
-	/* Command and response queues. */
-	semaphore_id = 0;
-	for (i = 0; i < core_count; i++) {
-		/* Allocate per-core command queue storage. */
-		mem_size = cmd_queue_items *
-			   sizeof(struct gxp_core_to_core_command);
-		mem_alloc_allocate(
-			app->mgr->allocator, &app->core_cmd_queues_mem[i],
-			mem_size, __alignof__(struct gxp_core_to_core_command));
-
-		/* Update per-core command queue info. */
-		q_info = &cd_region->cores[i].incoming_commands_queue;
-		q_info->header.storage =
-			app->core_cmd_queues_mem[i].device_addr;
-		q_info->header.head_idx = 0;
-		q_info->header.tail_idx = 0;
-		q_info->header.element_size =
-			sizeof(struct gxp_core_to_core_command);
-		q_info->header.elements_count = cmd_queue_items;
-		q_info->access_sem_id = semaphore_id++;
-		q_info->posted_slots_sem_id = semaphore_id++;
-		q_info->free_slots_sem_id = semaphore_id++;
-
-		/* Allocate per-core response queue storage. */
-		mem_size = resp_queue_items *
-			   sizeof(struct gxp_core_to_core_response);
-		mem_alloc_allocate(
-			app->mgr->allocator, &app->core_rsp_queues_mem[i],
-			mem_size,
-			__alignof__(struct gxp_core_to_core_response));
-
-		/* Update per-core response queue info. */
-		q_info = &cd_region->cores[i].incoming_responses_queue;
-		q_info->header.storage =
-			app->core_rsp_queues_mem[i].device_addr;
-		q_info->header.head_idx = 0;
-		q_info->header.tail_idx = 0;
-		q_info->header.element_size =
-			sizeof(struct gxp_core_to_core_response);
-		q_info->header.elements_count = resp_queue_items;
-		q_info->access_sem_id = semaphore_id++;
-		q_info->posted_slots_sem_id = semaphore_id++;
-		q_info->free_slots_sem_id = semaphore_id++;
-	}
-
-	return mem;
-}
-
-static struct fw_memory init_application(struct app_metadata *app)
-{
-	struct gxp_application_descriptor *app_region;
-	struct fw_memory mem;
-	const int user_mem_size = DEFAULT_APP_USER_MEM_SIZE;
-
-	/* App's system memory. */
-	app->user_mem = init_app_user_memory(app, user_mem_size);
-
-	/* App's doorbells region. */
-	app->doorbells_mem = init_doorbells(app);
-
-	/* App's  sync barriers region. */
-	app->sync_barriers_mem = init_sync_barriers(app);
-
-	/* App's semaphores region. */
-	app->semaphores_mem = init_app_semaphores(app);
-
-	/* App's cores info and core-to-core queues. */
-	app->cores_mem = init_app_cores(app);
-
-	/* App's descriptor. */
-	mem_alloc_allocate(app->mgr->allocator, &mem, sizeof(*app_region),
-			   __alignof__(struct gxp_application_descriptor));
-	app_region = mem.host_addr;
-	app_region->application_id = app->application_id;
-	app_region->core_count = app->core_count;
-	app_region->cores_mask = app->core_list;
-	app_region->threads_count = DEFAULT_APP_THREAD_COUNT;
-	app_region->tcm_memory_per_bank = DEFAULT_APP_TCM_PER_BANK;
-	app_region->system_memory_size = user_mem_size;
-	app_region->system_memory_addr = app->user_mem.device_addr;
-	app_region->doorbells_dev_addr = app->doorbells_mem.device_addr;
-	app_region->sync_barriers_dev_addr = app->sync_barriers_mem.device_addr;
-	app_region->semaphores_dev_addr = app->semaphores_mem.device_addr;
-	app_region->cores_info_dev_addr = app->cores_mem.device_addr;
-
-	return mem;
-}
-
-static struct app_metadata *gxp_fw_data_create_app_legacy(struct gxp_dev *gxp,
-							  uint core_list)
-{
-	struct gxp_fw_data_manager *mgr = gxp->data_mgr;
-	struct app_metadata *app;
-	void *err;
-	int i;
-
-	app = kzalloc(sizeof(*app), GFP_KERNEL);
-	if (!app)
-		return ERR_PTR(-ENOMEM);
-
-	/* Create resource and memory allocations for new app */
-	app->mgr = mgr;
-	app->application_id = DEFAULT_APP_ID;
-	app->core_count = hweight_long(core_list);
-	app->core_list = core_list;
-
-	/* User doorbells */
-	app->user_doorbells_count = DEFAULT_APP_USER_DOORBELL_COUNT;
-	app->user_doorbells =
-		kcalloc(app->user_doorbells_count, sizeof(int), GFP_KERNEL);
-	if (!app->user_doorbells) {
-		err = ERR_PTR(-ENOMEM);
-		goto err_user_doorbells;
-	}
-
-	for (i = 0; i < app->user_doorbells_count; i++) {
-		range_alloc_get_any(mgr->doorbell_allocator,
-				    &app->user_doorbells[i]);
-	}
-
-	/* User sync barrier */
-	app->user_barriers_count = DEFAULT_APP_USER_BARRIER_COUNT;
-	app->user_barriers =
-		kcalloc(app->user_barriers_count, sizeof(int), GFP_KERNEL);
-	if (!app->user_barriers) {
-		err = ERR_PTR(-ENOMEM);
-		goto err_user_barriers;
-	}
-
-	for (i = 0; i < app->user_barriers_count; i++) {
-		range_alloc_get_any(mgr->sync_barrier_allocator,
-				    &app->user_barriers[i]);
-	}
-
-	/* Application region. */
-	app->app_mem = init_application(app);
-	for (i = 0; i < GXP_NUM_CORES; i++) {
-		if (core_list & BIT(i)) {
-			mgr->system_desc->app_descriptor_dev_addr[i] =
-				app->app_mem.device_addr;
-		}
-	}
-
-	return app;
-
-err_user_barriers:
-	for (i = 0; i < app->user_doorbells_count; i++)
-		range_alloc_put(mgr->doorbell_allocator,
-				app->user_doorbells[i]);
-	kfree(app->user_doorbells);
-err_user_doorbells:
-	kfree(app);
-
-	return err;
-}
-
-static void gxp_fw_data_destroy_app_legacy(struct gxp_dev *gxp,
-					   struct app_metadata *app)
-{
-	struct gxp_fw_data_manager *mgr = gxp->data_mgr;
-	int i;
-
-	for (i = 0; i < app->user_doorbells_count; i++)
-		range_alloc_put(mgr->doorbell_allocator,
-				app->user_doorbells[i]);
-	kfree(app->user_doorbells);
-
-	for (i = 0; i < app->user_barriers_count; i++)
-		range_alloc_put(mgr->sync_barrier_allocator,
-				app->user_barriers[i]);
-	kfree(app->user_barriers);
-
-	mem_alloc_free(mgr->allocator, &app->user_mem);
-	mem_alloc_free(mgr->allocator, &app->doorbells_mem);
-	mem_alloc_free(mgr->allocator, &app->sync_barriers_mem);
-	mem_alloc_free(mgr->allocator, &app->semaphores_mem);
-	mem_alloc_free(mgr->allocator, &app->cores_mem);
-	for (i = 0; i < app->core_count; i++) {
-		mem_alloc_free(mgr->allocator, &app->core_cmd_queues_mem[i]);
-		mem_alloc_free(mgr->allocator, &app->core_rsp_queues_mem[i]);
-	}
-	mem_alloc_free(mgr->allocator, &app->app_mem);
-
-	kfree(app);
-}
-
 /*
  * Here assumes sys_cfg contains gxp_system_descriptor_ro in the first page and
  * gxp_system_descriptor_rw in the second page.
@@ -623,31 +86,23 @@ static void set_system_cfg_region(struct gxp_dev *gxp, void *sys_cfg)
 	gxp->data_mgr->sys_desc_rw = des_rw;
 }
 
-static struct app_metadata *
-_gxp_fw_data_create_app(struct gxp_dev *gxp, struct gxp_virtual_device *vd)
+static void _gxp_fw_data_populate_vd_cfg(struct gxp_dev *gxp,
+					 struct gxp_virtual_device *vd)
 {
-	struct app_metadata *app;
 	struct gxp_host_control_region *core_cfg;
 	struct gxp_job_descriptor job;
 	struct gxp_vd_descriptor *vd_desc;
 	int i;
 
-	/*
-	 * If we are able to know where sys_cfg's virt is on init() then we
-	 * don't need this here, but to keep compatibility with
-	 * !use_per_vd_config, we keep gxp_fw_data_init() doing the
-	 * initialization of legacy mode, and have here copy the values to the
-	 * config region.
-	 */
-	if (vd->vdid == 1)
-		set_system_cfg_region(gxp, vd->sys_cfg.vaddr);
-	app = kzalloc(sizeof(*app), GFP_KERNEL);
-	if (!app)
-		return ERR_PTR(-ENOMEM);
-
 	if (!gxp_core_boot(gxp)) {
 		dev_info(gxp->dev, "Skip setting VD and core CFG");
-		return app;
+		return;
+	}
+	if (!vd->vd_cfg.vaddr || !vd->core_cfg.vaddr) {
+		dev_warn(
+			gxp->dev,
+			"Missing VD and core CFG in image config, firmware is not bootable\n");
+		return;
 	}
 	/* Set up VD config region. */
 	vd_desc = vd->vd_cfg.vaddr;
@@ -676,210 +131,63 @@ _gxp_fw_data_create_app(struct gxp_dev *gxp, struct gxp_virtual_device *vd)
 			   vd->core_cfg.size / GXP_NUM_CORES * i;
 		core_cfg->job_descriptor = job;
 	}
-
-	return app;
 }
 
-static void _gxp_fw_data_destroy_app(struct gxp_dev *gxp,
-				     struct app_metadata *app)
+static struct core_telemetry_descriptor *
+gxp_fw_data_get_core_telemetry_descriptor(struct gxp_dev *gxp, u8 type)
 {
-	kfree(app);
+	struct gxp_core_telemetry_descriptor *descriptor =
+		&gxp->data_mgr->core_telemetry_desc;
+
+	if (type == GXP_TELEMETRY_TYPE_LOGGING)
+		return descriptor->per_core_loggers;
+	else if (type == GXP_TELEMETRY_TYPE_TRACING)
+		return descriptor->per_core_tracers;
+	else
+		return ERR_PTR(-EINVAL);
 }
 
 int gxp_fw_data_init(struct gxp_dev *gxp)
 {
 	struct gxp_fw_data_manager *mgr;
-	int res;
-	int i;
+	void *virt;
 
 	mgr = devm_kzalloc(gxp->dev, sizeof(*mgr), GFP_KERNEL);
 	if (!mgr)
 		return -ENOMEM;
-	gxp->data_mgr = mgr;
 
-	/*
-	 * TODO (b/200169232) Using memremap until devm_memremap is added to
-	 * the GKI ABI
-	 */
-	mgr->fw_data_virt = memremap(gxp->fwdatabuf.paddr, gxp->fwdatabuf.size,
-				     MEMREMAP_WC);
+	virt = memremap(gxp->fwdatabuf.paddr, gxp->fwdatabuf.size, MEMREMAP_WC);
 
-	if (IS_ERR_OR_NULL(mgr->fw_data_virt)) {
+	if (IS_ERR_OR_NULL(virt)) {
 		dev_err(gxp->dev, "Failed to map fw data region\n");
-		res = -ENODEV;
-		goto err;
-	}
-	gxp->fwdatabuf.vaddr = mgr->fw_data_virt;
-
-	/* Instantiate the doorbells allocator with all doorbells */
-	mgr->doorbell_allocator =
-		range_alloc_create(/*start=*/0, DOORBELL_COUNT);
-	if (IS_ERR(mgr->doorbell_allocator)) {
-		dev_err(gxp->dev, "Failed to create doorbells allocator\n");
-		res = PTR_ERR(mgr->doorbell_allocator);
-		mgr->doorbell_allocator = NULL;
-		goto err;
-	}
-
-	/* Instantiate the sync barriers allocator with all sync barriers */
-	mgr->sync_barrier_allocator =
-		range_alloc_create(/*start=*/0, SYNC_BARRIER_COUNT);
-	if (IS_ERR(mgr->sync_barrier_allocator)) {
-		dev_err(gxp->dev, "Failed to create sync barriers allocator\n");
-		res = PTR_ERR(mgr->sync_barrier_allocator);
-		mgr->sync_barrier_allocator = NULL;
-		goto err;
-	}
-
-	/* Allocate doorbells */
-
-	/* Pinned: Cores wakeup doorbell */
-	for (i = 0; i < GXP_NUM_WAKEUP_DOORBELLS; i++) {
-		mgr->core_wakeup_doorbells[i] = DOORBELL_ID_CORE_WAKEUP(i);
-		res = range_alloc_get(mgr->doorbell_allocator,
-				      mgr->core_wakeup_doorbells[i]);
-		if (res)
-			goto err;
-	}
-
-	/* Semaphores operation doorbells */
-	for (i = 0; i < GXP_NUM_CORES; i++) {
-		range_alloc_get_any(mgr->doorbell_allocator,
-				    &mgr->semaphore_doorbells[i]);
-	}
-
-	/* Allocate sync barriers */
-
-	/* Pinned: UART sync barrier */
-	mgr->uart_sync_barrier = SYNC_BARRIER_ID_UART;
-	mgr->uart_region_barrier = SYNC_BARRIER_ID_UART;
-	res = range_alloc_get(mgr->sync_barrier_allocator,
-			      mgr->uart_sync_barrier);
-	if (res)
-		goto err;
-
-	/* Doorbell regions for all apps */
-	res = range_alloc_get_any(mgr->sync_barrier_allocator,
-				  &mgr->doorbell_regions_barrier);
-	if (res)
-		goto err;
-
-	/* Sync barrier regions for all apps */
-	res = range_alloc_get_any(mgr->sync_barrier_allocator,
-				  &mgr->sync_barrier_regions_barrier);
-	if (res)
-		goto err;
-
-	/* Timer regions for all apps */
-	res = range_alloc_get_any(mgr->sync_barrier_allocator,
-				  &mgr->timer_regions_barrier);
-	if (res)
-		goto err;
-
-	/* Watchdog regions for all apps */
-	res = range_alloc_get_any(mgr->sync_barrier_allocator,
-				  &mgr->watchdog_region_barrier);
-	if (res)
-		goto err;
-
-	/* Semaphore regions for all apps */
-	res = range_alloc_get_any(mgr->sync_barrier_allocator,
-				  &mgr->semaphores_regions_barrier);
-	if (res)
-		goto err;
-
-	/* Shared firmware data memory region */
-	mgr->allocator =
-		mem_alloc_create(gxp, mgr->fw_data_virt, gxp->fwdatabuf.daddr,
-				 gxp->fwdatabuf.size);
-	if (IS_ERR(mgr->allocator)) {
-		dev_err(gxp->dev,
-			"Failed to create the FW data memory allocator\n");
-		res = PTR_ERR(mgr->allocator);
-		mgr->allocator = NULL;
-		goto err;
+		return -ENODEV;
 	}
+	gxp->fwdatabuf.vaddr = virt;
 
 	/* Populate the region with a pre-defined pattern. */
-	memset(mgr->fw_data_virt, FW_DATA_DEBUG_PATTERN, gxp->fwdatabuf.size);
-
-	/* Allocate the root system descriptor from the region */
-	mem_alloc_allocate(mgr->allocator, &mgr->sys_desc_mem,
-			   sizeof(struct gxp_system_descriptor),
-			   __alignof__(struct gxp_system_descriptor));
-	mgr->system_desc = mgr->sys_desc_mem.host_addr;
-
-	/* Allocate the watchdog descriptor from the region */
-	mgr->wdog_mem = init_watchdog(mgr);
-	mgr->system_desc->watchdog_dev_addr = mgr->wdog_mem.device_addr;
-
-	/* Allocate the descriptor for device-side core telemetry */
-	mgr->core_telemetry_mem = init_core_telemetry(mgr);
-	mgr->system_desc->core_telemetry_dev_addr =
-		mgr->core_telemetry_mem.device_addr;
-
-	/* Set the debug dump region parameters if available */
-	mgr->debug_dump_mem = init_debug_dump(gxp);
-	mgr->system_desc->debug_dump_dev_addr = mgr->debug_dump_mem.device_addr;
-
-	return res;
-
-err:
-	range_alloc_destroy(mgr->sync_barrier_allocator);
-	range_alloc_destroy(mgr->doorbell_allocator);
-	devm_kfree(gxp->dev, mgr);
-	return res;
-}
-
-void *gxp_fw_data_create_app(struct gxp_dev *gxp, struct gxp_virtual_device *vd)
-{
-	struct app_metadata *app;
-
-	if (gxp_fw_data_use_per_vd_config(vd))
-		app = _gxp_fw_data_create_app(gxp, vd);
-	else
-		app = gxp_fw_data_create_app_legacy(gxp, vd->core_list);
-
-	if (IS_ERR(app))
-		return app;
-	app->vd = vd;
-
-	return app;
-}
-
-void gxp_fw_data_destroy_app(struct gxp_dev *gxp, void *application)
-{
-	struct app_metadata *app = application;
+	memset(virt, FW_DATA_DEBUG_PATTERN, gxp->fwdatabuf.size);
+	gxp->data_mgr = mgr;
 
-	if (!app)
-		return;
-	if (gxp_fw_data_use_per_vd_config(app->vd))
-		return _gxp_fw_data_destroy_app(gxp, app);
-	return gxp_fw_data_destroy_app_legacy(gxp, app);
+	return 0;
 }
 
 void gxp_fw_data_destroy(struct gxp_dev *gxp)
 {
 	struct gxp_fw_data_manager *mgr = gxp->data_mgr;
 
-	mem_alloc_free(mgr->allocator, &mgr->core_telemetry_mem);
-	mem_alloc_free(mgr->allocator, &mgr->wdog_mem);
-	mem_alloc_free(mgr->allocator, &mgr->sys_desc_mem);
-	mem_alloc_destroy(mgr->allocator);
-
-	range_alloc_destroy(mgr->sync_barrier_allocator);
-	range_alloc_destroy(mgr->doorbell_allocator);
-
-	/* TODO (b/200169232) Remove this once we're using devm_memremap */
-	if (mgr->fw_data_virt) {
-		memunmap(mgr->fw_data_virt);
-		mgr->fw_data_virt = NULL;
-	}
+	if (gxp->fwdatabuf.vaddr)
+		memunmap(gxp->fwdatabuf.vaddr);
 
 	devm_kfree(gxp->dev, mgr);
 	gxp->data_mgr = NULL;
 }
 
+void gxp_fw_data_populate_vd_cfg(struct gxp_dev *gxp, struct gxp_virtual_device *vd)
+{
+	if (gxp_fw_data_use_per_vd_config(vd))
+		_gxp_fw_data_populate_vd_cfg(gxp, vd);
+}
+
 int gxp_fw_data_set_core_telemetry_descriptors(struct gxp_dev *gxp, u8 type,
 					       u32 host_status,
 					       struct gxp_coherent_buf *buffers,
@@ -920,41 +228,13 @@ int gxp_fw_data_set_core_telemetry_descriptors(struct gxp_dev *gxp, u8 type,
 	return 0;
 }
 
-struct core_telemetry_descriptor *
-gxp_fw_data_get_core_telemetry_descriptor(struct gxp_dev *gxp, u8 type)
-{
-	struct gxp_core_telemetry_descriptor *descriptor =
-		&gxp->data_mgr->core_telemetry_desc;
-
-	if (type == GXP_TELEMETRY_TYPE_LOGGING)
-		return descriptor->per_core_loggers;
-	else if (type == GXP_TELEMETRY_TYPE_TRACING)
-		return descriptor->per_core_tracers;
-	else
-		return ERR_PTR(-EINVAL);
-}
-
-static u32
-gxp_fw_data_get_core_telemetry_device_status_legacy(struct gxp_dev *gxp,
-						    uint core, u8 type)
+u32 gxp_fw_data_get_core_telemetry_device_status(struct gxp_dev *gxp, uint core,
+						 u8 type)
 {
-	struct gxp_core_telemetry_descriptor *descriptor =
-		&gxp->data_mgr->core_telemetry_desc;
+	struct gxp_system_descriptor_rw *des_rw = gxp->data_mgr->sys_desc_rw;
 
-	switch (type) {
-	case GXP_TELEMETRY_TYPE_LOGGING:
-		return descriptor->per_core_loggers[core].device_status;
-	case GXP_TELEMETRY_TYPE_TRACING:
-		return descriptor->per_core_tracers[core].device_status;
-	default:
+	if (core >= GXP_NUM_CORES)
 		return 0;
-	}
-}
-
-static u32 _gxp_fw_data_get_core_telemetry_device_status(struct gxp_dev *gxp,
-							 uint core, u8 type)
-{
-	struct gxp_system_descriptor_rw *des_rw = gxp->data_mgr->sys_desc_rw;
 
 	switch (type) {
 	case GXP_TELEMETRY_TYPE_LOGGING:
@@ -968,18 +248,32 @@ static u32 _gxp_fw_data_get_core_telemetry_device_status(struct gxp_dev *gxp,
 	}
 }
 
-u32 gxp_fw_data_get_core_telemetry_device_status(struct gxp_dev *gxp, uint core,
-						 u8 type)
+struct gxp_mapped_resource gxp_fw_data_resource(struct gxp_dev *gxp)
 {
-	if (core >= GXP_NUM_CORES)
-		return 0;
+	/*
+	 * For direct mode, the config regions are programmed by host (us); for
+	 * MCU mode, the config regions are programmed by MCU.
+	 */
+	if (gxp_is_direct_mode(gxp)) {
+		struct gxp_mapped_resource tmp = gxp->fwdatabuf;
 
-	if (gxp->fw_loader_mgr->core_img_cfg.config_version >=
-	    FW_DATA_PROTOCOL_PER_VD_CONFIG) {
-		return _gxp_fw_data_get_core_telemetry_device_status(gxp, core,
-								     type);
+		/* Leave the first half for use by gxp_fw_data_init() */
+		tmp.vaddr += tmp.size / 2;
+		tmp.paddr += tmp.size / 2;
+		return tmp;
 	} else {
-		return gxp_fw_data_get_core_telemetry_device_status_legacy(
-			gxp, core, type);
+		return gxp->shared_buf;
 	}
 }
+
+void *gxp_fw_data_system_cfg(struct gxp_dev *gxp)
+{
+	/* Use the end of the shared region for system cfg. */
+	return gxp_fw_data_resource(gxp).vaddr + GXP_SHARED_BUFFER_SIZE -
+	       GXP_FW_DATA_SYSCFG_SIZE;
+}
+
+void gxp_fw_data_populate_system_config(struct gxp_dev *gxp)
+{
+	set_system_cfg_region(gxp, gxp_fw_data_system_cfg(gxp));
+}
diff --git a/gxp-firmware-data.h b/gxp-firmware-data.h
index edff4af..89bf9e4 100644
--- a/gxp-firmware-data.h
+++ b/gxp-firmware-data.h
@@ -9,10 +9,14 @@
 #ifndef __GXP_FIRMWARE_DATA_H__
 #define __GXP_FIRMWARE_DATA_H__
 
+#include <linux/sizes.h>
+
 #include "gxp-dma.h"
 #include "gxp-internal.h"
 #include "gxp-vd.h"
 
+#define GXP_FW_DATA_SYSCFG_SIZE SZ_8K
+
 enum gxp_fw_data_protocol {
 	/* Use the per-VD configuration region. */
 	FW_DATA_PROTOCOL_PER_VD_CONFIG = 2,
@@ -31,35 +35,20 @@ enum gxp_fw_data_protocol {
 int gxp_fw_data_init(struct gxp_dev *gxp);
 
 /**
- * gxp_fw_data_create_app() - Allocates HW and memory resources needed to create
- *                            a GXP device application (1:1 with a GXP driver
- *                            virtual device) used by the specified physical
- *                            cores.
- * @gxp: The parent GXP device
- * @vd: The virtual device this app is being created for
- *
- * Return:
- * ptr     - A pointer of the newly created application handle, an error pointer
- *           (PTR_ERR) otherwise.
- * -ENOMEM - Insufficient memory to create the application
- */
-void *gxp_fw_data_create_app(struct gxp_dev *gxp,
-			     struct gxp_virtual_device *vd);
-
-/**
- * gxp_fw_data_destroy_app() - Deallocates the HW and memory resources used by
- *                             the specified application.
+ * gxp_fw_data_destroy() - Destroys the FW data manager submodule and frees all
+ *                         its resources.
  * @gxp: The parent GXP device
- * @application: The handle to the application to deallocate
  */
-void gxp_fw_data_destroy_app(struct gxp_dev *gxp, void *application);
+void gxp_fw_data_destroy(struct gxp_dev *gxp);
 
 /**
- * gxp_fw_data_destroy() - Destroys the FW data manager submodule and free all
- *                         its resources.
+ * gxp_fw_data_populate_vd_cfg() - Sets up the resources to VD's per-core config
+ *                                 regions and per-VD config regions.
  * @gxp: The parent GXP device
+ * @vd: The virtual device whose config regions are to be populated
  */
-void gxp_fw_data_destroy(struct gxp_dev *gxp);
+void gxp_fw_data_populate_vd_cfg(struct gxp_dev *gxp,
+				 struct gxp_virtual_device *vd);
 
 /**
  * gxp_fw_data_set_core_telemetry_descriptors() - Set new logging or tracing
@@ -84,19 +73,6 @@ int gxp_fw_data_set_core_telemetry_descriptors(struct gxp_dev *gxp, u8 type,
 					       u32 host_status,
 					       struct gxp_coherent_buf *buffers,
 					       u32 per_buffer_size);
-/**
- * gxp_fw_data_get_core_telemetry_descriptor() - Returns the pointer of core
- *                                               telemetry descriptor of given
- *                                               type.
- * @gxp: The GXP device to get buffer descriptors for
- * @type: Either `GXP_TELEMETRY_TYPE_LOGGING` or `GXP_TELEMETRY_TYPE_TRACING`
- *
- * Return:
- *   A pointer to the descriptor.
- *   -EINVAL - If @type is invalid.
- */
-struct core_telemetry_descriptor *
-gxp_fw_data_get_core_telemetry_descriptor(struct gxp_dev *gxp, u8 type);
 
 /**
  * gxp_fw_data_get_core_telemetry_device_status() - Returns a bitfield
@@ -119,4 +95,37 @@ static inline bool gxp_fw_data_use_per_vd_config(struct gxp_virtual_device *vd)
 	return vd->config_version >= FW_DATA_PROTOCOL_PER_VD_CONFIG;
 }
 
+/**
+ * gxp_fw_data_resource() - Returns the resource of data region for host<->core
+ *		            communication.
+ * @gxp: The GXP device
+ *
+ * This function requires either @gxp->fwdatabuf or @gxp->shared_buf to be
+ * initialized, so it cannot be called during device probe time.
+ *
+ * Return: The resource.
+ */
+struct gxp_mapped_resource gxp_fw_data_resource(struct gxp_dev *gxp);
+
+/**
+ * gxp_fw_data_system_cfg() - Returns the pointer to the system config region.
+ * @gxp: The GXP device
+ *
+ * This function requires either @gxp->fwdatabuf or @gxp->shared_buf to be
+ * initialized, so it cannot be called during device probe time.
+ *
+ * Return: The pointer. This function never fails.
+ */
+void *gxp_fw_data_system_cfg(struct gxp_dev *gxp);
+
+/**
+ * gxp_fw_data_populate_system_config() - Populate settings onto firmware system
+ *                                        config region.
+ * @gxp: The GXP device
+ *
+ * This function is expected to be called after "after_probe" in the probe
+ * procedure since it uses gxp_fw_data_system_cfg().
+ */
+void gxp_fw_data_populate_system_config(struct gxp_dev *gxp);
+
 #endif /* __GXP_FIRMWARE_DATA_H__ */
diff --git a/gxp-firmware.c b/gxp-firmware.c
index d532fdf..0255994 100644
--- a/gxp-firmware.c
+++ b/gxp-firmware.c
@@ -293,42 +293,11 @@ static void gxp_program_reset_vector(struct gxp_dev *gxp, uint core,
 static void *get_scratchpad_base(struct gxp_dev *gxp,
 				 struct gxp_virtual_device *vd, uint core)
 {
-	void *mem;
-	size_t rw_size;
-
 	if (vd && gxp_fw_data_use_per_vd_config(vd))
 		return vd->core_cfg.vaddr +
 		       (vd->core_cfg.size / GXP_NUM_CORES) * core;
 
-	if (!vd || !vd->rwdata_sgt[core])
-		return gxp->fwbufs[core].vaddr + AURORA_SCRATCHPAD_OFF;
-
-	/* Return the last AURORA_SCRATCHPAD_LEN of rwdata_sgt. */
-	mem = gcip_noncontiguous_sgt_to_mem(vd->rwdata_sgt[core]);
-	rw_size = gxp->fwbufs[core].size - vd->fw_ro_size;
-	return mem + rw_size - AURORA_SCRATCHPAD_LEN;
-}
-
-/* TODO(b/265562894): remove scratchpad region support */
-static void flush_scratchpad_region(struct gxp_dev *gxp,
-				   struct gxp_virtual_device *vd, uint core)
-{
-	if (!vd || gxp_fw_data_use_per_vd_config(vd) || !vd->rwdata_sgt[core])
-		return;
-	dma_sync_sg_for_device(gxp->dev, vd->rwdata_sgt[core]->sgl,
-			       vd->rwdata_sgt[core]->orig_nents,
-			       DMA_BIDIRECTIONAL);
-}
-
-static void invalidate_scratchpad_region(struct gxp_dev *gxp,
-					struct gxp_virtual_device *vd,
-					uint core)
-{
-	if (!vd || gxp_fw_data_use_per_vd_config(vd) || !vd->rwdata_sgt[core])
-		return;
-	dma_sync_sg_for_cpu(gxp->dev, vd->rwdata_sgt[core]->sgl,
-			    vd->rwdata_sgt[core]->orig_nents,
-			    DMA_BIDIRECTIONAL);
+	return gxp->fwbufs[core].vaddr + AURORA_SCRATCHPAD_OFF;
 }
 
 static void reset_core_config_region(struct gxp_dev *gxp,
@@ -398,13 +367,11 @@ static int gxp_firmware_handshake(struct gxp_dev *gxp,
 #endif
 	usleep_range(50 * GXP_TIME_DELAY_FACTOR, 60 * GXP_TIME_DELAY_FACTOR);
 	while (ctr--) {
-		invalidate_scratchpad_region(gxp, vd, core);
 		if (core_cfg->core_alive_magic == Q7_ALIVE_MAGIC)
 			break;
 		usleep_range(1 * GXP_TIME_DELAY_FACTOR,
 			     10 * GXP_TIME_DELAY_FACTOR);
 	}
-	invalidate_scratchpad_region(gxp, vd, core);
 	if (core_cfg->core_alive_magic != Q7_ALIVE_MAGIC) {
 		dev_err(gxp->dev, "Core %u did not respond!\n", phys_core);
 		return -EIO;
@@ -425,7 +392,6 @@ static int gxp_firmware_handshake(struct gxp_dev *gxp,
 	ctr = 1000;
 	expected_top_value = BIT(CORE_WAKEUP_DOORBELL(phys_core));
 	while (ctr--) {
-		invalidate_scratchpad_region(gxp, vd, core);
 		if (core_cfg->top_access_ok == expected_top_value)
 			break;
 		udelay(1 * GXP_TIME_DELAY_FACTOR);
@@ -1035,7 +1001,6 @@ void gxp_firmware_set_boot_mode(struct gxp_dev *gxp,
 
 	core_cfg = get_scratchpad_base(gxp, vd, core);
 	core_cfg->boot_mode = mode;
-	flush_scratchpad_region(gxp, vd, core);
 }
 
 u32 gxp_firmware_get_boot_mode(struct gxp_dev *gxp,
@@ -1048,7 +1013,6 @@ u32 gxp_firmware_get_boot_mode(struct gxp_dev *gxp,
 		return 0;
 
 	core_cfg = get_scratchpad_base(gxp, vd, core);
-	invalidate_scratchpad_region(gxp, vd, core);
 	return core_cfg->boot_mode;
 }
 
diff --git a/gxp-host-device-structs.h b/gxp-host-device-structs.h
index 4597a28..efb39a9 100644
--- a/gxp-host-device-structs.h
+++ b/gxp-host-device-structs.h
@@ -11,11 +11,11 @@
  * headers or data structures.
  *
  */
+
 #ifndef __GXP_HOST_DEVICE_STRUCTURES_H__
 #define __GXP_HOST_DEVICE_STRUCTURES_H__
 
 #define MAX_NUM_CORES 4
-#define NUM_SYSTEM_SEMAPHORES 64
 
 /* The number of physical doorbells and sync barriers allocated to each VD */
 #define GXP_NUM_DOORBELLS_PER_VD 7
@@ -148,66 +148,6 @@
 /* Invalid boot mode request code */
 #define GXP_BOOT_MODE_STATUS_INVALID_MODE               10
 
-/* A structure describing the state of the doorbells on the system. */
-struct gxp_doorbells_descriptor {
-	/* The app this descriptor belongs to. */
-	uint32_t application_id;
-	/* The physical ID of the sync barrier protecting this region. */
-	uint32_t protection_barrier;
-	/* The number of doorbells described in this region. */
-	uint32_t num_items;
-	/* The list of doorbells available for usage. */
-	struct dooorbell_metadata_t {
-		/*
-		 * The number of users using this doorbell. 0 when it's
-		 * available.
-		 */
-		uint32_t users_count;
-		/* The 0-based index of the doorbell described by this entry. */
-		uint32_t hw_doorbell_idx;
-	} doorbells[];
-};
-
-/* A structure describing the state of the sync barriers on the system. */
-struct gxp_sync_barriers_descriptor {
-	/* The app this descriptor belongs to. */
-	uint32_t application_id;
-	/* The physical ID of the sync barrier protecting this region. */
-	uint32_t protection_barrier;
-	/* The number of sync barriers described in this region. */
-	uint32_t num_items;
-	/* The list of sync barriers available for usage. */
-	struct sync_barrier_metadata_t {
-		/*
-		 * The number of users using this barrier. 0 when it's
-		 * available.
-		 */
-		uint32_t users_count;
-		/*
-		 * The 0-based index of the sync barrier described by this
-		 * entry.
-		 */
-		uint32_t hw_barrier_idx;
-	} barriers[];
-};
-
-/* A structure describing the state of the watchdog on the system. */
-struct gxp_watchdog_descriptor {
-	/* The physical ID of the sync barrier protecting this region. */
-	uint32_t protection_barrier;
-	/*
-	 * The number of timer ticks before the watchdog expires.
-	 * This is in units of 244.14 ns.
-	 */
-	uint32_t target_value;
-	/* A bit mask of the cores expected to tickle the watchdog. */
-	uint32_t participating_cores;
-	/* A bit mask of the cores that have tickled the watchdog. */
-	uint32_t responded_cores;
-	/* A flag indicating whether or not the watchdog has tripped. */
-	uint32_t tripped;
-};
-
 /*
  * A structure describing the core telemetry (logging and tracing) parameters
  * and buffers.
@@ -239,171 +179,6 @@ struct gxp_core_telemetry_descriptor {
 };
 
 /*
- * A structure describing the state and allocations of the SW-based semaphores
- * on the system.
- */
-struct gxp_semaphores_descriptor {
-	/* The app this descriptor belongs to. */
-	uint32_t application_id;
-	/* The physical ID of the sync barrier protecting this region. */
-	uint32_t protection_barrier;
-	/*
-	 * An array where each element is dedicated to a core. The element is a
-	 * bit map describing of all the semaphores in the list below that have
-	 * been unlocked but haven't been processed yet by the receiptient core.
-	 */
-	uint64_t woken_pending_semaphores[MAX_NUM_CORES];
-	/*
-	 * A mapping of which doorbells to use as a wakeup signal source per
-	 * core.
-	 */
-	uint32_t wakeup_doorbells[MAX_NUM_CORES];
-	/* The number of items described in this region. */
-	uint32_t num_items;
-	/* The list of semaphores available for usage. */
-	struct semaphore_metadata {
-		/*
-		 * The number of users using this semaphore. 0 when it's for
-		 * creation.
-		 * Note: this is not the count value of the semaphore, but just
-		 * an indication if this slot is available.
-		 */
-		uint32_t users_count;
-		/*
-		 * This is the semaphore count. Cores will block when they call
-		 * 'Wait()' while this count is 0.
-		 */
-		uint32_t count;
-		/*
-		 * A bit map of 'NUM_DSP_CORES' bits indicating which cores are
-		 * currently waiting on this semaphore to become available.
-		 */
-		uint32_t waiters;
-	} semaphores[NUM_SYSTEM_SEMAPHORES];
-};
-
-/* A basic unidirectional queue. */
-struct gxp_queue_info {
-	/* A header describing the queue and its state. */
-	struct queue_header {
-		/* A device-side pointer of the storage managed by this queue */
-		uint32_t storage;
-		/* The index to the head of the queue. */
-		uint32_t head_idx;
-		/* The index to the tail of the queue. */
-		uint32_t tail_idx;
-		/* The size of an element stored this queue. */
-		uint32_t element_size;
-		/* The number of elements that can be stored in this queue. */
-		uint32_t elements_count;
-	} header;
-	/* The semaphore ID controlling exclusive access to this core. */
-	uint32_t access_sem_id;
-	/*
-	 * The ID for the semaphore containing the number of unprocessed items
-	 * pushed to this queue.
-	 */
-	uint32_t posted_slots_sem_id;
-	/*
-	 * The ID for the semaphore containing the number of free slots
-	 * available to store data in this queue.
-	 */
-	uint32_t free_slots_sem_id;
-};
-
-/* A struct describing a single core's set of incoming queues. */
-struct gxp_core_info {
-	/*
-	 * The metadata for the queue holding incoming commands from other
-	 * cores.
-	 */
-	struct gxp_queue_info incoming_commands_queue;
-	/*
-	 * The metadata for the queue holding incoming responses from other
-	 * cores.
-	 */
-	struct gxp_queue_info incoming_responses_queue;
-};
-
-/* A structure describing all the cores' per-core metadata. */
-struct gxp_cores_descriptor {
-	/* The number of cores described in this descriptor. */
-	uint32_t num_items;
-	/* The descriptors for each core. */
-	struct gxp_core_info cores[];
-};
-
-/*
- * The top level descriptor describing memory regions used to access system-wide
- * structures and resources.
- */
-struct gxp_system_descriptor {
-	/* A device address for the application data descriptor. */
-	uint32_t app_descriptor_dev_addr[MAX_NUM_CORES];
-	/* A device address for the watchdog descriptor. */
-	uint32_t watchdog_dev_addr;
-	/* A device address for the core telemetry descriptor */
-	uint32_t core_telemetry_dev_addr;
-	/* A device address for the common debug dump region */
-	uint32_t debug_dump_dev_addr;
-};
-
-/* A structure describing the metadata belonging to a specific application. */
-struct gxp_application_descriptor {
-	/* The ID for this GXP application. */
-	uint32_t application_id;
-	/* The number of cores this application has. */
-	uint16_t core_count;
-	/*
-	 * The cores mask; a bit at index `n` indicates that core `n` is part of
-	 * this app.
-	 */
-	uint16_t cores_mask;
-	/* The number of threads allocated for each core. */
-	uint16_t threads_count;
-	/* The size of system memory given to this app. */
-	uint32_t system_memory_size;
-	/* The device-address of the system memory given to this app. */
-	uint32_t system_memory_addr;
-	/* The size of TCM memory allocated per bank for this app. */
-	uint32_t tcm_memory_per_bank;   /* in units of 4 kB */
-	/* A device address for the doorbells descriptor. */
-	uint32_t doorbells_dev_addr;
-	/* A device address for the sync barriers descriptor. */
-	uint32_t sync_barriers_dev_addr;
-	/* A device address for the semaphores descriptor. */
-	uint32_t semaphores_dev_addr;
-	/* A device address for the cores cmd/rsp queues descriptor. */
-	uint32_t cores_info_dev_addr;
-};
-
-/* The structure describing a core-to-core command. */
-struct gxp_core_to_core_command {
-	/* The source of port number (the core's virtual ID) of the command. */
-	uint32_t source;
-	/* The command's sequence number. */
-	uint64_t sequence_number;
-	/* The command payload device address. */
-	uint64_t device_address;
-	/* The size of the payload in bytes. */
-	uint32_t size;
-	/* The generic command flags. */
-	uint32_t flags;
-};
-
-/* The structure describing a core-to-core response. */
-struct gxp_core_to_core_response {
-	/* The source of port number (the core's virtual ID) of the response. */
-	uint32_t source;
-	/* The response's sequence number. */
-	uint64_t sequence_number;
-	/* The response error code (if any). */
-	uint16_t error_code;
-	/* The response return value (filled-in by the user). */
-	int32_t cmd_retval;
-};
-
-/*
  * A structure for describing the state of the job this worker core is part of.
  * This struct is expected to change per dispatch/context switch/preepmtion as
  * it describes the HW resources, FW IDs, and other parameters that may change
diff --git a/gxp-mcu-platform.c b/gxp-mcu-platform.c
index cfbb433..0c4d2aa 100644
--- a/gxp-mcu-platform.c
+++ b/gxp-mcu-platform.c
@@ -24,9 +24,9 @@
 #define KCI_RETURN_GET_ERROR_CODE(ret) (KCI_RETURN_ERROR_CODE_MASK & (ret))
 
 #if IS_ENABLED(CONFIG_GXP_TEST)
-char *gxp_work_mode_name = "direct";
+char *gxp_work_mode_name = "mcu";
 #else
-static char *gxp_work_mode_name = "direct";
+static char *gxp_work_mode_name = "mcu";
 #endif
 
 module_param_named(work_mode, gxp_work_mode_name, charp, 0660);
diff --git a/gxp-range-alloc.c b/gxp-range-alloc.c
deleted file mode 100644
index 73aa6af..0000000
--- a/gxp-range-alloc.c
+++ /dev/null
@@ -1,118 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * GXP ranged resource allocator.
- *
- * Copyright (C) 2021 Google LLC
- */
-
-#include "gxp-range-alloc.h"
-
-struct range_alloc *range_alloc_create(int start, int end)
-{
-	struct range_alloc *ra;
-	int count;
-	int size;
-
-	count = end - start;
-	if (count <= 0)
-		return ERR_PTR(-EINVAL);
-
-	size = sizeof(struct range_alloc) + count * sizeof(int);
-	ra = kzalloc(size, GFP_KERNEL);
-	if (!ra)
-		return ERR_PTR(-ENOMEM);
-
-	ra->total_count = count;
-	ra->free_count = count;
-	ra->start_index = start;
-	mutex_init(&ra->lock);
-
-	return ra;
-}
-
-int range_alloc_get(struct range_alloc *r, int element)
-{
-	int index = element - r->start_index;
-
-	mutex_lock(&r->lock);
-	if (index < 0 || index >= r->total_count) {
-		mutex_unlock(&r->lock);
-		return -EINVAL;
-	}
-
-	if (r->elements[index]) {
-		mutex_unlock(&r->lock);
-		return -EBUSY;
-	}
-
-	r->elements[index] = 1;
-	r->free_count--;
-
-	mutex_unlock(&r->lock);
-	return 0;
-}
-
-int range_alloc_get_any(struct range_alloc *r, int *element)
-{
-	int i;
-
-	mutex_lock(&r->lock);
-	if (!r->free_count) {
-		mutex_unlock(&r->lock);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < r->total_count; i++) {
-		if (r->elements[i] == 0) {
-			r->elements[i] = 1;
-			r->free_count--;
-			*element = i + r->start_index;
-			mutex_unlock(&r->lock);
-			return 0;
-		}
-	}
-	mutex_unlock(&r->lock);
-	return -ENOMEM;
-}
-
-int range_alloc_put(struct range_alloc *r, int element)
-{
-	int index = element - r->start_index;
-
-	mutex_lock(&r->lock);
-	if (index < 0 || index >= r->total_count) {
-		mutex_unlock(&r->lock);
-		return -EINVAL;
-	}
-
-	if (r->elements[index] == 0) {
-		mutex_unlock(&r->lock);
-		return -EBUSY;
-	}
-
-	r->elements[index] = 0;
-	r->free_count++;
-
-	mutex_unlock(&r->lock);
-	return 0;
-}
-
-int range_alloc_num_free(struct range_alloc *r)
-{
-	int free_count;
-
-	mutex_lock(&r->lock);
-	free_count = r->free_count;
-	mutex_unlock(&r->lock);
-
-	return free_count;
-}
-
-int range_alloc_destroy(struct range_alloc *r)
-{
-	if (!r)
-		return -EFAULT;
-	kfree(r);
-
-	return 0;
-}
diff --git a/gxp-range-alloc.h b/gxp-range-alloc.h
deleted file mode 100644
index ed8c2f0..0000000
--- a/gxp-range-alloc.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * GXP ranged resource allocator.
- *
- * Copyright (C) 2021 Google LLC
- */
-#ifndef __GXP_RANGE_ALLOC_H__
-#define __GXP_RANGE_ALLOC_H__
-
-#include <linux/mutex.h>
-#include <linux/slab.h>
-
-struct range_alloc {
-	int total_count;
-	int free_count;
-	int start_index;
-	struct mutex lock;
-	int elements[];
-};
-
-/**
- * range_alloc_create() - Creates a range allocator starting at the specified
- *			  start (inclusive) and ends at the specified end
- *			  (exclusive).
- * @start: The start of the range (inclusive).
- * @end: The end of the range (exclusive)
- *
- * Return:
- * ptr     - A pointer of the newly created allocator handle on success, an
- *	     error pointer (PTR_ERR) otherwise.
- * -EINVAL - Invalid start/end combination
- * -ENOMEM - Insufficient memory to create the allocator
- */
-struct range_alloc *range_alloc_create(int start, int end);
-
-/**
- * range_alloc_get() - Gets the specified element from the range.
- * @r: The range allocator
- * @element: The element to acquire from the range
- *
- * The @element argument should be within the allocator's range and has not been
- * allocated before.
- *
- * Return:
- * 0       - Successfully reserved @element
- * -EINVAL - Invalid element index (negative or outside allocator range)
- * -EBUSY  - Element is already allocated
- */
-int range_alloc_get(struct range_alloc *r, int element);
-
-/**
- * range_alloc_get_any() - Gets any free element in the range.
- * @r: The range allocator
- * @element: A pointer to use to store the allocated element
- *
- * Return:
- * 0       - Successful reservation
- * -ENOMEM - No elements left in the range to allocate
- */
-int range_alloc_get_any(struct range_alloc *r, int *element);
-
-/**
- * range_alloc_put() - Puts an element back into the range.
- * @r: The range allocator
- * @element: The element to put back into the range
- *
- * Return:
- * 0       - Successful placement back into the range
- * -EINVAL - Invalid element index (negative or outside allocator range)
- * -EBUSY  - The element is still present in the range
- */
-int range_alloc_put(struct range_alloc *r, int element);
-
-/**
- * range_alloc_num_free() - Returns the number of free elements in the range.
- * @r: The range allocator
- *
- * Return: the number of free elements in the range
- */
-int range_alloc_num_free(struct range_alloc *r);
-
-/**
- * range_alloc_destroy() - Destroys the range allocator
- * @r: The range allocator to destroy
- *
- * The destruction does not validate that the range is empty.
- *
- * Return:
- * 0       - Successfully destroyed range allocator
- * -EFAULT - Invalid allocator address
- */
-int range_alloc_destroy(struct range_alloc *r);
-
-#endif /* __GXP_RANGE_ALLOC_H__ */
diff --git a/gxp-thermal.c b/gxp-thermal.c
index e7b9fc5..671d140 100644
--- a/gxp-thermal.c
+++ b/gxp-thermal.c
@@ -31,7 +31,7 @@ static int gxp_thermal_get_rate(void *data, unsigned long *rate)
 static int gxp_thermal_set_rate(void *data, unsigned long rate)
 {
 	struct gxp_dev *gxp = data;
-	int ret;
+	int ret = 0;
 
 	if (!gxp_is_direct_mode(gxp)) {
 #if GXP_HAS_MCU
diff --git a/gxp-vd.c b/gxp-vd.c
index c38d175..baf10b9 100644
--- a/gxp-vd.c
+++ b/gxp-vd.c
@@ -34,6 +34,8 @@
 #include "gxp-pm.h"
 #include "gxp-vd.h"
 
+#include <trace/events/gxp.h>
+
 static inline void hold_core_in_reset(struct gxp_dev *gxp, uint core)
 {
 	gxp_write_32(gxp, GXP_CORE_REG_ETM_PWRCTL(core),
@@ -201,32 +203,19 @@ static int map_cfg_regions(struct gxp_virtual_device *vd,
 			   struct gcip_image_config *img_cfg)
 {
 	struct gxp_dev *gxp = vd->gxp;
-	struct gxp_mapped_resource *pool;
-	struct gxp_mapped_resource res, tmp;
+	struct gxp_mapped_resource pool;
+	struct gxp_mapped_resource res;
 	size_t offset;
 	int ret;
 
-	if (img_cfg->num_iommu_mappings < 2)
+	if (img_cfg->num_iommu_mappings < 3)
 		return map_core_shared_buffer(vd);
-
-	/*
-	 * For direct mode, the config regions are programmed by host (us); for
-	 * MCU mode, the config regions are programmed by MCU.
-	 */
-	if (gxp_is_direct_mode(gxp)) {
-		tmp = gxp->fwdatabuf;
-		/* Leave the first piece be used for gxp_fw_data_init() */
-		tmp.vaddr += tmp.size / 2;
-		tmp.paddr += tmp.size / 2;
-		pool = &tmp;
-	} else {
-		pool = &gxp->shared_buf;
-	}
+	pool = gxp_fw_data_resource(gxp);
 
 	assign_resource(&res, img_cfg, CORE_CFG_REGION_IDX);
 	offset = vd->slice_index * GXP_SHARED_SLICE_SIZE;
-	res.vaddr = pool->vaddr + offset;
-	res.paddr = pool->paddr + offset;
+	res.vaddr = pool.vaddr + offset;
+	res.paddr = pool.paddr + offset;
 	ret = map_resource(vd, &res);
 	if (ret) {
 		dev_err(gxp->dev, "map core config %pad -> offset %#zx failed",
@@ -237,8 +226,8 @@ static int map_cfg_regions(struct gxp_virtual_device *vd,
 
 	assign_resource(&res, img_cfg, VD_CFG_REGION_IDX);
 	offset += vd->core_cfg.size;
-	res.vaddr = pool->vaddr + offset;
-	res.paddr = pool->paddr + offset;
+	res.vaddr = pool.vaddr + offset;
+	res.paddr = pool.paddr + offset;
 	ret = map_resource(vd, &res);
 	if (ret) {
 		dev_err(gxp->dev, "map VD config %pad -> offset %#zx failed",
@@ -255,15 +244,15 @@ static int map_cfg_regions(struct gxp_virtual_device *vd,
 		ret = -ENOSPC;
 		goto err_unmap_vd;
 	}
-	/*
-	 * It's okay when mappings[sys_cfg_region_idx] is not set, in which case
-	 * map_resource does nothing.
-	 */
 	assign_resource(&res, img_cfg, SYS_CFG_REGION_IDX);
-	/* Use the end of the shared region for system cfg. */
-	offset = GXP_SHARED_BUFFER_SIZE - res.size;
-	res.vaddr = pool->vaddr + offset;
-	res.paddr = pool->paddr + offset;
+	if (res.size != GXP_FW_DATA_SYSCFG_SIZE) {
+		dev_err(gxp->dev, "invalid system cfg size: %#llx", res.size);
+		ret = -EINVAL;
+		goto err_unmap_vd;
+	}
+	res.vaddr = gxp_fw_data_system_cfg(gxp);
+	offset = res.vaddr - pool.vaddr;
+	res.paddr = pool.paddr + offset;
 	ret = map_resource(vd, &res);
 	if (ret) {
 		dev_err(gxp->dev, "map sys config %pad -> offset %#zx failed",
@@ -326,11 +315,6 @@ map_fw_image_config(struct gxp_dev *gxp, struct gxp_virtual_device *vd,
 		.unmap = gxp_vd_imgcfg_unmap,
 	};
 
-	/*
-	 * Allow to skip for test suites need VD but doesn't need the FW module.
-	 */
-	if (IS_ENABLED(CONFIG_GXP_TEST) && !fw_loader_mgr)
-		return 0;
 	cfg = &fw_loader_mgr->core_img_cfg;
 	ret = gcip_image_config_parser_init(&vd->cfg_parser, &gxp_vd_imgcfg_ops,
 					    gxp->dev, vd);
@@ -348,13 +332,6 @@ map_fw_image_config(struct gxp_dev *gxp, struct gxp_virtual_device *vd,
 		gcip_image_config_clear(&vd->cfg_parser);
 		return ret;
 	}
-	vd->fw_ro_size = cfg->firmware_size;
-	/*
-	 * To be compatible with image config without setting firmware_size,
-	 * fall back to map the whole region to carveout.
-	 */
-	if (vd->fw_ro_size == 0)
-		vd->fw_ro_size = gxp->fwbufs[0].size;
 
 	return 0;
 }
@@ -366,102 +343,22 @@ static void unmap_fw_image_config(struct gxp_dev *gxp,
 	gcip_image_config_clear(&vd->cfg_parser);
 }
 
-/*
- * For each core,
- *  - fw_rw_size = fwbufs[core].size - fw_ro_size
- *  - allocates rwdata_sgt[core] with size fw_rw_size
- *  - maps fwbufs[core].daddr -> fwbufs[core].paddr with size fw_ro_size
- *  - maps fwbufs[core].daddr + fw_ro_size -> rwdata_sgt[core]
- */
-static int alloc_and_map_fw_image(struct gxp_dev *gxp,
-				  struct gxp_virtual_device *vd)
+static int map_fw_image(struct gxp_dev *gxp, struct gxp_virtual_device *vd)
 {
-	size_t ro_size = vd->fw_ro_size, rw_size;
 	struct gxp_iommu_domain *gdomain = vd->domain;
-	int i, ret;
-
-	/* Maps all FW regions together and no rwdata_sgt in this case. */
-	if (ro_size == gxp->fwbufs[0].size)
-		return gxp_iommu_map(gxp, gdomain, gxp->fwbufs[0].daddr,
-				     gxp->fwbufs[0].paddr,
-				     ro_size * GXP_NUM_CORES,
-				     IOMMU_READ | IOMMU_WRITE);
-
-	dev_info(gxp->dev, "mapping firmware RO size %#zx", ro_size);
-	rw_size = gxp->fwbufs[0].size - ro_size;
-	for (i = 0; i < GXP_NUM_CORES; i++) {
-		vd->rwdata_sgt[i] =
-			gcip_alloc_noncontiguous(gxp->dev, rw_size, GFP_KERNEL);
-		if (!vd->rwdata_sgt[i]) {
-			dev_err(gxp->dev,
-				"allocate firmware data for core %d failed", i);
-			ret = -ENOMEM;
-			goto err_free_sgt;
-		}
-	}
-	for (i = 0; i < GXP_NUM_CORES; i++) {
-		ret = gxp_iommu_map(gxp, gdomain, gxp->fwbufs[i].daddr,
-				    gxp->fwbufs[i].paddr, ro_size,
-				    IOMMU_READ | IOMMU_WRITE);
-		if (ret) {
-			dev_err(gxp->dev, "map firmware RO for core %d failed",
-				i);
-			goto err_unmap;
-		}
-		ret = gxp_dma_map_iova_sgt(gxp, vd->domain,
-					   gxp->fwbufs[i].daddr + ro_size,
-					   vd->rwdata_sgt[i],
-					   IOMMU_READ | IOMMU_WRITE);
-		if (ret) {
-			dev_err(gxp->dev, "map firmware RW for core %d failed",
-				i);
-			gxp_iommu_unmap(gxp, gdomain, gxp->fwbufs[i].daddr,
-					ro_size);
-			goto err_unmap;
-		}
-	}
-	return 0;
 
-err_unmap:
-	while (i--) {
-		gxp_iommu_unmap(gxp, gdomain, gxp->fwbufs[i].daddr, ro_size);
-		gxp_dma_unmap_iova_sgt(gxp, vd->domain,
-				       gxp->fwbufs[i].daddr + ro_size,
-				       vd->rwdata_sgt[i]);
-	}
-err_free_sgt:
-	for (i = 0; i < GXP_NUM_CORES; i++) {
-		if (vd->rwdata_sgt[i])
-			gcip_free_noncontiguous(vd->rwdata_sgt[i]);
-		vd->rwdata_sgt[i] = NULL;
-	}
-	return ret;
+	/* Maps all FW regions together. */
+	return gxp_iommu_map(gxp, gdomain, gxp->fwbufs[0].daddr,
+			     gxp->fwbufs[0].paddr,
+			     gxp->fwbufs[0].size * GXP_NUM_CORES, IOMMU_READ);
 }
 
-static void unmap_and_free_fw_image(struct gxp_dev *gxp,
-				    struct gxp_virtual_device *vd)
+static void unmap_fw_image(struct gxp_dev *gxp, struct gxp_virtual_device *vd)
 {
-	size_t ro_size = vd->fw_ro_size;
 	struct gxp_iommu_domain *gdomain = vd->domain;
-	int i;
-
-	if (ro_size == gxp->fwbufs[0].size) {
-		gxp_iommu_unmap(gxp, gdomain, gxp->fwbufs[0].daddr,
-				ro_size * GXP_NUM_CORES);
-		return;
-	}
 
-	for (i = 0; i < GXP_NUM_CORES; i++) {
-		gxp_iommu_unmap(gxp, gdomain, gxp->fwbufs[i].daddr, ro_size);
-		gxp_dma_unmap_iova_sgt(gxp, vd->domain,
-				       gxp->fwbufs[i].daddr + ro_size,
-				       vd->rwdata_sgt[i]);
-	}
-	for (i = 0; i < GXP_NUM_CORES; i++) {
-		if (vd->rwdata_sgt[i])
-			gcip_free_noncontiguous(vd->rwdata_sgt[i]);
-		vd->rwdata_sgt[i] = NULL;
-	}
+	gxp_iommu_unmap(gxp, gdomain, gxp->fwbufs[0].daddr,
+			gxp->fwbufs[0].size * GXP_NUM_CORES);
 }
 
 static int map_core_telemetry_buffers(struct gxp_dev *gxp,
@@ -646,9 +543,7 @@ static void vd_restore_doorbells(struct gxp_virtual_device *vd)
 static void set_config_version(struct gxp_dev *gxp,
 			       struct gxp_virtual_device *vd)
 {
-	if (gxp->firmware_mgr && vd->sys_cfg.daddr)
-		vd->config_version =
-			gxp->fw_loader_mgr->core_img_cfg.config_version;
+	vd->config_version = gxp->fw_loader_mgr->core_img_cfg.config_version;
 	/*
 	 * Let gxp_dma_map_core_resources() map this region only when using the
 	 * legacy protocol.
@@ -684,6 +579,8 @@ struct gxp_virtual_device *gxp_vd_allocate(struct gxp_dev *gxp,
 	int i;
 	int err;
 
+	trace_gxp_vd_allocate_start(requested_cores);
+
 	lockdep_assert_held_write(&gxp->vd_semaphore);
 	/* Assumes 0 < requested_cores <= GXP_NUM_CORES */
 	if (requested_cores == 0 || requested_cores > GXP_NUM_CORES)
@@ -751,18 +648,13 @@ struct gxp_virtual_device *gxp_vd_allocate(struct gxp_dev *gxp,
 		goto error_unassign_cores;
 
 	set_config_version(gxp, vd);
-	/* After map_fw_image_config because it needs vd->sys_cfg. */
-	vd->fw_app = gxp_fw_data_create_app(gxp, vd);
-	if (IS_ERR(vd->fw_app)) {
-		err = PTR_ERR(vd->fw_app);
-		vd->fw_app = NULL;
-		goto error_unmap_imgcfg;
-	}
+	/* After map_fw_image_config because it needs vd->{vd,core}_cfg. */
+	gxp_fw_data_populate_vd_cfg(gxp, vd);
 	err = gxp_dma_map_core_resources(gxp, vd->domain, vd->core_list,
 					 vd->slice_index);
 	if (err)
-		goto error_destroy_fw_data;
-	err = alloc_and_map_fw_image(gxp, vd);
+		goto error_unmap_imgcfg;
+	err = map_fw_image(gxp, vd);
 	if (err)
 		goto error_unmap_core_resources;
 	err = map_core_telemetry_buffers(gxp, vd, vd->core_list);
@@ -772,16 +664,16 @@ struct gxp_virtual_device *gxp_vd_allocate(struct gxp_dev *gxp,
 	if (err)
 		goto error_unmap_core_telemetry_buffer;
 
+	trace_gxp_vd_allocate_end(vd->vdid);
+
 	return vd;
 
 error_unmap_core_telemetry_buffer:
 	unmap_core_telemetry_buffers(gxp, vd, vd->core_list);
 error_unmap_fw_data:
-	unmap_and_free_fw_image(gxp, vd);
+	unmap_fw_image(gxp, vd);
 error_unmap_core_resources:
 	gxp_dma_unmap_core_resources(gxp, vd->domain, vd->core_list);
-error_destroy_fw_data:
-	gxp_fw_data_destroy_app(gxp, vd->fw_app);
 error_unmap_imgcfg:
 	unmap_fw_image_config(gxp, vd);
 error_unassign_cores:
@@ -806,6 +698,8 @@ void gxp_vd_release(struct gxp_virtual_device *vd)
 	struct gxp_dev *gxp = vd->gxp;
 	uint core_list = vd->core_list;
 
+	trace_gxp_vd_release_start(vd->vdid);
+
 	lockdep_assert_held_write(&gxp->vd_semaphore);
 	debug_dump_lock(gxp, vd);
 
@@ -817,9 +711,8 @@ void gxp_vd_release(struct gxp_virtual_device *vd)
 
 	unmap_debug_dump_buffer(gxp, vd);
 	unmap_core_telemetry_buffers(gxp, vd, core_list);
-	unmap_and_free_fw_image(gxp, vd);
+	unmap_fw_image(gxp, vd);
 	gxp_dma_unmap_core_resources(gxp, vd->domain, core_list);
-	gxp_fw_data_destroy_app(gxp, vd->fw_app);
 	unmap_fw_image_config(gxp, vd);
 	unassign_cores(vd);
 
@@ -850,6 +743,8 @@ void gxp_vd_release(struct gxp_virtual_device *vd)
 	vd->state = GXP_VD_RELEASED;
 	debug_dump_unlock(vd);
+	/* Trace before gxp_vd_put(): it may drop the last reference to vd. */
+	trace_gxp_vd_release_end(vd->vdid);
 	gxp_vd_put(vd);
 }
 
 int gxp_vd_block_ready(struct gxp_virtual_device *vd)
@@ -858,6 +753,8 @@ int gxp_vd_block_ready(struct gxp_virtual_device *vd)
 	enum gxp_virtual_device_state orig_state;
 	int ret;
 
+	trace_gxp_vd_block_ready_start(vd->vdid);
+
 	lockdep_assert_held_write(&gxp->vd_semaphore);
 
 	orig_state = vd->state;
@@ -876,6 +773,9 @@ int gxp_vd_block_ready(struct gxp_virtual_device *vd)
 			return ret;
 		}
 	}
+
+	trace_gxp_vd_block_ready_end(vd->vdid);
+
 	return 0;
 }
 
@@ -883,11 +783,15 @@ void gxp_vd_block_unready(struct gxp_virtual_device *vd)
 {
 	struct gxp_dev *gxp = vd->gxp;
 
+	trace_gxp_vd_block_unready_start(vd->vdid);
+
 	lockdep_assert_held_write(&gxp->vd_semaphore);
 
 	if (gxp->before_vd_block_unready)
 		gxp->before_vd_block_unready(gxp, vd);
 	gxp_dma_domain_detach_device(gxp, vd->domain);
+
+	trace_gxp_vd_block_unready_end(vd->vdid);
 }
 
 int gxp_vd_run(struct gxp_virtual_device *vd)
diff --git a/gxp-vd.h b/gxp-vd.h
index e0bd5ce..8769335 100644
--- a/gxp-vd.h
+++ b/gxp-vd.h
@@ -90,10 +90,6 @@ struct gxp_virtual_device {
 	 */
 	int slice_index;
 	/*
-	 * The SG table that holds the firmware RW data region.
-	 */
-	struct sg_table *rwdata_sgt[GXP_NUM_CORES];
-	/*
 	 * The SG table that holds the regions specified in the image config's
 	 * non-secure IOMMU mappings.
 	 */
@@ -101,8 +97,6 @@ struct gxp_virtual_device {
 		dma_addr_t daddr;
 		struct sg_table *sgt;
 	} ns_regions[GCIP_IMG_CFG_MAX_NS_IOMMU_MAPPINGS];
-	/* The firmware size specified in image config. */
-	u32 fw_ro_size;
 	/*
 	 * The config regions specified in image config.
 	 * core_cfg's size should be a multiple of GXP_NUM_CORES.
diff --git a/include/trace/events/gxp.h b/include/trace/events/gxp.h
new file mode 100644
index 0000000..1e4257b
--- /dev/null
+++ b/include/trace/events/gxp.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Trace events for gxp
+ *
+ * Copyright (c) 2023 Google LLC
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gxp
+
+#if !defined(_TRACE_GXP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_GXP_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(gxp_dma_map_sg_start,
+
+	    TP_PROTO(int nents),
+
+	    TP_ARGS(nents),
+
+	    TP_STRUCT__entry(__field(int, nents)),
+
+	    TP_fast_assign(__entry->nents = nents;),
+
+	    TP_printk("nents = %d", __entry->nents));
+
+TRACE_EVENT(gxp_dma_map_sg_end,
+
+	    TP_PROTO(int nents_mapped, ssize_t size_mapped),
+
+	    TP_ARGS(nents_mapped, size_mapped),
+
+	    TP_STRUCT__entry(__field(int, nents_mapped)
+				     __field(ssize_t, size_mapped)),
+
+	    TP_fast_assign(__entry->nents_mapped = nents_mapped;
+			   __entry->size_mapped = size_mapped;),
+
+	    TP_printk("nents_mapped = %d, size_mapped = %zd",
+		      __entry->nents_mapped, __entry->size_mapped));
+
+TRACE_EVENT(gxp_dma_unmap_sg_start,
+
+	    TP_PROTO(int nents),
+
+	    TP_ARGS(nents),
+
+	    TP_STRUCT__entry(__field(int, nents)),
+
+	    TP_fast_assign(__entry->nents = nents;),
+
+	    TP_printk("nents = %d", __entry->nents));
+
+TRACE_EVENT(gxp_dma_unmap_sg_end,
+
+	    TP_PROTO(size_t size),
+
+	    TP_ARGS(size),
+
+	    TP_STRUCT__entry(__field(size_t, size)),
+
+	    TP_fast_assign(__entry->size = size;),
+
+	    TP_printk("size = %zu", __entry->size));
+
+TRACE_EVENT(gxp_vd_block_ready_start,
+
+	    TP_PROTO(int vdid),
+
+	    TP_ARGS(vdid),
+
+	    TP_STRUCT__entry(__field(int, vdid)),
+
+	    TP_fast_assign(__entry->vdid = vdid;),
+
+	    TP_printk("vdid = %d", __entry->vdid));
+
+TRACE_EVENT(gxp_vd_block_ready_end,
+
+	    TP_PROTO(int vdid),
+
+	    TP_ARGS(vdid),
+
+	    TP_STRUCT__entry(__field(int, vdid)),
+
+	    TP_fast_assign(__entry->vdid = vdid;),
+
+	    TP_printk("vdid = %d", __entry->vdid));
+
+TRACE_EVENT(gxp_vd_block_unready_start,
+
+	    TP_PROTO(int vdid),
+
+	    TP_ARGS(vdid),
+
+	    TP_STRUCT__entry(__field(int, vdid)),
+
+	    TP_fast_assign(__entry->vdid = vdid;),
+
+	    TP_printk("vdid = %d", __entry->vdid));
+
+TRACE_EVENT(gxp_vd_block_unready_end,
+
+	    TP_PROTO(int vdid),
+
+	    TP_ARGS(vdid),
+
+	    TP_STRUCT__entry(__field(int, vdid)),
+
+	    TP_fast_assign(__entry->vdid = vdid;),
+
+	    TP_printk("vdid = %d", __entry->vdid));
+
+TRACE_EVENT(gxp_vd_allocate_start,
+
+	    TP_PROTO(u16 requested_cores),
+
+	    TP_ARGS(requested_cores),
+
+	    TP_STRUCT__entry(__field(u16, requested_cores)),
+
+	    TP_fast_assign(__entry->requested_cores = requested_cores;),
+
+	    TP_printk("requested_cores = %u", __entry->requested_cores));
+
+TRACE_EVENT(gxp_vd_allocate_end,
+
+	    TP_PROTO(int vdid),
+
+	    TP_ARGS(vdid),
+
+	    TP_STRUCT__entry(__field(int, vdid)),
+
+	    TP_fast_assign(__entry->vdid = vdid;),
+
+	    TP_printk("vdid = %d", __entry->vdid));
+
+TRACE_EVENT(gxp_vd_release_start,
+
+	    TP_PROTO(int vdid),
+
+	    TP_ARGS(vdid),
+
+	    TP_STRUCT__entry(__field(int, vdid)),
+
+	    TP_fast_assign(__entry->vdid = vdid;),
+
+	    TP_printk("vdid = %d", __entry->vdid));
+
+TRACE_EVENT(gxp_vd_release_end,
+
+	    TP_PROTO(int vdid),
+
+	    TP_ARGS(vdid),
+
+	    TP_STRUCT__entry(__field(int, vdid)),
+
+	    TP_fast_assign(__entry->vdid = vdid;),
+
+	    TP_printk("vdid = %d", __entry->vdid));
+
+#endif /* _TRACE_GXP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
author	Aurora zuma automerger <aurora-zuma-automerger@google.com>	2023-03-14 18:01:36 +0000
committer	Copybara-Service <copybara-worker@google.com>	2023-03-14 11:28:21 -0700
commit	7d833613c6706f5fd9c0eec66685ee44b0a664b4 (patch)
tree	0f489702358518be1091d34c24e9a94b3bbcba59
parent	f12c18aa9d19fb142475e5bdc3cba671491ba033 (diff)
download	zuma-7d833613c6706f5fd9c0eec66685ee44b0a664b4.tar.gz