author     Renato Grottesi <otaner@google.com>  2023-07-20 07:28:34 +0000
committer  Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>  2023-07-20 07:28:34 +0000
commit     83a2dedd5dd006beb9149548c0fdb2bc68feac63 (patch)
tree       6fd69f1ccfe17fc12f05419b5ef10dbbc0c436bc
parent     8f0ed0e336c2311b462b65df893cc085be4fd70e (diff)
parent     b0ff36ab8d0072fc4720f03ee6a1eced168e276e (diff)
download   android-nn-driver-83a2dedd5dd006beb9149548c0fdb2bc68feac63.tar.gz
Merge remote-tracking branch 'goog/main' into udc_d1 am: b0ff36ab8d
Original change: https://googleplex-android-review.googlesource.com/c/platform/external/android-nn-driver/+/24058492
Change-Id: Iea158aab5e59c28ebe5ea7fe1fb2d0430e19822c
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
-rw-r--r--  .gitignore  1
-rw-r--r--  .gitignore.license  4
-rw-r--r--  1.0/ArmnnDriver.hpp  1
-rw-r--r--  1.0/ArmnnDriverImpl.cpp  3
-rw-r--r--  1.0/ArmnnDriverImpl.hpp  11
-rw-r--r--  1.0/HalPolicy.cpp  57
-rw-r--r--  1.0/HalPolicy.hpp  14
-rw-r--r--  1.1/ArmnnDriver.hpp  1
-rw-r--r--  1.1/ArmnnDriverImpl.cpp  3
-rw-r--r--  1.1/ArmnnDriverImpl.hpp  9
-rw-r--r--  1.1/HalPolicy.cpp  21
-rw-r--r--  1.1/HalPolicy.hpp  10
-rw-r--r--  1.2/ArmnnDriver.hpp  44
-rw-r--r--  1.2/ArmnnDriverImpl.cpp  483
-rw-r--r--  1.2/ArmnnDriverImpl.hpp  41
-rw-r--r--  1.2/HalPolicy.cpp  198
-rw-r--r--  1.2/HalPolicy.hpp  36
-rw-r--r--  1.3/ArmnnDriver.hpp  96
-rw-r--r--  1.3/ArmnnDriverImpl.cpp  479
-rw-r--r--  1.3/ArmnnDriverImpl.hpp  43
-rw-r--r--  1.3/HalPolicy.cpp  128
-rw-r--r--  1.3/HalPolicy.hpp  35
-rw-r--r--  ArmnnDevice.cpp  13
-rw-r--r--  ArmnnDriverImpl.cpp  97
-rw-r--r--  ArmnnDriverImpl.hpp  7
-rw-r--r--  ArmnnPreparedModel.cpp  257
-rw-r--r--  ArmnnPreparedModel.hpp  63
-rw-r--r--  ArmnnPreparedModel_1_2.cpp  340
-rw-r--r--  ArmnnPreparedModel_1_2.hpp  83
-rw-r--r--  ArmnnPreparedModel_1_3.cpp  391
-rw-r--r--  ArmnnPreparedModel_1_3.hpp  95
-rw-r--r--  CacheDataHandler.cpp  66
-rw-r--r--  CacheDataHandler.hpp  68
-rw-r--r--  ConversionUtils.cpp  49
-rw-r--r--  ConversionUtils.hpp  773
-rw-r--r--  ConversionUtils_1_2.hpp  1096
-rw-r--r--  ConversionUtils_1_3.hpp  99
-rw-r--r--  DriverOptions.cpp  60
-rw-r--r--  DriverOptions.hpp  16
-rw-r--r--  LICENSE.spdx  756
-rw-r--r--  LICENSES/MIT.txt  9
-rw-r--r--  ModelToINetworkConverter.cpp  37
-rw-r--r--  NnapiSupport.txt  96
-rw-r--r--  NnapiSupport.txt.license  4
-rw-r--r--  README.md  4
-rw-r--r--  README.md.license  4
-rw-r--r--  RequestThread.cpp  7
-rw-r--r--  RequestThread_1_3.cpp  10
-rw-r--r--  SECURITY.md.license  4
-rw-r--r--  Utils.cpp  408
-rw-r--r--  Utils.hpp  55
-rw-r--r--  android.hardware.neuralnetworks@1.0-service-armnn.rc.license  4
-rw-r--r--  android.hardware.neuralnetworks@1.1-service-armnn.rc.license  4
-rw-r--r--  android.hardware.neuralnetworks@1.2-service-armnn.rc.license  4
-rw-r--r--  android.hardware.neuralnetworks@1.3-service-armnn.rc  2
-rw-r--r--  android.hardware.neuralnetworks@1.3-service-armnn.rc.license  4
-rw-r--r--  docs/FAQ.md  29
-rw-r--r--  docs/FAQ.md.license  4
-rw-r--r--  docs/IntegratorGuide.md  71
-rw-r--r--  docs/IntegratorGuide.md.license  4
-rwxr-xr-x  setup.sh  67
-rw-r--r--  test/1.0/Convolution2D.cpp  16
-rw-r--r--  test/1.0/FullyConnectedReshape.cpp  37
-rw-r--r--  test/1.0/Lstm.cpp  64
-rw-r--r--  test/1.1/Convolution2D.cpp  21
-rw-r--r--  test/1.1/Lstm.cpp  64
-rw-r--r--  test/1.1/Mean.cpp  236
-rw-r--r--  test/1.1/Transpose.cpp  145
-rw-r--r--  test/1.2/Capabilities.cpp  37
-rw-r--r--  test/1.2/Dilation.cpp  27
-rw-r--r--  test/1.2/Lstm.cpp  83
-rw-r--r--  test/1.2/Mean.cpp  204
-rw-r--r--  test/1.2/UnidirectionalSequenceLstm.cpp  40
-rw-r--r--  test/1.3/QLstm.cpp  87
-rw-r--r--  test/1.3/QosTests.cpp  63
-rw-r--r--  test/Concat.cpp  692
-rw-r--r--  test/Concurrent.cpp  57
-rw-r--r--  test/Convolution2D.hpp  77
-rw-r--r--  test/Dilation.hpp  71
-rw-r--r--  test/DriverTestHelpers.cpp  27
-rw-r--r--  test/DriverTestHelpers.hpp  29
-rw-r--r--  test/FullyConnected.cpp  229
-rw-r--r--  test/GenericLayerTests.cpp  60
-rw-r--r--  test/Lstm.hpp  60
-rw-r--r--  test/SystemProperties.cpp  27
-rw-r--r--  test/TestHalfTensor.cpp  33
-rw-r--r--  test/TestHalfTensor.hpp  38
-rw-r--r--  test/TestTensor.cpp  5
-rw-r--r--  test/TestTensor.hpp  10
-rw-r--r--  test/Tests.cpp  36
-rw-r--r--  test/UnidirectionalSequenceLstm.hpp  1419
-rw-r--r--  test/UtilsTests.cpp  171
92 files changed, 8626 insertions, 2417 deletions
diff --git a/.gitignore b/.gitignore
index 18a48d2..fa0683d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
armnn
boost_1_64_0
clframework
+flatbuffers-1.12.0
prebuilt
.vscode/settings.json
.gitignore
diff --git a/.gitignore.license b/.gitignore.license
new file mode 100644
index 0000000..7964c7d
--- /dev/null
+++ b/.gitignore.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2018, 2022 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/1.0/ArmnnDriver.hpp b/1.0/ArmnnDriver.hpp
index 61a8467..b601e87 100644
--- a/1.0/ArmnnDriver.hpp
+++ b/1.0/ArmnnDriver.hpp
@@ -10,7 +10,6 @@
#include "../ArmnnDevice.hpp"
#include "ArmnnDriverImpl.hpp"
#include "HalPolicy.hpp"
-#include "NamespaceAdaptor.hpp"
#include "../ArmnnDriverImpl.hpp"
diff --git a/1.0/ArmnnDriverImpl.cpp b/1.0/ArmnnDriverImpl.cpp
index 3f4c75b..57f828c 100644
--- a/1.0/ArmnnDriverImpl.cpp
+++ b/1.0/ArmnnDriverImpl.cpp
@@ -4,7 +4,6 @@
//
#include "ArmnnDriverImpl.hpp"
-#include "../NamespaceAdaptor.hpp"
#include "../SystemPropertiesUtils.hpp"
#include <log/log.h>
@@ -60,4 +59,4 @@ Return<void> ArmnnDriverImpl::getCapabilities(const armnn::IRuntimePtr& runtime,
}
} // namespace hal_1_0
-} // namespace armnn_driver
+} // namespace armnn_driver
\ No newline at end of file
diff --git a/1.0/ArmnnDriverImpl.hpp b/1.0/ArmnnDriverImpl.hpp
index 544104a..3abe751 100644
--- a/1.0/ArmnnDriverImpl.hpp
+++ b/1.0/ArmnnDriverImpl.hpp
@@ -8,16 +8,19 @@
#include <HalInterfaces.h>
#include "../DriverOptions.hpp"
-#include "../NamespaceAdaptor.hpp"
#include <armnn/ArmNN.hpp>
-#if ARMNN_ANDROID_S
-using namespace android::nn;
-#elif ARMNN_ANDROID_R
+#ifdef ARMNN_ANDROID_R
using namespace android::nn::hal;
#endif
+#ifdef ARMNN_ANDROID_S
+using namespace android::hardware;
+#endif
+
+namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
+
namespace armnn_driver
{
namespace hal_1_0
diff --git a/1.0/HalPolicy.cpp b/1.0/HalPolicy.cpp
index 7e9e9ef..08de1b5 100644
--- a/1.0/HalPolicy.cpp
+++ b/1.0/HalPolicy.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -20,7 +20,7 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
switch (operation.type)
{
case V1_0::OperationType::ADD:
- return ConvertAdd(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, armnn::BinaryOperation::Add);
case V1_0::OperationType::AVERAGE_POOL_2D:
return ConvertAveragePool2d(operation, model, data);
case V1_0::OperationType::CONCATENATION:
@@ -50,7 +50,7 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
case V1_0::OperationType::MAX_POOL_2D:
return ConvertMaxPool2d(operation, model, data);
case V1_0::OperationType::MUL:
- return ConvertMul(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, armnn::BinaryOperation::Mul);
case V1_0::OperationType::RELU:
return ConvertReLu(operation, model, data);
case V1_0::OperationType::RELU1:
@@ -73,12 +73,6 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
}
}
-bool HalPolicy::ConvertAdd(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_0::HalPolicy::ConvertAdd()");
- return ::ConvertAdd<hal_1_0::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertAveragePool2d(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_0::HalPolicy::ConvertAveragePool2d()");
@@ -115,6 +109,15 @@ bool HalPolicy::ConvertDequantize(const Operation& operation, const Model& model
return ::ConvertDequantize<hal_1_0::HalPolicy>(operation, model, data);
}
+bool HalPolicy::ConvertElementwiseBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ armnn::BinaryOperation binaryOperation)
+{
+ ALOGV("hal_1_0::HalPolicy::ConvertElementwiseBinary()");
+ return ::ConvertElementwiseBinary<hal_1_0::HalPolicy>(operation, model, data, binaryOperation);
+}
+
bool HalPolicy::ConvertFloor(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_0::HalPolicy::ConvertFloor()");
@@ -464,10 +467,12 @@ bool HalPolicy::ConvertLstm(const Operation& operation, const Model& model, Conv
}
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsLstmSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputStateInInfo,
cellStateInInfo,
@@ -484,6 +489,7 @@ bool HalPolicy::ConvertLstm(const Operation& operation, const Model& model, Conv
// Add the layer
armnn::IConnectableLayer* layer = data.m_Network->AddLstmLayer(desc, params, "Lstm");
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
outputStateIn.Connect(layer->GetInputSlot(1));
@@ -513,12 +519,6 @@ bool HalPolicy::ConvertMaxPool2d(const Operation& operation, const Model& model,
return ConvertPooling2d<hal_1_0::HalPolicy>(operation, __func__, armnn::PoolingAlgorithm::Max, model, data);
}
-bool HalPolicy::ConvertMul(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_0::HalPolicy::ConvertMul()");
- return ::ConvertMul<hal_1_0::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertReLu(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_0::HalPolicy::ConvertReLu()");
@@ -566,10 +566,12 @@ bool HalPolicy::ConvertSoftmax(const Operation& operation, const Model& model, C
}
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsSoftmaxSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outputInfo,
desc);
@@ -579,7 +581,11 @@ bool HalPolicy::ConvertSoftmax(const Operation& operation, const Model& model, C
}
armnn::IConnectableLayer* layer = data.m_Network->AddSoftmaxLayer(desc);
- assert(layer != nullptr);
+ layer->SetBackendId(setBackend);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the SoftmaxLayer", __func__);
+ }
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<hal_1_0::HalPolicy>(operation, 0, *layer, model, data);
@@ -604,7 +610,6 @@ bool HalPolicy::ConvertSpaceToDepth(const Operation& operation, const Model& mod
}
armnn::SpaceToDepthDescriptor desc;
- bool dataLayoutCheck;
GetInputScalar<hal_1_0::HalPolicy>(operation, 1, OperandType::INT32, desc.m_BlockSize, model, data);
@@ -626,10 +631,12 @@ bool HalPolicy::ConvertSpaceToDepth(const Operation& operation, const Model& mod
}
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsSpaceToDepthSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc);
@@ -639,7 +646,11 @@ bool HalPolicy::ConvertSpaceToDepth(const Operation& operation, const Model& mod
}
armnn::IConnectableLayer* const layer = data.m_Network->AddSpaceToDepthLayer(desc);
- assert(layer != nullptr);
+ layer->SetBackendId(setBackend);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the SpaceToDepthLayer", __func__);
+ }
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<hal_1_0::HalPolicy>(operation, 0, *layer, model, data);
@@ -686,10 +697,12 @@ bool HalPolicy::ConvertResizeBilinear(const Operation& operation, const Model& m
desc.m_DataLayout = armnn::DataLayout::NHWC;
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsResizeSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc);
@@ -705,9 +718,11 @@ bool HalPolicy::ConvertResizeBilinear(const Operation& operation, const Model& m
}
armnn::IConnectableLayer* layer = data.m_Network->AddResizeLayer(desc);
-
- assert(layer != nullptr);
-
+ layer->SetBackendId(setBackend);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the ResizeLayer", __func__);
+ }
layer->GetOutputSlot(0).SetTensorInfo(outputInfo);
input.Connect(layer->GetInputSlot(0));
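Two patterns in the hunks above repeat throughout this commit: the per-operator wrappers (ConvertAdd, ConvertMul, ConvertDiv, ConvertSub, ...) collapse into a single ConvertElementwiseBinary dispatch parameterised by armnn::BinaryOperation, and each layer-support query now also records which backend accepted the layer so it can be pinned with SetBackendId(), while assert(layer != nullptr) becomes an explicit Fail() path. The sketch below only illustrates the dispatch shape; all of the types are simplified stand-ins that mirror the names in the diff, not the driver's real signatures.

#include <cstdio>

// Stand-in for armnn::BinaryOperation (same enumerator names as in the diff).
namespace armnn { enum class BinaryOperation { Add, Sub, Mul, Div, Maximum, Minimum }; }

// Stand-in for the NNAPI operation type handled by the HalPolicy switch.
enum class OperationType { ADD, SUB, MUL, DIV };

bool ConvertElementwiseBinary(OperationType type, armnn::BinaryOperation binaryOperation)
{
    // In the driver this builds one elementwise-binary layer whose descriptor
    // carries binaryOperation; only that field differs between ADD/SUB/MUL/DIV.
    std::printf("op %d -> ConvertElementwiseBinary(%d)\n",
                static_cast<int>(type), static_cast<int>(binaryOperation));
    return true;
}

bool ConvertOperation(OperationType type)
{
    switch (type)
    {
        case OperationType::ADD: return ConvertElementwiseBinary(type, armnn::BinaryOperation::Add);
        case OperationType::SUB: return ConvertElementwiseBinary(type, armnn::BinaryOperation::Sub);
        case OperationType::MUL: return ConvertElementwiseBinary(type, armnn::BinaryOperation::Mul);
        case OperationType::DIV: return ConvertElementwiseBinary(type, armnn::BinaryOperation::Div);
    }
    return false;
}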
diff --git a/1.0/HalPolicy.hpp b/1.0/HalPolicy.hpp
index 2350781..5d92f0d 100644
--- a/1.0/HalPolicy.hpp
+++ b/1.0/HalPolicy.hpp
@@ -1,15 +1,16 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2021,2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include "../ConversionUtils.hpp"
-#include "NamespaceAdaptor.hpp"
#include <HalInterfaces.h>
+namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
+
namespace armnn_driver
{
namespace hal_1_0
@@ -30,8 +31,6 @@ public:
static bool ConvertOperation(const Operation& operation, const Model& model, ConversionData& data);
private:
- static bool ConvertAdd(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertAveragePool2d(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertConcatenation(const Operation& operation, const Model& model, ConversionData& data);
@@ -44,6 +43,11 @@ private:
static bool ConvertDequantize(const Operation& operation, const Model& model, ConversionData& data);
+ static bool ConvertElementwiseBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ armnn::BinaryOperation binaryOperation);
+
static bool ConvertFloor(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertFullyConnected(const Operation& operation, const Model& model, ConversionData& data);
@@ -62,8 +66,6 @@ private:
static bool ConvertMaxPool2d(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertMul(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertReLu(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertReLu1(const Operation& operation, const Model& model, ConversionData& data);
diff --git a/1.1/ArmnnDriver.hpp b/1.1/ArmnnDriver.hpp
index 091ca6e..521b918 100644
--- a/1.1/ArmnnDriver.hpp
+++ b/1.1/ArmnnDriver.hpp
@@ -14,7 +14,6 @@
#include "../ArmnnDriverImpl.hpp"
#include "../1.0/ArmnnDriverImpl.hpp"
#include "../1.0/HalPolicy.hpp"
-#include "../NamespaceAdaptor.hpp"
#include <log/log.h>
diff --git a/1.1/ArmnnDriverImpl.cpp b/1.1/ArmnnDriverImpl.cpp
index a528b96..1d1aaa7 100644
--- a/1.1/ArmnnDriverImpl.cpp
+++ b/1.1/ArmnnDriverImpl.cpp
@@ -4,7 +4,6 @@
//
#include "ArmnnDriverImpl.hpp"
-#include "NamespaceAdaptor.hpp"
#include "../SystemPropertiesUtils.hpp"
#include <log/log.h>
@@ -70,4 +69,4 @@ Return<void> ArmnnDriverImpl::getCapabilities_1_1(const armnn::IRuntimePtr& runt
}
} // namespace hal_1_1
-} // namespace armnn_driver
+} // namespace armnn_driver
\ No newline at end of file
diff --git a/1.1/ArmnnDriverImpl.hpp b/1.1/ArmnnDriverImpl.hpp
index 3173590..c90e04b 100644
--- a/1.1/ArmnnDriverImpl.hpp
+++ b/1.1/ArmnnDriverImpl.hpp
@@ -7,17 +7,18 @@
#include <HalInterfaces.h>
-#include "NamespaceAdaptor.hpp"
#include "../DriverOptions.hpp"
#include <armnn/ArmNN.hpp>
-#if ARMNN_ANDROID_S
-using namespace android::nn;
-#elif ARMNN_ANDROID_R
+#ifdef ARMNN_ANDROID_R
using namespace android::nn::hal;
#endif
+#ifdef ARMNN_ANDROID_S
+using namespace android::hardware;
+#endif
+
namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
namespace V1_1 = ::android::hardware::neuralnetworks::V1_1;
diff --git a/1.1/HalPolicy.cpp b/1.1/HalPolicy.cpp
index 53a884c..cd59cd6 100644
--- a/1.1/HalPolicy.cpp
+++ b/1.1/HalPolicy.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2019,2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -80,9 +80,9 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
switch (operation.type)
{
case V1_1::OperationType::DIV:
- return ConvertDiv(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, armnn::BinaryOperation::Div);
case V1_1::OperationType::SUB:
- return ConvertSub(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, armnn::BinaryOperation::Sub);
case V1_1::OperationType::MEAN:
return ConvertMean(operation, model, data);
case V1_1::OperationType::PAD:
@@ -104,16 +104,13 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
}
}
-bool HalPolicy::ConvertDiv(const Operation& operation, const Model& model, ConversionData& data)
+bool HalPolicy::ConvertElementwiseBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ armnn::BinaryOperation binaryOperation)
{
- ALOGV("hal_1_1::HalPolicy::ConvertDiv()");
- return ::ConvertDiv<hal_1_1::HalPolicy>(operation, model, data);
-}
-
-bool HalPolicy::ConvertSub(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_1::HalPolicy::ConvertSub()");
- return ::ConvertSub<hal_1_1::HalPolicy>(operation, model, data);
+ ALOGV("hal_1_1::HalPolicy::ConvertElementwiseBinary()");
+ return ::ConvertElementwiseBinary<hal_1_1::HalPolicy>(operation, model, data, binaryOperation);
}
bool HalPolicy::ConvertMean(const Operation& operation, const Model& model, ConversionData& data)
diff --git a/1.1/HalPolicy.hpp b/1.1/HalPolicy.hpp
index 6c53ab6..e1feb83 100644
--- a/1.1/HalPolicy.hpp
+++ b/1.1/HalPolicy.hpp
@@ -1,12 +1,11 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2021,2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include "../ConversionUtils.hpp"
-#include "../NamespaceAdaptor.hpp"
#include <HalInterfaces.h>
@@ -32,8 +31,11 @@ public:
static bool ConvertOperation(const Operation& operation, const Model& model, ConversionData& data);
private:
- static bool ConvertDiv(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertSub(const Operation& operation, const Model& model, ConversionData& data);
+ static bool ConvertElementwiseBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ armnn::BinaryOperation binaryOperation);
+
static bool ConvertMean(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertPad(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertSpaceToBatchNd(const Operation& operation, const Model& model, ConversionData& data);
diff --git a/1.2/ArmnnDriver.hpp b/1.2/ArmnnDriver.hpp
index 1a9d9d3..c855b52 100644
--- a/1.2/ArmnnDriver.hpp
+++ b/1.2/ArmnnDriver.hpp
@@ -19,6 +19,8 @@
#include "../1.0/ArmnnDriverImpl.hpp"
#include "../1.0/HalPolicy.hpp"
+#include <armnn/BackendHelper.hpp>
+
#include <log/log.h>
namespace armnn_driver
@@ -129,26 +131,32 @@ public:
Return<void> getType(getType_cb cb)
{
ALOGV("hal_1_2::ArmnnDriver::getType()");
-
- cb(V1_0::ErrorStatus::NONE, V1_2::DeviceType::CPU);
+ const auto device_type = hal_1_2::HalPolicy::GetDeviceTypeFromOptions(this->m_Options);
+ cb(V1_0::ErrorStatus::NONE, device_type);
return Void();
}
Return<V1_0::ErrorStatus> prepareModelFromCache(
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const HidlToken&,
- const sp<V1_2::IPreparedModelCallback>& callback)
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb)
{
ALOGV("hal_1_2::ArmnnDriver::prepareModelFromCache()");
- callback->notify_1_2(V1_0::ErrorStatus::GENERAL_FAILURE, nullptr);
- return V1_0::ErrorStatus::GENERAL_FAILURE;
+ return ArmnnDriverImpl::prepareModelFromCache(m_Runtime,
+ m_Options,
+ modelCacheHandle,
+ dataCacheHandle,
+ token,
+ cb);
}
- Return<V1_0::ErrorStatus> prepareModel_1_2(const V1_2::Model& model, V1_1::ExecutionPreference preference,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&, const HidlToken&,
- const android::sp<V1_2::IPreparedModelCallback>& cb)
+ Return<V1_0::ErrorStatus> prepareModel_1_2(
+ const V1_2::Model& model, V1_1::ExecutionPreference preference,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb)
{
ALOGV("hal_1_2::ArmnnDriver::prepareModel_1_2()");
@@ -165,6 +173,9 @@ public:
m_ClTunedParameters,
m_Options,
model,
+ modelCacheHandle,
+ dataCacheHandle,
+ token,
cb,
model.relaxComputationFloat32toFloat16
&& m_Options.GetFp16Enabled());
@@ -198,9 +209,12 @@ public:
Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb)
{
ALOGV("hal_1_2::ArmnnDriver::getSupportedExtensions()");
-
- // Set both numbers to be 0 for cache not supported.
- cb(V1_0::ErrorStatus::NONE, 0, 0);
+ unsigned int numberOfCachedModelFiles = 0;
+ for (auto& backend : m_Options.GetBackends())
+ {
+ numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+ }
+ cb(V1_0::ErrorStatus::NONE, numberOfCachedModelFiles, 1ul);
return Void();
}
};
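Two behavioural changes in this file are worth noting: getType() now derives the reported device type from the first configured backend (via the new HalPolicy::GetDeviceTypeFromOptions, defined later in 1.2/HalPolicy.cpp) instead of always answering CPU, and getNumberOfCacheFilesNeeded() sums the per-backend cache file counts instead of returning 0/0. The following is only a minimal standalone sketch of that logic; it uses plain strings in place of armnn::BackendId and assumes, purely for illustration, that GpuAcc exposes one cache file and the other backends none.

#include <string>
#include <vector>

enum class DeviceType { CPU, GPU, ACCELERATOR };

// Mirrors GetDeviceTypeFromOptions(): the first backend decides the device type.
DeviceType DeviceTypeFromBackends(const std::vector<std::string>& backends)
{
    if (backends.empty())
    {
        return DeviceType::CPU;
    }
    const std::string& first = backends.front();
    if (first == "CpuAcc" || first == "CpuRef") { return DeviceType::CPU; }
    if (first == "GpuAcc")                      { return DeviceType::GPU; }
    return DeviceType::ACCELERATOR;
}

// Mirrors getNumberOfCacheFilesNeeded(): sum the cache file count of each
// configured backend (assumed here to be 1 for GpuAcc, 0 otherwise).
unsigned int NumberOfCachedModelFiles(const std::vector<std::string>& backends)
{
    unsigned int total = 0;
    for (const auto& backend : backends)
    {
        total += (backend == "GpuAcc") ? 1u : 0u;
    }
    return total;
}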
diff --git a/1.2/ArmnnDriverImpl.cpp b/1.2/ArmnnDriverImpl.cpp
index 9cef02e..f0a426f 100644
--- a/1.2/ArmnnDriverImpl.cpp
+++ b/1.2/ArmnnDriverImpl.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -8,7 +8,11 @@
#include "../ModelToINetworkConverter.hpp"
#include "../SystemPropertiesUtils.hpp"
+#include <armnnDeserializer/IDeserializer.hpp>
+
#include <log/log.h>
+#include <sys/stat.h>
+#include <chrono>
namespace
{
@@ -56,9 +60,9 @@ const char *g_OperandTypeInt32PerformanceExecTime = "Armnn.operandType
const char *g_OperandTypeInt32PerformancePowerUsage = "Armnn.operandTypeInt32Performance.powerUsage";
-void NotifyCallbackAndCheck(const sp<V1_2::IPreparedModelCallback>& callback,
+void NotifyCallbackAndCheck(const android::sp<V1_2::IPreparedModelCallback>& callback,
V1_0::ErrorStatus errorStatus,
- const sp<V1_2::IPreparedModel>& preparedModelPtr)
+ const android::sp<V1_2::IPreparedModel>& preparedModelPtr)
{
Return<void> returned = callback->notify_1_2(errorStatus, preparedModelPtr);
// This check is required, if the callback fails and it isn't checked it will bring down the service
@@ -71,7 +75,7 @@ void NotifyCallbackAndCheck(const sp<V1_2::IPreparedModelCallback>& callback,
Return<V1_0::ErrorStatus> FailPrepareModel(V1_0::ErrorStatus error,
const std::string& message,
- const sp<V1_2::IPreparedModelCallback>& callback)
+ const android::sp<V1_2::IPreparedModelCallback>& callback)
{
ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());
NotifyCallbackAndCheck(callback, error, nullptr);
@@ -90,11 +94,16 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
const DriverOptions& options,
const V1_2::Model& model,
- const sp<V1_2::IPreparedModelCallback>& cb,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb,
bool float32ToFloat16)
{
ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_2()");
+ std::chrono::time_point<std::chrono::system_clock> prepareModelTimepoint = std::chrono::system_clock::now();
+
if (cb.get() == nullptr)
{
ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
@@ -125,21 +134,66 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
return V1_0::ErrorStatus::NONE;
}
+ // Serialize the network graph to a .armnn file if an output directory
+ // has been specified in the drivers' arguments.
+ std::vector<uint8_t> dataCacheData;
+ bool serializeToFile = dataCacheHandle.size() < 1 ? false : true;
+ auto serializedNetworkFileName =
+ SerializeNetwork(*modelConverter.GetINetwork(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ dataCacheData,
+ serializeToFile);
+
// Optimize the network
armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
- armnn::OptimizerOptions OptOptions;
- OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
+ armnn::OptimizerOptionsOpaque OptOptions;
+ OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
+ OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());
+
+ int cachedFd = -1;
+ bool saveCachedNetwork = options.SaveCachedNetwork();
+
+ unsigned int numberOfCachedModelFiles = 0;
+ if (modelCacheHandle.size() > 0)
+ {
+ unsigned int index = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ // modelCacheHandle size should be equal to numberOfCachedModelFiles
+ // modelCacheHandle vector should be in same order as backends
+ auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+ if (numberOfCacheFiles > 0)
+ {
+ numberOfCachedModelFiles += numberOfCacheFiles;
+ if (modelCacheHandle[index]->numFds == 1)
+ {
+ if (backend == armnn::Compute::GpuAcc)
+ {
+ cachedFd = modelCacheHandle[index]->data[0];
+ saveCachedNetwork = true;
+ }
+ }
+ index += numberOfCachedModelFiles;
+ }
+ }
+ }
armnn::BackendOptions gpuAcc("GpuAcc",
{
- { "FastMathEnabled", options.IsFastMathEnabled() }
+ { "FastMathEnabled", options.IsFastMathEnabled() },
+ { "SaveCachedNetwork", saveCachedNetwork },
+ { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
+ { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
+ { "CachedFileDescriptor", cachedFd }
});
+
armnn::BackendOptions cpuAcc("CpuAcc",
{
- { "FastMathEnabled", options.IsFastMathEnabled() }
+ { "FastMathEnabled", options.IsFastMathEnabled() },
+ { "NumberOfThreads", options.GetNumberOfThreads() }
});
- OptOptions.m_ModelOptions.push_back(gpuAcc);
- OptOptions.m_ModelOptions.push_back(cpuAcc);
+ OptOptions.AddModelOption(gpuAcc);
+ OptOptions.AddModelOption(cpuAcc);
std::vector<std::string> errMessages;
try
@@ -178,11 +232,19 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
// Load it into the runtime.
armnn::NetworkId netId = 0;
+ std::string msg;
+ armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+ MemorySource::Undefined,
+ MemorySource::Undefined,
+ options.IsGpuProfilingEnabled());
+
+ auto numInputs = getMainModel(model).inputIndexes.size();
+ auto numOutputs = getMainModel(model).outputIndexes.size();
try
{
- if (runtime->LoadNetwork(netId, move(optNet)) != armnn::Status::Success)
+ if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
{
- return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
+ return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, msg, cb);
}
}
catch (std::exception& e)
@@ -193,11 +255,12 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
return V1_0::ErrorStatus::NONE;
}
- // Now that we have a networkId for the graph rename the dump file to use it
- // so that we can associate the graph file and the input/output tensor dump files
- RenameGraphDotFile(dotGraphFileName,
- options.GetRequestInputsAndOutputsDumpDir(),
- netId);
+ // Now that we have a networkId for the graph rename the exported files to use it
+ // so that we can associate the graph file and the input/output tensor exported files
+ RenameExportedFiles(serializedNetworkFileName,
+ dotGraphFileName,
+ options.GetRequestInputsAndOutputsDumpDir(),
+ netId);
std::unique_ptr<ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>> preparedModel(
new ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>(
@@ -205,32 +268,388 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
runtime.get(),
model,
options.GetRequestInputsAndOutputsDumpDir(),
- options.IsGpuProfilingEnabled()));
+ options.IsGpuProfilingEnabled(),
+ options.isAsyncModelExecutionEnabled(),
+ options.getNoOfArmnnThreads(),
+ options.isImportEnabled(),
+ options.isExportEnabled()));
// Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
// this is enabled) before the first 'real' inference which removes the overhead of the first inference.
- if (!preparedModel->ExecuteWithDummyInputs())
+ // Only run this if the GpuAcc backend has been added to options
+ if (std::find(options.GetBackends().begin(),
+ options.GetBackends().end(),
+ armnn::Compute::GpuAcc) != options.GetBackends().end())
+ {
+ if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
+ {
+ return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+ }
+
+ if (clTunedParameters &&
+ options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+ {
+ // Now that we've done one inference the CL kernel parameters will have been tuned,
+ // so save the updated file.
+ try
+ {
+ clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ }
+ catch (std::exception& error)
+ {
+ ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
+ options.GetClTunedParametersFile().c_str(), error.what());
+ }
+ }
+ }
+
+ size_t hashValue = 0;
+ // Cache the model
+ if (dataCacheHandle.size() > 0)
+ {
+ // Cache the Arm NN model, should be only 1
+ if (dataCacheHandle.size() != 1)
+ {
+ NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ if (dataCacheHandle[0]->numFds != 1)
+ {
+ ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1.");
+ NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ if (dataCacheHandle[0]->data[0] < 0)
+ {
+ ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, fd < 0");
+ NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+ if (dataCacheFileAccessMode != O_RDWR)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_2(): Invalid Access Mode.");
+ NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size());
+ hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
+ }
+
+ if (modelCacheHandle.size() > 0)
+ {
+ if (modelCacheHandle.size() != numberOfCachedModelFiles)
+ {
+ NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+ return V1_0::ErrorStatus::NONE;
+ }
+ for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
+ {
+ if (modelCacheHandle[i]->numFds == 1)
+ {
+ int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE;
+ if (modelCacheFileAccessMode != O_RDONLY)
+ {
+ struct stat statBuffer;
+ if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0)
+ {
+ long modelDataSize = statBuffer.st_size;
+ if (modelDataSize > 0)
+ {
+ std::vector <uint8_t> modelData(modelDataSize);
+ pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0);
+ hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+ }
+ }
+ }
+ }
+ }
+ }
+ if (hashValue != 0)
+ {
+ CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
+ }
+
+ NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+
+ ALOGV("ArmnnDriverImpl::prepareModel cache timing = %lld µs", std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - prepareModelTimepoint).count());
+
+ return V1_0::ErrorStatus::NONE;
+}
+
+Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareModelFromCache(
+ const armnn::IRuntimePtr& runtime,
+ const DriverOptions& options,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb,
+ bool float32ToFloat16)
+{
+ ALOGV("ArmnnDriverImpl::prepareModelFromCache()");
+ std::chrono::time_point<std::chrono::system_clock> modelFromCacheTimepoint = std::chrono::system_clock::now();
+
+ if (cb.get() == nullptr)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid callback passed to prepareModel");
+ return V1_0::ErrorStatus::INVALID_ARGUMENT;
+ }
+
+ if (!runtime)
+ {
+ return FailPrepareModel(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
+ }
+
+ if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
+ {
+ FailPrepareModel(V1_0::ErrorStatus::INVALID_ARGUMENT, "Invalid token passed!", cb);
+ return V1_0::ErrorStatus::INVALID_ARGUMENT;
+ }
+
+ // DataCacheHandle size should always be 1
+ // Arm NN model
+ if (dataCacheHandle.size() != 1)
+ {
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ // Check if model files cached they match the expected value
+ unsigned int numberOfCachedModelFiles = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+ }
+ if (modelCacheHandle.size() != numberOfCachedModelFiles)
+ {
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid model cache!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ if (dataCacheHandle[0]->numFds != 1)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the cache data, numFds != 1.");
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ if (dataCacheHandle[0]->data[0] < 0)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the cache data, fd < 0");
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "No data cache!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+ if (dataCacheFileAccessMode != O_RDWR)
+ {
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid Access Mode!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
+ if (dataSize == 0)
{
- return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid data to deserialize!");
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid data to deserialize!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
}
- if (clTunedParameters &&
- options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+ int offset = 0;
{
- // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
- try
+ struct stat statBuffer;
+ if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0)
{
- clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ unsigned long bufferSize = statBuffer.st_size;
+ if (bufferSize != dataSize)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: Invalid data to deserialize!");
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid data to deserialize!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
}
- catch (std::exception& error)
+ }
+ std::vector<uint8_t> dataCacheData(dataSize);
+ pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset);
+ auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
+
+ int gpuAccCachedFd = -1;
+ bool saveCachedNetwork = false;
+ if (modelCacheHandle.size() > 0)
+ {
+ unsigned int index = 0;
+ for (auto& backend : options.GetBackends())
{
- ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
- options.GetClTunedParametersFile().c_str(), error.what());
+ // modelCacheHandle size should be equal to numberOfCachedModelFiles
+ // modelCacheHandle vector should be in same order as backends
+ auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+ if (numberOfCacheFiles > 0)
+ {
+ if (modelCacheHandle[index]->numFds != 1)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: Cannot read from the model cache, numFds != 1.");
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE,
+ "Cannot read from the model cache, numFds != 1.", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+ auto cachedFd = modelCacheHandle[index]->data[0];
+
+ int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE;
+ if (modelCacheFileAccessMode != O_RDWR)
+ {
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Invalid Access Mode!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ struct stat statBuffer;
+ if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0)
+ {
+ long modelDataSize = statBuffer.st_size;
+ if (modelDataSize <= 0)
+ {
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Wrong cached model size!", cb);
+ return V1_0::ErrorStatus::NONE;
+ }
+ std::vector<uint8_t> modelData(modelDataSize);
+ pread(cachedFd, modelData.data(), modelData.size(), 0);
+ hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+
+ // For GpuAcc numberOfCachedFiles is 1
+ if (backend == armnn::Compute::GpuAcc)
+ {
+ gpuAccCachedFd = cachedFd;
+ }
+ }
+ index += numberOfCacheFiles;
+ }
}
}
+ if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size()))
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache: ValidateHash() failed!");
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "ValidateHash Failed!", cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ // Deserialize the network..
+ armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
+ try
+ {
+ network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
+ }
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what() << ") caught from Deserializer.";
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_0::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ // Optimize the network
+ armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
+ armnn::OptimizerOptionsOpaque OptOptions;
+ OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
+ OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());
+
+ armnn::BackendOptions gpuAcc("GpuAcc",
+ {
+ {"FastMathEnabled", options.IsFastMathEnabled()},
+ {"SaveCachedNetwork", saveCachedNetwork},
+ {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()},
+ {"MLGOTuningFilePath", options.GetClMLGOTunedParametersFile()},
+ {"CachedFileDescriptor", gpuAccCachedFd}
+ });
+
+ armnn::BackendOptions cpuAcc("CpuAcc",
+ {
+ {"FastMathEnabled", options.IsFastMathEnabled()},
+ {"NumberOfThreads", options.GetNumberOfThreads()}
+ });
+ OptOptions.AddModelOption(gpuAcc);
+ OptOptions.AddModelOption(cpuAcc);
+
+ std::vector<std::string> errMessages;
+ try
+ {
+ optNet = armnn::Optimize(*network.get(),
+ options.GetBackends(),
+ runtime->GetDeviceSpec(),
+ OptOptions,
+ errMessages);
+ }
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what() << ") caught from optimize.";
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ // Check that the optimized network is valid.
+ if (!optNet)
+ {
+ std::stringstream message;
+ message << "Invalid optimized network";
+ for (const std::string& msg : errMessages)
+ {
+ message << "\n" << msg;
+ }
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ // Export the optimized network graph to a dot file if an output dump directory
+ // has been specified in the drivers' arguments.
+ std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
+ options.GetRequestInputsAndOutputsDumpDir());
+
+ // Load it into the runtime.
+ armnn::NetworkId netId = 0;
+ std::string msg;
+ armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+ MemorySource::Undefined,
+ MemorySource::Undefined,
+ options.IsGpuProfilingEnabled());
+
+ try
+ {
+ if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
+ {
+ return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, msg, cb);
+ }
+ }
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what() << ") caught from LoadNetwork.";
+ FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ std::unique_ptr<ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>> preparedModel(
+ new ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>(
+ netId,
+ runtime.get(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ options.IsGpuProfilingEnabled(),
+ options.isAsyncModelExecutionEnabled(),
+ options.getNoOfArmnnThreads(),
+ options.isImportEnabled(),
+ options.isExportEnabled(),
+ true));
+
NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
+ ALOGV("ArmnnDriverImpl::prepareModelFromCache cache timing = %lld µs",
+ std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - modelFromCacheTimepoint).count());
+
return V1_0::ErrorStatus::NONE;
}
@@ -258,7 +677,7 @@ Return<void> ArmnnDriverImpl::getCapabilities_1_2(const armnn::IRuntimePtr& runt
ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);
// Set the base value for all operand types
- #ifdef ARMNN_ANDROID_R
+ #if defined(ARMNN_ANDROID_R) || defined(ARMNN_ANDROID_S)
capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_2>({FLT_MAX, FLT_MAX});
#else
capabilities.operandPerformance = nonExtensionOperandPerformance({FLT_MAX, FLT_MAX});
@@ -337,7 +756,7 @@ Return<void> ArmnnDriverImpl::getCapabilities_1_2(const armnn::IRuntimePtr& runt
capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = 0;
// Set the base value for all operand types
- #ifdef ARMNN_ANDROID_R
+ #if defined(ARMNN_ANDROID_R) || defined(ARMNN_ANDROID_S)
capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_2>({0.f, 0.0f});
#else
capabilities.operandPerformance = nonExtensionOperandPerformance({0.f, 0.0f});
@@ -350,4 +769,4 @@ Return<void> ArmnnDriverImpl::getCapabilities_1_2(const armnn::IRuntimePtr& runt
}
} // namespace hal_1_2
-} // namespace armnn_driver
+} // namespace armnn_driver
\ No newline at end of file
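The caching flow added above boils down to: at prepare time, serialize the network, write the bytes to the data-cache fd, hash them (XOR-ing in a hash of each backend model-cache file), and register the result against the token; in prepareModelFromCache, re-read and re-hash the same files, validate them against the token, then deserialize and optimize as usual. The real bookkeeping lives in the new CacheDataHandler.cpp/.hpp added by this commit; the code below is only a minimal standalone sketch of that token -> (hash, size) pattern, with the 32-byte HidlToken simplified to a std::string for illustration.

#include <cstddef>
#include <cstdint>
#include <functional>
#include <map>
#include <string>
#include <string_view>
#include <vector>

struct CacheEntry
{
    size_t hashValue;
    size_t dataSize;
};

class MiniCacheDataHandler
{
public:
    // Hash the serialized network bytes (stand-in for CacheDataHandler::Hash).
    size_t Hash(const std::vector<uint8_t>& data) const
    {
        return std::hash<std::string_view>{}(
            std::string_view(reinterpret_cast<const char*>(data.data()), data.size()));
    }

    // prepareArmnnModel_1_2 path: remember what was written for this token.
    void Register(const std::string& token, size_t hashValue, size_t dataSize)
    {
        m_Entries[token] = {hashValue, dataSize};
    }

    // prepareModelFromCache path: how many bytes to read back for this token.
    size_t GetCacheSize(const std::string& token) const
    {
        auto it = m_Entries.find(token);
        return it == m_Entries.end() ? 0 : it->second.dataSize;
    }

    // prepareModelFromCache path: the re-read bytes must hash to the registered value.
    bool Validate(const std::string& token, size_t hashValue, size_t dataSize) const
    {
        auto it = m_Entries.find(token);
        return it != m_Entries.end()
            && it->second.hashValue == hashValue
            && it->second.dataSize == dataSize;
    }

private:
    std::map<std::string, CacheEntry> m_Entries;
};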
diff --git a/1.2/ArmnnDriverImpl.hpp b/1.2/ArmnnDriverImpl.hpp
index d5cf7f3..70f46cb 100644
--- a/1.2/ArmnnDriverImpl.hpp
+++ b/1.2/ArmnnDriverImpl.hpp
@@ -7,17 +7,21 @@
#include <HalInterfaces.h>
+#include "../CacheDataHandler.hpp"
#include "../DriverOptions.hpp"
-#include "NamespaceAdaptor.hpp"
#include <armnn/ArmNN.hpp>
-#if ARMNN_ANDROID_S
-using namespace android::nn;
-#elif ARMNN_ANDROID_R
+#include <NeuralNetworks.h>
+
+#ifdef ARMNN_ANDROID_R
using namespace android::nn::hal;
#endif
+#ifdef ARMNN_ANDROID_S
+using namespace android::hardware;
+#endif
+
namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
namespace V1_2 = ::android::hardware::neuralnetworks::V1_2;
@@ -29,16 +33,31 @@ namespace hal_1_2
class ArmnnDriverImpl
{
public:
- static Return<V1_0::ErrorStatus> prepareArmnnModel_1_2(const armnn::IRuntimePtr& runtime,
- const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
- const DriverOptions& options,
- const V1_2::Model& model,
- const android::sp<V1_2::IPreparedModelCallback>& cb,
- bool float32ToFloat16 = false);
+ using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
+
+ static Return<V1_0::ErrorStatus> prepareArmnnModel_1_2(
+ const armnn::IRuntimePtr& runtime,
+ const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
+ const DriverOptions& options,
+ const V1_2::Model& model,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb,
+ bool float32ToFloat16 = false);
+
+ static Return<V1_0::ErrorStatus> prepareModelFromCache(
+ const armnn::IRuntimePtr& runtime,
+ const DriverOptions& options,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb,
+ bool float32ToFloat16 = false);
static Return<void> getCapabilities_1_2(const armnn::IRuntimePtr& runtime,
V1_2::IDevice::getCapabilities_1_2_cb cb);
};
} // namespace hal_1_2
-} // namespace armnn_driver
+} // namespace armnn_driver
\ No newline at end of file
diff --git a/1.2/HalPolicy.cpp b/1.2/HalPolicy.cpp
index 9d49ff5..bfc467c 100644
--- a/1.2/HalPolicy.cpp
+++ b/1.2/HalPolicy.cpp
@@ -1,9 +1,10 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2019-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "HalPolicy.hpp"
+#include "DriverOptions.hpp"
namespace armnn_driver
{
@@ -17,14 +18,41 @@ namespace
} // anonymous namespace
-bool HalPolicy::ConvertOperation(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+HalPolicy::DeviceType HalPolicy::GetDeviceTypeFromOptions(const DriverOptions& options)
+{
+ // Query backends list from the options
+ auto backends = options.GetBackends();
+ // Return first backend
+ if(backends.size()>0)
+ {
+ const auto &first_backend = backends[0];
+ if(first_backend.IsCpuAcc()||first_backend.IsCpuRef())
+ {
+ return V1_2::DeviceType::CPU;
+ }
+ else if(first_backend.IsGpuAcc())
+ {
+ return V1_2::DeviceType::GPU;
+ }
+ else
+ {
+ return V1_2::DeviceType::ACCELERATOR;
+ }
+ }
+ else
+ {
+ return V1_2::DeviceType::CPU;
+ }
+}
+
+bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model, ConversionData& data)
{
switch (operation.type)
{
case V1_2::OperationType::ABS:
return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Abs);
case V1_2::OperationType::ADD:
- return ConvertAdd(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Add);
case V1_2::OperationType::ARGMAX:
return ConvertArgMinMax(operation, model, data, ArgMinMaxFunction::Max);
case V1_2::OperationType::ARGMIN:
@@ -33,6 +61,10 @@ bool HalPolicy::ConvertOperation(const V1_2::Operation& operation, const V1_2::M
return ConvertAveragePool2d(operation, model, data);
case V1_2::OperationType::BATCH_TO_SPACE_ND:
return ConvertBatchToSpaceNd(operation, model, data);
+ case V1_2::OperationType::CAST:
+ return ConvertCast(operation, model, data);
+ case V1_2::OperationType::CHANNEL_SHUFFLE:
+ return ConvertChannelShuffle(operation, model, data);
case V1_2::OperationType::CONCATENATION:
return ConvertConcatenation(operation, model, data);
case V1_2::OperationType::CONV_2D:
@@ -44,7 +76,7 @@ bool HalPolicy::ConvertOperation(const V1_2::Operation& operation, const V1_2::M
case V1_2::OperationType::DEQUANTIZE:
return ConvertDequantize(operation, model, data);
case V1_2::OperationType::DIV:
- return ConvertDiv(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Div);
case V1_2::OperationType::EQUAL:
return ConvertComparison(operation, model, data, ComparisonOperation::Equal);
case V1_2::OperationType::EXP:
@@ -75,6 +107,8 @@ bool HalPolicy::ConvertOperation(const V1_2::Operation& operation, const V1_2::M
return ConvertComparison(operation, model, data, ComparisonOperation::LessOrEqual);
case V1_2::OperationType::LOCAL_RESPONSE_NORMALIZATION:
return ConvertLocalResponseNormalization(operation, model, data);
+ case V1_2::OperationType::LOG:
+ return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Log);
case V1_2::OperationType::LOGISTIC:
return ConvertLogistic(operation, model, data);
case V1_2::OperationType::LOG_SOFTMAX:
@@ -84,13 +118,13 @@ bool HalPolicy::ConvertOperation(const V1_2::Operation& operation, const V1_2::M
case V1_2::OperationType::MAX_POOL_2D:
return ConvertMaxPool2d(operation, model, data);
case V1_2::OperationType::MAXIMUM:
- return ConvertMaximum(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Maximum);
case V1_2::OperationType::MEAN:
return ConvertMean(operation, model, data);
case V1_2::OperationType::MINIMUM:
- return ConvertMinimum(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Minimum);
case V1_2::OperationType::MUL:
- return ConvertMul(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Mul);
case V1_2::OperationType::NEG:
return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Neg);
case V1_2::OperationType::NOT_EQUAL:
@@ -105,6 +139,14 @@ bool HalPolicy::ConvertOperation(const V1_2::Operation& operation, const V1_2::M
return ConvertQuantize(operation, model, data);
case V1_2::OperationType::QUANTIZED_16BIT_LSTM:
return ConvertQuantized16BitLstm(operation, model, data);
+ case V1_2::OperationType::REDUCE_MAX:
+ return ConvertReduce(operation, model, data, ReduceOperation::Max);
+ case V1_2::OperationType::REDUCE_MIN:
+ return ConvertReduce(operation, model, data, ReduceOperation::Min);
+ case V1_2::OperationType::REDUCE_PROD:
+ return ConvertReduce(operation, model, data, ReduceOperation::Prod);
+ case V1_2::OperationType::REDUCE_SUM:
+ return ConvertReduce(operation, model, data, ReduceOperation::Sum);
case V1_2::OperationType::RELU:
return ConvertReLu(operation, model, data);
case V1_2::OperationType::RELU1:
@@ -119,38 +161,36 @@ bool HalPolicy::ConvertOperation(const V1_2::Operation& operation, const V1_2::M
return ConvertResize(operation, model, data, ResizeMethod::NearestNeighbor);
case V1_2::OperationType::RSQRT:
return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Rsqrt);
+ case V1_2::OperationType::SIN:
+ return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Sin);
+ case V1_2::OperationType::SOFTMAX:
+ return ConvertSoftmax(operation, model, data);
+ case V1_2::OperationType::SPACE_TO_BATCH_ND :
+ return ConvertSpaceToBatchNd(operation, model, data);
+ case V1_2::OperationType::SPACE_TO_DEPTH:
+ return ConvertSpaceToDepth(operation, model, data);
case V1_2::OperationType::SQRT:
return ConvertSqrt(operation, model, data);
case V1_2::OperationType::SQUEEZE:
return ConvertSqueeze(operation, model, data);
case V1_2::OperationType::STRIDED_SLICE:
return ConvertStridedSlice(operation, model, data);
+ case V1_2::OperationType::SUB:
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Sub);
case V1_2::OperationType::TRANSPOSE:
return ConvertTranspose(operation, model, data);
case V1_2::OperationType::TRANSPOSE_CONV_2D:
return ConvertTransposeConv2d(operation, model, data);
- case V1_2::OperationType::SOFTMAX:
- return ConvertSoftmax(operation, model, data);
- case V1_2::OperationType::SPACE_TO_BATCH_ND :
- return ConvertSpaceToBatchNd(operation, model, data);
- case V1_2::OperationType::SPACE_TO_DEPTH:
- return ConvertSpaceToDepth(operation, model, data);
- case V1_2::OperationType::SUB:
- return ConvertSub(operation, model, data);
case V1_2::OperationType::TANH:
return ConvertTanH(operation, model, data);
+ case V1_2::OperationType::UNIDIRECTIONAL_SEQUENCE_LSTM:
+ return ConvertUnidirectionalSequenceLstm(operation, model, data);
default:
return Fail("%s: Operation type %s not supported in ArmnnDriver",
__func__, toString(operation.type).c_str());
}
}
-bool HalPolicy::ConvertAdd(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
-{
- ALOGV("hal_1_2::HalPolicy::ConvertAdd()");
- return ::ConvertAdd<hal_1_2::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertArgMinMax(const V1_2::Operation& operation,
const V1_2::Model& model,
ConversionData& data,
@@ -160,20 +200,32 @@ bool HalPolicy::ConvertArgMinMax(const V1_2::Operation& operation,
return ::ConvertArgMinMax<hal_1_2::HalPolicy>(operation, model, data, argMinMaxFunction);
}
-bool HalPolicy::ConvertAveragePool2d(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertAveragePool2d(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertAveragePool2d()");
return ConvertPooling2d<hal_1_2::HalPolicy>(operation, __func__, PoolingAlgorithm::Average, model, data);
}
-bool HalPolicy::ConvertBatchToSpaceNd(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertBatchToSpaceNd(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertBatchToSpaceNd()");
return ::ConvertBatchToSpaceNd<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertComparison(const V1_2::Operation& operation,
- const V1_2::Model& model,
+bool HalPolicy::ConvertCast(const Operation& operation, const Model& model, ConversionData& data)
+{
+ ALOGV("hal_1_2::HalPolicy::ConvertCast()");
+ return ::ConvertCast<hal_1_2::HalPolicy>(operation, model, data);
+}
+
+bool HalPolicy::ConvertChannelShuffle(const Operation& operation, const Model& model, ConversionData& data)
+{
+ ALOGV("hal_1_2::HalPolicy::ConvertChannelShuffle()");
+ return ::ConvertChannelShuffle<hal_1_2::HalPolicy>(operation, model, data);
+}
+
+bool HalPolicy::ConvertComparison(const Operation& operation,
+ const Model& model,
ConversionData& data,
ComparisonOperation comparisonOperation)
{
@@ -181,44 +233,47 @@ bool HalPolicy::ConvertComparison(const V1_2::Operation& operation,
return ::ConvertComparison_1_2<hal_1_2::HalPolicy>(operation, model, data, comparisonOperation);
}
-bool HalPolicy::ConvertConcatenation(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertConcatenation(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertConcatenation()");
return ::ConvertConcatenation<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertConv2d(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertConv2d(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertConv2d()");
return ::ConvertConv2d_1_2<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertDepthToSpace(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertDepthToSpace(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertDepthToSpace()");
return ::ConvertDepthToSpace<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertDepthwiseConv2d(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertDepthwiseConv2d(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertDepthwiseConv2d()");
return ::ConvertDepthwiseConv2d_1_2<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertDequantize(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertDequantize(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertDequantize()");
return ::ConvertDequantize_1_2<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertDiv(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertElementwiseBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ BinaryOperation binaryOperation)
{
- ALOGV("hal_1_2::HalPolicy::ConvertDiv()");
- return ::ConvertDiv<hal_1_2::HalPolicy>(operation, model, data);
+ ALOGV("hal_1_2::HalPolicy::ConvertElementwiseBinary()");
+ return ::ConvertElementwiseBinary<hal_1_2::HalPolicy>(operation, model, data, binaryOperation);
}
bool HalPolicy::ConvertElementwiseUnary(const Operation& operation,
- const V1_2::Model& model,
+ const Model& model,
ConversionData& data,
UnaryOperation unaryOperation)
{
@@ -226,19 +281,19 @@ bool HalPolicy::ConvertElementwiseUnary(const Operation& operation,
return ::ConvertElementwiseUnary<hal_1_2::HalPolicy>(operation, model, data, unaryOperation);
}
-bool HalPolicy::ConvertExpandDims(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertExpandDims(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertExpandDims()");
return ::ConvertExpandDims<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertFloor(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertFloor(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertFloor()");
return ::ConvertFloor<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertFullyConnected(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertFullyConnected(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertFullyConnected()");
return ::ConvertFullyConnected<hal_1_2::HalPolicy>(operation, model, data);
@@ -300,30 +355,12 @@ bool HalPolicy::ConvertMaxPool2d(const Operation& operation, const Model& model,
return ConvertPooling2d<hal_1_2::HalPolicy>(operation, __func__, PoolingAlgorithm::Max, model, data);
}
-bool HalPolicy::ConvertMaximum(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_2::HalPolicy::ConvertMaximum()");
- return ::ConvertMaximum<hal_1_2::HalPolicy>(operation, model, data);
-}
-
-bool HalPolicy::ConvertMean(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertMean(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertMean()");
return ::ConvertMean<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertMinimum(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
-{
- ALOGV("hal_1_2::HalPolicy::ConvertMinimum()");
- return ::ConvertMinimum<hal_1_2::HalPolicy>(operation, model, data);
-}
-
-bool HalPolicy::ConvertMul(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
-{
- ALOGV("hal_1_2::HalPolicy::ConvertMul()");
- return ::ConvertMul<hal_1_2::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertPad(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertPad()");
@@ -354,32 +391,41 @@ bool HalPolicy::ConvertQuantized16BitLstm(const Operation& operation, const Mode
return ::ConvertQuantized16BitLstm<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertReLu(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertReduce(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ ReduceOperation reduceOperation)
+{
+ ALOGV("hal_1_2::HalPolicy::ConvertReduce()");
+ return ::ConvertReduce<hal_1_2::HalPolicy>(operation, model, data, reduceOperation);
+}
+
+bool HalPolicy::ConvertReLu(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertReLu()");
return ::ConvertReLu<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertReLu1(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertReLu1(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertReLu1()");
return ::ConvertReLu1<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertReLu6(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertReLu6(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertReLu6()");
return ::ConvertReLu6<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertReshape(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertReshape(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertReshape()");
return ::ConvertReshape<hal_1_2::HalPolicy>(operation, model, data);
}
bool HalPolicy::ConvertResize(const Operation& operation,
- const V1_2::Model& model,
+ const Model& model,
ConversionData& data,
ResizeMethod resizeMethod)
{
@@ -387,31 +433,25 @@ bool HalPolicy::ConvertResize(const Operation& operation,
return ::ConvertResize<hal_1_2::HalPolicy>(operation, model, data, resizeMethod);
}
-bool HalPolicy::ConvertSpaceToBatchNd(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertSpaceToBatchNd(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertSpaceToBatchNd()");
return ::ConvertSpaceToBatchNd<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertSpaceToDepth(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertSpaceToDepth(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertSpaceToDepth()");
return ::ConvertSpaceToDepth<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertSoftmax(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertSoftmax(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertSoftmax()");
return ::ConvertSoftmax<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertSub(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
-{
- ALOGV("hal_1_2::HalPolicy::ConvertSub()");
- return ::ConvertSub<hal_1_2::HalPolicy>(operation, model, data);
-}
-
-bool HalPolicy::ConvertTanH(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertTanH(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertTanH()");
return ::ConvertTanH<hal_1_2::HalPolicy>(operation, model, data);
@@ -423,7 +463,7 @@ bool HalPolicy::ConvertLstm(const Operation& operation, const Model& model, Conv
return ::ConvertLstm<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertSqrt(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertSqrt(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertSqrt()");
ActivationDescriptor desc;
@@ -432,29 +472,35 @@ bool HalPolicy::ConvertSqrt(const V1_2::Operation& operation, const V1_2::Model&
return ::ConvertToActivation<hal_1_2::HalPolicy>(operation, __func__, desc, model, data);
}
-bool HalPolicy::ConvertSqueeze(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertSqueeze(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertSqueeze()");
return ::ConvertSqueeze<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertStridedSlice(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertStridedSlice(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertStridedSlice()");
return ::ConvertStridedSlice<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertTranspose(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertTranspose(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertTranspose()");
return ::ConvertTranspose<hal_1_2::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertTransposeConv2d(const V1_2::Operation& operation, const V1_2::Model& model, ConversionData& data)
+bool HalPolicy::ConvertTransposeConv2d(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_2::HalPolicy::ConvertTransposeConv2d()");
return ::ConvertTransposeConv2d<hal_1_2::HalPolicy>(operation, model, data);
}
+bool HalPolicy::ConvertUnidirectionalSequenceLstm(const Operation& operation, const Model& model, ConversionData& data)
+{
+ ALOGV("hal_1_2::HalPolicy::ConvertUnidirectionalSequenceLstm()");
+ return ::ConvertUnidirectionalSequenceLstm<hal_1_2::HalPolicy>(operation, model, data);
+}
+
} // namespace hal_1_2
} // namespace armnn_driver
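
The dedicated Add, Div, Maximum, Minimum, Mul and Sub converters removed above all collapse onto the new ConvertElementwiseBinary, parameterised by armnn::BinaryOperation. A minimal sketch of the resulting mapping, mirroring the switch that is visible in 1.3/HalPolicy.cpp further down (the 1.2 ConvertOperation switch itself is outside the hunks shown here):

    // Sketch only: how the removed per-operator converters funnel into one entry point.
    switch (operation.type)
    {
        case V1_2::OperationType::ADD:     return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Add);
        case V1_2::OperationType::DIV:     return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Div);
        case V1_2::OperationType::MAXIMUM: return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Maximum);
        case V1_2::OperationType::MINIMUM: return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Minimum);
        case V1_2::OperationType::MUL:     return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Mul);
        case V1_2::OperationType::SUB:     return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Sub);
        default: break; // other operations keep their dedicated converters
    }
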
diff --git a/1.2/HalPolicy.hpp b/1.2/HalPolicy.hpp
index be02c22..4121ec9 100644
--- a/1.2/HalPolicy.hpp
+++ b/1.2/HalPolicy.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2019-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -16,6 +16,7 @@ namespace V1_2 = ::android::hardware::neuralnetworks::V1_2;
namespace armnn_driver
{
+class DriverOptions;
namespace hal_1_2
{
@@ -31,12 +32,13 @@ public:
using ExecutionCallback = V1_2::IExecutionCallback;
using getSupportedOperations_cb = V1_2::IDevice::getSupportedOperations_1_2_cb;
using ErrorStatus = V1_0::ErrorStatus;
+ using DeviceType = V1_2::DeviceType;
+
+ static DeviceType GetDeviceTypeFromOptions(const DriverOptions& options);
static bool ConvertOperation(const Operation& operation, const Model& model, ConversionData& data);
private:
- static bool ConvertAdd(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertArgMinMax(const Operation& operation,
const Model& model,
ConversionData& data,
@@ -46,6 +48,10 @@ private:
static bool ConvertBatchToSpaceNd(const Operation& operation, const Model& model, ConversionData& data);
+ static bool ConvertCast(const Operation& operation, const Model& model, ConversionData& data);
+
+ static bool ConvertChannelShuffle(const Operation& operation, const Model& model, ConversionData& data);
+
static bool ConvertComparison(const Operation& operation,
const Model& model,
ConversionData& data,
@@ -61,10 +67,13 @@ private:
static bool ConvertDequantize(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertDiv(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertExpandDims(const Operation& operation, const Model& model, ConversionData& data);
+ static bool ConvertElementwiseBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ armnn::BinaryOperation binaryOperation);
+
static bool ConvertElementwiseUnary(const Operation& operation,
const Model& model,
ConversionData& data,
@@ -96,14 +105,8 @@ private:
static bool ConvertMaxPool2d(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertMaximum(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertMean(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertMinimum(const Operation& operation, const Model& model, ConversionData& data);
-
- static bool ConvertMul(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertPad(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertPadV2(const Operation& operation, const Model& model, ConversionData& data);
@@ -114,6 +117,11 @@ private:
static bool ConvertQuantized16BitLstm(const Operation& operation, const Model& model, ConversionData& data);
+ static bool ConvertReduce(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ ReduceOperation reduce_operation);
+
static bool ConvertReLu(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertReLu1(const Operation& operation, const Model& model, ConversionData& data);
@@ -139,13 +147,15 @@ private:
static bool ConvertStridedSlice(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertSub(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertTanH(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertTranspose(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertTransposeConv2d(const Operation& operation, const Model& model, ConversionData& data);
+
+ static bool ConvertUnidirectionalSequenceLstm(const Operation& operation,
+ const Model& model,
+ ConversionData& data);
};
} // namespace hal_1_2
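
The 1.2 header also gains a DeviceType alias and a static GetDeviceTypeFromOptions(const DriverOptions&) helper; getType() in 1.3/ArmnnDriver.hpp below uses it so the reported device type follows the configured backends instead of always answering CPU. Its implementation is outside the hunks shown here, so the mapping below is only an assumed sketch:

    // Assumed sketch - not the actual GetDeviceTypeFromOptions body.
    V1_2::DeviceType DeviceTypeFromBackends(const std::vector<armnn::BackendId>& backends)
    {
        for (const auto& backend : backends)
        {
            if (backend == armnn::Compute::GpuAcc)
            {
                return V1_2::DeviceType::GPU; // any GPU backend makes the device report as GPU
            }
        }
        return V1_2::DeviceType::CPU; // CpuAcc / CpuRef fall back to CPU
    }
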
diff --git a/1.3/ArmnnDriver.hpp b/1.3/ArmnnDriver.hpp
index 8292d69..6d2e0b7 100644
--- a/1.3/ArmnnDriver.hpp
+++ b/1.3/ArmnnDriver.hpp
@@ -21,6 +21,8 @@
#include "../1.0/ArmnnDriverImpl.hpp"
#include "../1.0/HalPolicy.hpp"
+#include <armnn/BackendHelper.hpp>
+
#include <log/log.h>
namespace armnn_driver
@@ -31,6 +33,7 @@ namespace hal_1_3
class ArmnnDriver : public ArmnnDevice, public V1_3::IDevice
{
public:
+ using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
ArmnnDriver(DriverOptions options)
: ArmnnDevice(std::move(options))
@@ -39,9 +42,7 @@ public:
}
~ArmnnDriver() {}
- using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
-public:
Return<void> getCapabilities(V1_0::IDevice::getCapabilities_cb cb) override
{
ALOGV("hal_1_3::ArmnnDriver::getCapabilities()");
@@ -131,10 +132,13 @@ public:
cb);
}
- Return<V1_0::ErrorStatus> prepareModel_1_2(const V1_2::Model& model, V1_1::ExecutionPreference preference,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&, const HidlToken&,
- const android::sp<V1_2::IPreparedModelCallback>& cb)
+ Return<V1_0::ErrorStatus> prepareModel_1_2(
+ const V1_2::Model& model,
+ V1_1::ExecutionPreference preference,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb)
{
ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_2()");
@@ -151,6 +155,9 @@ public:
m_ClTunedParameters,
m_Options,
model,
+ modelCacheHandle,
+ dataCacheHandle,
+ token,
cb,
model.relaxComputationFloat32toFloat16
&& m_Options.GetFp16Enabled());
@@ -174,14 +181,15 @@ public:
cb);
}
- Return<V1_3::ErrorStatus> prepareModel_1_3(const V1_3::Model& model,
- V1_1::ExecutionPreference preference,
- V1_3::Priority priority,
- const V1_3::OptionalTimePoint&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const HidlToken&,
- const android::sp<V1_3::IPreparedModelCallback>& cb)
+ Return<V1_3::ErrorStatus> prepareModel_1_3(
+ const V1_3::Model& model,
+ V1_1::ExecutionPreference preference,
+ V1_3::Priority priority,
+ const V1_3::OptionalTimePoint&,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCache,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCache,
+ const HidlToken& token,
+ const android::sp<V1_3::IPreparedModelCallback>& cb)
{
ALOGV("hal_1_3::ArmnnDriver::prepareModel_1_3()");
@@ -199,11 +207,13 @@ public:
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
-
return ArmnnDriverImpl::prepareArmnnModel_1_3(m_Runtime,
m_ClTunedParameters,
m_Options,
model,
+ modelCache,
+ dataCache,
+ token,
cb,
model.relaxComputationFloat32toFloat16
&& m_Options.GetFp16Enabled(),
@@ -219,10 +229,13 @@ public:
Return<void> getNumberOfCacheFilesNeeded(getNumberOfCacheFilesNeeded_cb cb)
{
- ALOGV("hal_1_3::ArmnnDriver::getSupportedExtensions()");
-
- // Set both numbers to be 0 for cache not supported.
- cb(V1_0::ErrorStatus::NONE, 0, 0);
+ ALOGV("hal_1_3::ArmnnDriver::getNumberOfCacheFilesNeeded()");
+ unsigned int numberOfCachedModelFiles = 0;
+ for (auto& backend : m_Options.GetBackends())
+ {
+ numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+ }
+ cb(V1_0::ErrorStatus::NONE, numberOfCachedModelFiles, 1ul);
return Void();
}
@@ -244,38 +257,47 @@ public:
Return<void> getType(getType_cb cb)
{
ALOGV("hal_1_3::ArmnnDriver::getType()");
-
- cb(V1_0::ErrorStatus::NONE, V1_2::DeviceType::CPU);
+ const auto device_type = hal_1_2::HalPolicy::GetDeviceTypeFromOptions(this->m_Options);
+ cb(V1_0::ErrorStatus::NONE, device_type);
return Void();
}
Return<V1_0::ErrorStatus> prepareModelFromCache(
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const HidlToken&,
- const sp<V1_2::IPreparedModelCallback>& callback)
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_2::IPreparedModelCallback>& cb)
{
ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache()");
- callback->notify_1_2(V1_0::ErrorStatus::GENERAL_FAILURE, nullptr);
- return V1_0::ErrorStatus::GENERAL_FAILURE;
+ return hal_1_2::ArmnnDriverImpl::prepareModelFromCache(m_Runtime,
+ m_Options,
+ modelCacheHandle,
+ dataCacheHandle,
+ token,
+ cb);
}
Return<V1_3::ErrorStatus> prepareModelFromCache_1_3(
const V1_3::OptionalTimePoint&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const android::hardware::hidl_vec<android::hardware::hidl_handle>&,
- const HidlToken&,
- const sp<V1_3::IPreparedModelCallback>& callback)
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_3::IPreparedModelCallback>& cb)
{
- ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache()");
- callback->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
- return V1_3::ErrorStatus::GENERAL_FAILURE;
+ ALOGV("hal_1_3::ArmnnDriver::prepareModelFromCache_1_3()");
+
+ return ArmnnDriverImpl::prepareModelFromCache_1_3(m_Runtime,
+ m_Options,
+ modelCacheHandle,
+ dataCacheHandle,
+ token,
+ cb);
}
Return<void> allocate(const V1_3::BufferDesc& /*desc*/,
- const android::hardware::hidl_vec<sp<V1_3::IPreparedModel>>& /*preparedModels*/,
- const android::hardware::hidl_vec<V1_3::BufferRole>& /*inputRoles*/,
- const android::hardware::hidl_vec<V1_3::BufferRole>& /*outputRoles*/,
+ const hidl_vec<android::sp<V1_3::IPreparedModel>>& /*preparedModels*/,
+ const hidl_vec<V1_3::BufferRole>& /*inputRoles*/,
+ const hidl_vec<V1_3::BufferRole>& /*outputRoles*/,
allocate_cb cb) {
ALOGV("hal_1_3::ArmnnDriver::allocate()");
cb(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr, 0);
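
With these changes getNumberOfCacheFilesNeeded() reports the summed per-backend model-cache count plus a single data-cache file for the serialized Arm NN network, and getType()/prepareModelFromCache() delegate to the 1.2 policy and driver implementation. A self-contained sketch of the counting rule, with GetNumberOfCacheFiles stubbed out (the GpuAcc-only assumption in the stub comes from the comments in this patch, not from the real helper):

    #include <string>
    #include <utility>
    #include <vector>

    // Stub for illustration only; the driver calls the real GetNumberOfCacheFiles(backend) helper.
    unsigned int GetNumberOfCacheFilesStub(const std::string& backend)
    {
        return backend == "GpuAcc" ? 1u : 0u;
    }

    // Returns {number of model-cache files, number of data-cache files}.
    std::pair<unsigned int, unsigned int> CountCacheFiles(const std::vector<std::string>& backends)
    {
        unsigned int modelCacheFiles = 0;
        for (const auto& backend : backends)
        {
            modelCacheFiles += GetNumberOfCacheFilesStub(backend);
        }
        return {modelCacheFiles, 1u}; // the single data-cache file holds the serialized .armnn network
    }
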
diff --git a/1.3/ArmnnDriverImpl.cpp b/1.3/ArmnnDriverImpl.cpp
index b2524d3..ec176d5 100644
--- a/1.3/ArmnnDriverImpl.cpp
+++ b/1.3/ArmnnDriverImpl.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd. All rights reserved.
+// Copyright © 2020, 2023 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -8,8 +8,13 @@
#include "../ModelToINetworkConverter.hpp"
#include "../SystemPropertiesUtils.hpp"
+#include <armnnDeserializer/IDeserializer.hpp>
+
#include <log/log.h>
+#include <sys/stat.h>
+#include <chrono>
+
namespace
{
const char *g_RelaxedFloat32toFloat16PerformanceExecTime = "ArmNN.relaxedFloat32toFloat16Performance.execTime";
@@ -66,9 +71,9 @@ const char *g_OperandTypeInt32PerformanceExecTime = "Armnn.operandType
const char *g_OperandTypeInt32PerformancePowerUsage = "Armnn.operandTypeInt32Performance.powerUsage";
-void NotifyCallbackAndCheck(const sp<V1_3::IPreparedModelCallback>& callback,
+void NotifyCallbackAndCheck(const android::sp<V1_3::IPreparedModelCallback>& callback,
V1_3::ErrorStatus errorStatus,
- const sp<V1_3::IPreparedModel>& preparedModelPtr)
+ const android::sp<V1_3::IPreparedModel>& preparedModelPtr)
{
Return<void> returned = callback->notify_1_3(errorStatus, preparedModelPtr);
// This check is required, if the callback fails and it isn't checked it will bring down the service
@@ -81,7 +86,7 @@ void NotifyCallbackAndCheck(const sp<V1_3::IPreparedModelCallback>& callback,
Return<V1_3::ErrorStatus> FailPrepareModel(V1_3::ErrorStatus error,
const std::string& message,
- const sp<V1_3::IPreparedModelCallback>& callback)
+ const android::sp<V1_3::IPreparedModelCallback>& callback)
{
ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());
NotifyCallbackAndCheck(callback, error, nullptr);
@@ -100,12 +105,17 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
const DriverOptions& options,
const V1_3::Model& model,
- const sp<V1_3::IPreparedModelCallback>& cb,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_3::IPreparedModelCallback>& cb,
bool float32ToFloat16,
V1_3::Priority priority)
{
ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()");
+ std::chrono::time_point<std::chrono::system_clock> prepareModelTimepoint = std::chrono::system_clock::now();
+
if (cb.get() == nullptr)
{
ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
@@ -136,21 +146,67 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
return V1_3::ErrorStatus::NONE;
}
+ // Serialize the network graph to a .armnn file if an output directory
+ // has been specified in the drivers' arguments.
+ std::vector<uint8_t> dataCacheData;
+ bool serializeToFile = dataCacheHandle.size() > 0;
+ auto serializedNetworkFileName =
+ SerializeNetwork(*modelConverter.GetINetwork(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ dataCacheData,
+ serializeToFile);
+
// Optimize the network
armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
- armnn::OptimizerOptions OptOptions;
- OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
+ armnn::OptimizerOptionsOpaque OptOptions;
+ OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
+ OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());
+
+ int cachedFd = -1;
+ bool saveCachedNetwork = options.SaveCachedNetwork();
+
+ unsigned int numberOfCachedModelFiles = 0;
+ if (modelCacheHandle.size() > 0)
+ {
+ unsigned int index = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ // The modelCacheHandle size should equal numberOfCachedModelFiles
+ // and the modelCacheHandle vector should be in the same order as the backends
+ auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+ if (numberOfCacheFiles > 0)
+ {
+ numberOfCachedModelFiles += numberOfCacheFiles;
+ if (modelCacheHandle[index]->numFds == 1)
+ {
+ // For GpuAcc numberOfCachedFiles is 1
+ if (backend == armnn::Compute::GpuAcc)
+ {
+ cachedFd = modelCacheHandle[index]->data[0];
+ saveCachedNetwork = true;
+ }
+ }
+ index += numberOfCachedModelFiles;
+ }
+ }
+ }
armnn::BackendOptions gpuAcc("GpuAcc",
{
- { "FastMathEnabled", options.IsFastMathEnabled() }
+ { "FastMathEnabled", options.IsFastMathEnabled() },
+ { "SaveCachedNetwork", saveCachedNetwork },
+ { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
+ { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() },
+ { "CachedFileDescriptor", cachedFd }
});
+
armnn::BackendOptions cpuAcc("CpuAcc",
{
- { "FastMathEnabled", options.IsFastMathEnabled() }
+ { "FastMathEnabled", options.IsFastMathEnabled() },
+ { "NumberOfThreads", options.GetNumberOfThreads() }
});
- OptOptions.m_ModelOptions.push_back(gpuAcc);
- OptOptions.m_ModelOptions.push_back(cpuAcc);
+ OptOptions.AddModelOption(gpuAcc);
+ OptOptions.AddModelOption(cpuAcc);
std::vector<std::string> errMessages;
try
@@ -189,9 +245,17 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
// Load it into the runtime.
armnn::NetworkId netId = 0;
+ std::string msg;
+ armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+ MemorySource::Undefined,
+ MemorySource::Undefined,
+ options.IsGpuProfilingEnabled());
+
+ auto numInputs = getMainModel(model).inputIndexes.size();
+ auto numOutputs = getMainModel(model).outputIndexes.size();
try
{
- if (runtime->LoadNetwork(netId, move(optNet)) != armnn::Status::Success)
+ if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
{
return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
}
@@ -204,11 +268,12 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
return V1_3::ErrorStatus::NONE;
}
- // Now that we have a networkId for the graph rename the dump file to use it
- // so that we can associate the graph file and the input/output tensor dump files
- RenameGraphDotFile(dotGraphFileName,
- options.GetRequestInputsAndOutputsDumpDir(),
- netId);
+ // Now that we have a networkId for the graph rename the exported files to use it
+ // so that we can associate the graph file and the input/output tensor exported files
+ RenameExportedFiles(serializedNetworkFileName,
+ dotGraphFileName,
+ options.GetRequestInputsAndOutputsDumpDir(),
+ netId);
std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(
@@ -217,32 +282,390 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
model,
options.GetRequestInputsAndOutputsDumpDir(),
options.IsGpuProfilingEnabled(),
- priority));
+ priority,
+ options.isAsyncModelExecutionEnabled(),
+ options.getNoOfArmnnThreads(),
+ options.isImportEnabled(),
+ options.isExportEnabled()));
// Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
// this is enabled) before the first 'real' inference which removes the overhead of the first inference.
- if (!preparedModel->ExecuteWithDummyInputs())
+ // Only run this if the GpuAcc backend has been added to options
+ if (std::find(options.GetBackends().begin(),
+ options.GetBackends().end(),
+ armnn::Compute::GpuAcc) != options.GetBackends().end())
+ {
+ if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
+ {
+ return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+ }
+
+ if (clTunedParameters &&
+ options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+ {
+ // Now that we've done one inference the CL kernel parameters will have been tuned,
+ // so save the updated file.
+ try
+ {
+ clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ }
+ catch (std::exception& error)
+ {
+ ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
+ options.GetClTunedParametersFile().c_str(), error.what());
+ }
+ }
+ }
+ size_t hashValue = 0;
+ // Cache the model
+ if (dataCacheHandle.size() > 0)
{
- return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
+ // Cache the Arm NN model
+ if (dataCacheHandle.size() != 1)
+ {
+ NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ if (dataCacheHandle[0]->numFds != 1)
+ {
+ ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, numFds != 1.");
+ NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ if (dataCacheHandle[0]->data[0] < 0)
+ {
+ ALOGW("ArmnnDriverImpl::prepareArmnnModel_1_3: Cannot cache the data, fd < 0");
+ NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+ if (dataCacheFileAccessMode != O_RDWR)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Invalid Access Mode.");
+ NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ write(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size());
+ hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
}
- if (clTunedParameters &&
- options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
+ // Cache the model data
+ if (modelCacheHandle.size() > 0)
{
- // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
- try
+ if (modelCacheHandle.size() != numberOfCachedModelFiles)
{
- clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+ return V1_3::ErrorStatus::NONE;
}
- catch (std::exception& error)
+
+ for (uint32_t i = 0; i < modelCacheHandle.size(); ++i)
{
- ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
- options.GetClTunedParametersFile().c_str(), error.what());
+ if (modelCacheHandle[i]->numFds == 1)
+ {
+ int modelCacheFileAccessMode = fcntl(modelCacheHandle[i]->data[0], F_GETFL) & O_ACCMODE;
+ if (modelCacheFileAccessMode != O_RDONLY)
+ {
+ struct stat statBuffer;
+ if (fstat(modelCacheHandle[i]->data[0], &statBuffer) == 0)
+ {
+ long modelDataSize = statBuffer.st_size;
+ if (modelDataSize > 0)
+ {
+ std::vector<uint8_t> modelData(modelDataSize);
+ pread(modelCacheHandle[i]->data[0], modelData.data(), modelData.size(), 0);
+ hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+ }
+ }
+ }
+ }
}
}
+ if (hashValue != 0)
+ {
+ CacheDataHandlerInstance().Register(token, hashValue, dataCacheData.size());
+ }
NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+ ALOGV("ArmnnDriverImpl::prepareModel cache timing = %lld µs", std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - prepareModelTimepoint).count());
+
+
+ return V1_3::ErrorStatus::NONE;
+}
+
+Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareModelFromCache_1_3(
+ const armnn::IRuntimePtr& runtime,
+ const DriverOptions& options,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_3::IPreparedModelCallback>& cb)
+{
+ ALOGV("ArmnnDriverImpl::prepareModelFromCache_1_3()");
+ std::chrono::time_point<std::chrono::system_clock> modelFromCacheTimepoint = std::chrono::system_clock::now();
+
+ if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN)
+ {
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ if (cb.get() == nullptr)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid callback passed to prepareModelFromCache_1_3");
+ return V1_3::ErrorStatus::INVALID_ARGUMENT;
+ }
+
+ if (!runtime)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Device unavailable");
+ return V1_3::ErrorStatus::DEVICE_UNAVAILABLE;
+ }
+
+ // dataCacheHandle should always contain exactly one entry:
+ // the serialized Arm NN model
+ if (dataCacheHandle.size() != 1)
+ {
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ // Check that the number of cached model files matches the expected value
+ unsigned int numberOfCachedModelFiles = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ numberOfCachedModelFiles += GetNumberOfCacheFiles(backend);
+ }
+ if (modelCacheHandle.size() != numberOfCachedModelFiles)
+ {
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ if (dataCacheHandle[0]->numFds != 1)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, numFds != 1.");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ if (dataCacheHandle[0]->data[0] < 0)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the cache data, fd < 0");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ int dataCacheFileAccessMode = fcntl(dataCacheHandle[0]->data[0], F_GETFL) & O_ACCMODE;
+ if (dataCacheFileAccessMode != O_RDWR)
+ {
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ auto dataSize = CacheDataHandlerInstance().GetCacheSize(token);
+ if (dataSize == 0)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ int offset = 0;
+ {
+ struct stat statBuffer;
+ if (fstat(dataCacheHandle[0]->data[0], &statBuffer) == 0)
+ {
+ unsigned long bufferSize = statBuffer.st_size;
+ if (bufferSize != dataSize)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Invalid data to deserialize!");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+ }
+ }
+ std::vector<uint8_t> dataCacheData(dataSize);
+ pread(dataCacheHandle[0]->data[0], dataCacheData.data(), dataCacheData.size(), offset);
+ auto hashValue = CacheDataHandlerInstance().Hash(dataCacheData);
+
+ int gpuAccCachedFd = -1;
+ bool saveCachedNetwork = false;
+ if (modelCacheHandle.size() > 0)
+ {
+ unsigned int index = 0;
+ for (auto& backend : options.GetBackends())
+ {
+ // The modelCacheHandle size should equal numberOfCachedModelFiles
+ // and the modelCacheHandle vector should be in the same order as the backends
+ auto numberOfCacheFiles = GetNumberOfCacheFiles(backend);
+ if (numberOfCacheFiles > 0)
+ {
+ if (modelCacheHandle[index]->numFds != 1)
+ {
+ ALOGW(
+ "ArmnnDriverImpl::prepareModelFromCache_1_3(): Cannot read from the model cache, numFds != 1.");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+ auto cachedFd = modelCacheHandle[index]->data[0];
+
+ int modelCacheFileAccessMode = fcntl(cachedFd, F_GETFL) & O_ACCMODE;
+ if (modelCacheFileAccessMode != O_RDWR)
+ {
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ struct stat statBuffer;
+ if (cachedFd != -1 && fstat(cachedFd, &statBuffer) == 0)
+ {
+ long modelDataSize = statBuffer.st_size;
+ if (modelDataSize <= 0)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3(): Wrong cached model size!");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::NONE;
+ }
+ std::vector<uint8_t> modelData(modelDataSize);
+ pread(cachedFd, modelData.data(), modelData.size(), 0);
+ hashValue ^= CacheDataHandlerInstance().Hash(modelData);
+
+ // For GpuAcc numberOfCachedFiles is 1
+ if (backend == armnn::Compute::GpuAcc)
+ {
+ gpuAccCachedFd = cachedFd;
+ }
+ }
+ index += numberOfCacheFiles;
+ }
+ }
+ }
+
+ if (!CacheDataHandlerInstance().Validate(token, hashValue, dataCacheData.size()))
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: ValidateHash() failed!");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ // Deserialize the network.
+ armnn::INetworkPtr network = armnn::INetworkPtr(nullptr, [](armnn::INetwork*){});
+ try
+ {
+ network = armnnDeserializer::IDeserializer::Create()->CreateNetworkFromBinary(dataCacheData);
+ }
+ catch (std::exception&)
+ {
+ ALOGW("ArmnnDriverImpl::prepareModelFromCache_1_3: Exception caught from Deserializer!");
+ cb->notify_1_3(V1_3::ErrorStatus::GENERAL_FAILURE, nullptr);
+ return V1_3::ErrorStatus::GENERAL_FAILURE;
+ }
+
+ // Optimize the network
+ armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
+ armnn::OptimizerOptionsOpaque OptOptions;
+ OptOptions.SetReduceFp32ToFp16(options.GetFp16Enabled());
+ OptOptions.SetProfilingEnabled(options.IsGpuProfilingEnabled());
+
+ armnn::BackendOptions gpuAcc("GpuAcc",
+ {
+ {"FastMathEnabled", options.IsFastMathEnabled()},
+ {"SaveCachedNetwork", saveCachedNetwork},
+ {"CachedNetworkFilePath", options.GetCachedNetworkFilePath()},
+ {"MLGOTuningFilePath", options.GetClMLGOTunedParametersFile()},
+ {"CachedFileDescriptor", gpuAccCachedFd}
+ });
+
+ armnn::BackendOptions cpuAcc("CpuAcc",
+ {
+ {"FastMathEnabled", options.IsFastMathEnabled()},
+ {"NumberOfThreads", options.GetNumberOfThreads()}
+ });
+ OptOptions.AddModelOption(gpuAcc);
+ OptOptions.AddModelOption(cpuAcc);
+
+ std::vector<std::string> errMessages;
+ try
+ {
+ optNet = armnn::Optimize(*network.get(),
+ options.GetBackends(),
+ runtime->GetDeviceSpec(),
+ OptOptions,
+ errMessages);
+ }
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what() << ") caught from optimize.";
+ FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ // Check that the optimized network is valid.
+ if (!optNet)
+ {
+ std::stringstream message;
+ message << "Invalid optimized network";
+ for (const std::string& msg : errMessages)
+ {
+ message << "\n" << msg;
+ }
+ FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ // Export the optimized network graph to a dot file if an output dump directory
+ // has been specified in the drivers' arguments.
+ std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
+ options.GetRequestInputsAndOutputsDumpDir());
+
+ // Load it into the runtime.
+ armnn::NetworkId netId = 0;
+ std::string msg;
+ armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+ MemorySource::Undefined,
+ MemorySource::Undefined,
+ options.IsGpuProfilingEnabled());
+
+ try
+ {
+ if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
+ {
+ return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, msg, cb);
+ }
+ }
+ catch (std::exception& e)
+ {
+ std::stringstream message;
+ message << "Exception (" << e.what() << ") caught from LoadNetwork.";
+ FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
+ new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(netId,
+ runtime.get(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ options.IsGpuProfilingEnabled(),
+ V1_3::Priority::MEDIUM,
+ options.isAsyncModelExecutionEnabled(),
+ options.getNoOfArmnnThreads(),
+ options.isImportEnabled(),
+ options.isExportEnabled(),
+ true));
+
+ NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());
+
+ ALOGV("ArmnnDriverImpl::prepareModelFromCache timing = %lld µs",
+ std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - modelFromCacheTimepoint).count());
+
return V1_3::ErrorStatus::NONE;
}
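
The caching flow added in this file is symmetric: prepareArmnnModel_1_3 writes the serialized network to the data-cache fd, hashes those bytes, XORs in a hash of each readable model-cache file and registers the result against the token, while prepareModelFromCache_1_3 re-reads the same fds, recomputes the hash and refuses to deserialize if CacheDataHandler's Validate disagrees. A self-contained sketch of that register/validate pattern, with std::hash standing in for the handler's hash function (an assumption, not the real implementation):

    #include <cstdint>
    #include <functional>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    // Simplified stand-in for CacheDataHandler: hash at prepare time, validate at cache-load time.
    class CacheRegistrySketch
    {
    public:
        static size_t Hash(const std::vector<uint8_t>& data)
        {
            return std::hash<std::string>{}(std::string(data.begin(), data.end()));
        }

        void Register(const std::string& token, size_t hashValue, size_t dataSize)
        {
            m_Entries[token] = {hashValue, dataSize};
        }

        bool Validate(const std::string& token, size_t hashValue, size_t dataSize) const
        {
            auto it = m_Entries.find(token);
            return it != m_Entries.end()
                && it->second.first == hashValue
                && it->second.second == dataSize;
        }

    private:
        std::map<std::string, std::pair<size_t, size_t>> m_Entries;
    };

On the prepare path the driver effectively calls Register(token, Hash(dataCacheData) ^ Hash(modelCacheFile0) ^ ..., dataCacheData.size()); on the cache path it recomputes the same expression from the file descriptors and calls Validate before handing the bytes to the deserializer.
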
diff --git a/1.3/ArmnnDriverImpl.hpp b/1.3/ArmnnDriverImpl.hpp
index f33a294..a482eda 100644
--- a/1.3/ArmnnDriverImpl.hpp
+++ b/1.3/ArmnnDriverImpl.hpp
@@ -7,11 +7,18 @@
#include <HalInterfaces.h>
+#include "../CacheDataHandler.hpp"
#include "../DriverOptions.hpp"
#include <armnn/ArmNN.hpp>
-using namespace android::nn;
+#if !defined(ARMNN_ANDROID_S)
+using namespace android::nn::hal;
+#endif
+
+#ifdef ARMNN_ANDROID_S
+using namespace android::hardware;
+#endif
namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
namespace V1_2 = ::android::hardware::neuralnetworks::V1_2;
@@ -25,17 +32,31 @@ namespace hal_1_3
class ArmnnDriverImpl
{
public:
- static android::hardware::Return<V1_3::ErrorStatus> prepareArmnnModel_1_3(const armnn::IRuntimePtr& runtime,
- const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
- const DriverOptions& options,
- const V1_3::Model& model,
- const android::sp<V1_3::IPreparedModelCallback>& cb,
- bool float32ToFloat16 = false,
- V1_3::Priority priority = V1_3::Priority::MEDIUM);
-
- static android::hardware::Return<void> getCapabilities_1_3(const armnn::IRuntimePtr& runtime,
+ using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
+
+ static Return<V1_3::ErrorStatus> prepareArmnnModel_1_3(
+ const armnn::IRuntimePtr& runtime,
+ const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
+ const DriverOptions& options,
+ const V1_3::Model& model,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_3::IPreparedModelCallback>& cb,
+ bool float32ToFloat16 = false,
+ V1_3::Priority priority = V1_3::Priority::MEDIUM);
+
+ static Return<V1_3::ErrorStatus> prepareModelFromCache_1_3(
+ const armnn::IRuntimePtr& runtime,
+ const DriverOptions& options,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& modelCacheHandle,
+ const android::hardware::hidl_vec<android::hardware::hidl_handle>& dataCacheHandle,
+ const HidlToken& token,
+ const android::sp<V1_3::IPreparedModelCallback>& cb);
+
+ static Return<void> getCapabilities_1_3(const armnn::IRuntimePtr& runtime,
V1_3::IDevice::getCapabilities_1_3_cb cb);
};
} // namespace hal_1_3
-} // namespace armnn_driver
+} // namespace armnn_driver
\ No newline at end of file
diff --git a/1.3/HalPolicy.cpp b/1.3/HalPolicy.cpp
index 53b67c7..16b325b 100644
--- a/1.3/HalPolicy.cpp
+++ b/1.3/HalPolicy.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd. All rights reserved.
+// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -24,7 +24,7 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
case V1_3::OperationType::ABS:
return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Abs);
case V1_3::OperationType::ADD:
- return ConvertAdd(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Add);
case V1_3::OperationType::ARGMAX:
return ConvertArgMinMax(operation, model, data, ArgMinMaxFunction::Max);
case V1_3::OperationType::ARGMIN:
@@ -33,6 +33,10 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
return ConvertAveragePool2d(operation, model, data);
case V1_3::OperationType::BATCH_TO_SPACE_ND:
return ConvertBatchToSpaceNd(operation, model, data);
+ case V1_3::OperationType::CAST:
+ return ConvertCast(operation, model, data);
+ case V1_3::OperationType::CHANNEL_SHUFFLE:
+ return ConvertChannelShuffle(operation, model, data);
case V1_3::OperationType::CONCATENATION:
return ConvertConcatenation(operation, model, data);
case V1_3::OperationType::CONV_2D:
@@ -44,7 +48,7 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
case V1_3::OperationType::DEQUANTIZE:
return ConvertDequantize(operation, model, data);
case V1_3::OperationType::DIV:
- return ConvertDiv(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Div);
case V1_3::OperationType::ELU:
return ConvertElu(operation, model, data);
case V1_3::OperationType::EQUAL:
@@ -81,6 +85,14 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
return ConvertComparison(operation, model, data, ComparisonOperation::LessOrEqual);
case V1_3::OperationType::LOCAL_RESPONSE_NORMALIZATION:
return ConvertLocalResponseNormalization(operation, model, data);
+ case V1_3::OperationType::LOG:
+ return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Log);
+ case V1_3::OperationType::LOGICAL_AND:
+ return ConvertLogicalBinary(operation, model, data, LogicalBinaryOperation::LogicalAnd);
+ case V1_3::OperationType::LOGICAL_NOT:
+ return ConvertElementwiseUnary(operation, model, data, UnaryOperation::LogicalNot);
+ case V1_3::OperationType::LOGICAL_OR:
+ return ConvertLogicalBinary(operation, model, data, LogicalBinaryOperation::LogicalOr);
case V1_3::OperationType::LOGISTIC:
return ConvertLogistic(operation, model, data);
case V1_3::OperationType::LOG_SOFTMAX:
@@ -90,13 +102,13 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
case V1_3::OperationType::MAX_POOL_2D:
return ConvertMaxPool2d(operation, model, data);
case V1_3::OperationType::MAXIMUM:
- return ConvertMaximum(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Maximum);
case V1_3::OperationType::MEAN:
return ConvertMean(operation, model, data);
case V1_3::OperationType::MINIMUM:
- return ConvertMinimum(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Minimum);
case V1_3::OperationType::MUL:
- return ConvertMul(operation, model, data);
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Mul);
case V1_3::OperationType::NEG:
return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Neg);
case V1_3::OperationType::NOT_EQUAL:
@@ -115,6 +127,14 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
return ConvertQuantized16BitLstm(operation, model, data);
case V1_3::OperationType::RANK:
return ConvertRank(operation, model, data);
+ case V1_3::OperationType::REDUCE_MAX:
+ return ConvertReduce(operation, model, data, ReduceOperation::Max);
+ case V1_3::OperationType::REDUCE_MIN:
+ return ConvertReduce(operation, model, data, ReduceOperation::Min);
+ case V1_3::OperationType::REDUCE_PROD:
+ return ConvertReduce(operation, model, data, ReduceOperation::Prod);
+ case V1_3::OperationType::REDUCE_SUM:
+ return ConvertReduce(operation, model, data, ReduceOperation::Sum);
case V1_3::OperationType::RELU:
return ConvertReLu(operation, model, data);
case V1_3::OperationType::RELU1:
@@ -129,38 +149,36 @@ bool HalPolicy::ConvertOperation(const Operation& operation, const Model& model,
return ConvertResize(operation, model, data, ResizeMethod::NearestNeighbor);
case V1_3::OperationType::RSQRT:
return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Rsqrt);
+ case V1_3::OperationType::SIN:
+ return ConvertElementwiseUnary(operation, model, data, UnaryOperation::Sin);
+ case V1_3::OperationType::SOFTMAX:
+ return ConvertSoftmax(operation, model, data);
+ case V1_3::OperationType::SPACE_TO_BATCH_ND :
+ return ConvertSpaceToBatchNd(operation, model, data);
+ case V1_3::OperationType::SPACE_TO_DEPTH:
+ return ConvertSpaceToDepth(operation, model, data);
case V1_3::OperationType::SQRT:
return ConvertSqrt(operation, model, data);
case V1_3::OperationType::SQUEEZE:
return ConvertSqueeze(operation, model, data);
case V1_3::OperationType::STRIDED_SLICE:
return ConvertStridedSlice(operation, model, data);
+ case V1_3::OperationType::SUB:
+ return ConvertElementwiseBinary(operation, model, data, BinaryOperation::Sub);
case V1_3::OperationType::TRANSPOSE:
return ConvertTranspose(operation, model, data);
case V1_3::OperationType::TRANSPOSE_CONV_2D:
return ConvertTransposeConv2d(operation, model, data);
- case V1_3::OperationType::SOFTMAX:
- return ConvertSoftmax(operation, model, data);
- case V1_3::OperationType::SPACE_TO_BATCH_ND :
- return ConvertSpaceToBatchNd(operation, model, data);
- case V1_3::OperationType::SPACE_TO_DEPTH:
- return ConvertSpaceToDepth(operation, model, data);
- case V1_3::OperationType::SUB:
- return ConvertSub(operation, model, data);
case V1_3::OperationType::TANH:
return ConvertTanH(operation, model, data);
+ case V1_3::OperationType::UNIDIRECTIONAL_SEQUENCE_LSTM:
+ return ConvertUnidirectionalSequenceLstm(operation, model, data);
default:
return Fail("%s: Operation type %s not supported in ArmnnDriver",
__func__, toString(operation.type).c_str());
}
}
-bool HalPolicy::ConvertAdd(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_3::HalPolicy::ConvertAdd()");
- return ::ConvertAdd<hal_1_3::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertArgMinMax(const V1_3::Operation& operation,
const V1_3::Model& model,
ConversionData& data,
@@ -182,6 +200,18 @@ bool HalPolicy::ConvertBatchToSpaceNd(const Operation& operation, const Model& m
return ::ConvertBatchToSpaceNd<hal_1_3::HalPolicy>(operation, model, data);
}
+bool HalPolicy::ConvertCast(const Operation& operation, const Model& model, ConversionData& data)
+{
+ ALOGV("hal_1_3::HalPolicy::ConvertCast()");
+ return ::ConvertCast<hal_1_3::HalPolicy>(operation, model, data);
+}
+
+bool HalPolicy::ConvertChannelShuffle(const Operation& operation, const Model& model, ConversionData& data)
+{
+ ALOGV("hal_1_3::HalPolicy::ConvertChannelShuffle()");
+ return ::ConvertChannelShuffle<hal_1_3::HalPolicy>(operation, model, data);
+}
+
bool HalPolicy::ConvertComparison(const Operation& operation,
const Model& model,
ConversionData& data,
@@ -191,7 +221,6 @@ bool HalPolicy::ConvertComparison(const Operation& operation,
return ::ConvertComparison_1_2<hal_1_3::HalPolicy>(operation, model, data, comparisonOperation);
}
-
bool HalPolicy::ConvertConcatenation(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_3::HalPolicy::ConvertConcatenation()");
@@ -222,10 +251,13 @@ bool HalPolicy::ConvertDequantize(const Operation& operation, const Model& model
return ::ConvertDequantize_1_2<hal_1_3::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertDiv(const Operation& operation, const Model& model, ConversionData& data)
+bool HalPolicy::ConvertElementwiseBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ BinaryOperation binaryOperation)
{
- ALOGV("hal_1_3::HalPolicy::ConvertDiv()");
- return ::ConvertDiv<hal_1_3::HalPolicy>(operation, model, data);
+ ALOGV("hal_1_3::HalPolicy::ConvertElementwiseBinary()");
+ return ::ConvertElementwiseBinary<hal_1_3::HalPolicy>(operation, model, data, binaryOperation);
}
bool HalPolicy::ConvertElementwiseUnary(const Operation& operation,
@@ -314,6 +346,15 @@ bool HalPolicy::ConvertLocalResponseNormalization(const Operation& operation,
return ::ConvertLocalResponseNormalization<hal_1_3::HalPolicy>(operation, model, data);
}
+bool HalPolicy::ConvertLogicalBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ armnn::LogicalBinaryOperation logicalOperation)
+{
+ ALOGV("hal_1_3::HalPolicy::ConvertLogicalBinary()");
+ return ::ConvertLogicalBinary<hal_1_3::HalPolicy>(operation, model, data, logicalOperation);
+}
+
bool HalPolicy::ConvertLogistic(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_3::HalPolicy::ConvertLogistic()");
@@ -338,30 +379,12 @@ bool HalPolicy::ConvertMaxPool2d(const Operation& operation, const Model& model,
return ConvertPooling2d<hal_1_3::HalPolicy>(operation, __func__, PoolingAlgorithm::Max, model, data);
}
-bool HalPolicy::ConvertMaximum(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_3::HalPolicy::ConvertMaximum()");
- return ::ConvertMaximum<hal_1_3::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertMean(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_3::HalPolicy::ConvertMean()");
return ::ConvertMean<hal_1_3::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertMinimum(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_3::HalPolicy::ConvertMinimum()");
- return ::ConvertMinimum<hal_1_3::HalPolicy>(operation, model, data);
-}
-
-bool HalPolicy::ConvertMul(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_3::HalPolicy::ConvertMul()");
- return ::ConvertMul<hal_1_3::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertPad(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_3::HalPolicy::ConvertPad()");
@@ -404,6 +427,15 @@ bool HalPolicy::ConvertRank(const Operation& operation, const Model& model, Conv
return ::ConvertRank<hal_1_3::HalPolicy>(operation, model, data);
}
+bool HalPolicy::ConvertReduce(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ ReduceOperation reduceOperation)
+{
+ ALOGV("hal_1_3::HalPolicy::ConvertReduce()");
+ return ::ConvertReduce<hal_1_3::HalPolicy>(operation, model, data, reduceOperation);
+}
+
bool HalPolicy::ConvertReLu(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_3::HalPolicy::ConvertReLu()");
@@ -455,12 +487,6 @@ bool HalPolicy::ConvertSoftmax(const Operation& operation, const Model& model, C
return ::ConvertSoftmax<hal_1_3::HalPolicy>(operation, model, data);
}
-bool HalPolicy::ConvertSub(const Operation& operation, const Model& model, ConversionData& data)
-{
- ALOGV("hal_1_3::HalPolicy::ConvertSub()");
- return ::ConvertSub<hal_1_3::HalPolicy>(operation, model, data);
-}
-
bool HalPolicy::ConvertTanH(const Operation& operation, const Model& model, ConversionData& data)
{
ALOGV("hal_1_3::HalPolicy::ConvertTanH()");
@@ -500,5 +526,11 @@ bool HalPolicy::ConvertTranspose(const Operation& operation, const Model& model,
return ::ConvertTranspose<hal_1_3::HalPolicy>(operation, model, data);
}
+bool HalPolicy::ConvertUnidirectionalSequenceLstm(const Operation& operation, const Model& model, ConversionData& data)
+{
+ ALOGV("hal_1_3::HalPolicy::ConvertUnidirectionalSequenceLstm()");
+ return ::ConvertUnidirectionalSequenceLstm<hal_1_3::HalPolicy>(operation, model, data);
+}
+
} // namespace hal_1_3
} // namespace armnn_driver
diff --git a/1.3/HalPolicy.hpp b/1.3/HalPolicy.hpp
index 0eb5f4d..63e5ee7 100644
--- a/1.3/HalPolicy.hpp
+++ b/1.3/HalPolicy.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd. All rights reserved.
+// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -36,8 +36,6 @@ public:
static bool ConvertOperation(const Operation& operation, const Model& model, ConversionData& data);
private:
- static bool ConvertAdd(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertArgMinMax(const Operation& operation,
const Model& model,
ConversionData& data,
@@ -47,6 +45,10 @@ private:
static bool ConvertBatchToSpaceNd(const Operation& operation, const Model& model, ConversionData& data);
+ static bool ConvertCast(const Operation& operation, const Model& model, ConversionData& data);
+
+ static bool ConvertChannelShuffle(const Operation& operation, const Model& model, ConversionData& data);
+
static bool ConvertComparison(const Operation& operation,
const Model& model,
ConversionData& data,
@@ -62,7 +64,10 @@ private:
static bool ConvertDequantize(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertDiv(const Operation& operation, const Model& model, ConversionData& data);
+ static bool ConvertElementwiseBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ armnn::BinaryOperation binaryOperation);
static bool ConvertElementwiseUnary(const Operation& operation,
const Model& model,
@@ -95,6 +100,11 @@ private:
const Model& model,
ConversionData& data);
+ static bool ConvertLogicalBinary(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ armnn::LogicalBinaryOperation logicalOperation);
+
static bool ConvertLogistic(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertLogSoftmax(const Operation& operation, const Model& model, ConversionData& data);
@@ -103,14 +113,8 @@ private:
static bool ConvertMaxPool2d(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertMaximum(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertMean(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertMinimum(const Operation& operation, const Model& model, ConversionData& data);
-
- static bool ConvertMul(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertPad(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertPadV2(const Operation& operation, const Model& model, ConversionData& data);
@@ -125,6 +129,11 @@ private:
static bool ConvertRank(const Operation& operation, const Model& model, ConversionData& data);
+ static bool ConvertReduce(const Operation& operation,
+ const Model& model,
+ ConversionData& data,
+ ReduceOperation reduceOperation);
+
static bool ConvertReLu(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertReLu1(const Operation& operation, const Model& model, ConversionData& data);
@@ -150,13 +159,15 @@ private:
static bool ConvertStridedSlice(const Operation& operation, const Model& model, ConversionData& data);
- static bool ConvertSub(const Operation& operation, const Model& model, ConversionData& data);
-
static bool ConvertTanH(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertTranspose(const Operation& operation, const Model& model, ConversionData& data);
static bool ConvertTransposeConv2d(const Operation& operation, const Model& model, ConversionData& data);
+
+ static bool ConvertUnidirectionalSequenceLstm(const Operation& operation,
+ const Model& model,
+ ConversionData& data);
};
} // namespace hal_1_3
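
The header change above collapses the former ConvertAdd, ConvertDiv, ConvertMaximum, ConvertMinimum, ConvertMul and ConvertSub entry points into a single ConvertElementwiseBinary parameterised by an armnn::BinaryOperation, and likewise adds parameterised ConvertLogicalBinary and ConvertReduce helpers. The self-contained sketch below uses illustrative stand-in names (not the driver's code or the ArmNN API) to show the shape of that refactor: one handler taking the operation as data replaces a family of near-duplicate converters.

#include <iostream>

// Stand-in for armnn::BinaryOperation.
enum class BinaryOperation { Add, Div, Maximum, Minimum, Mul, Sub };

// One handler replaces ConvertAdd/ConvertDiv/ConvertMaximum/ConvertMinimum/ConvertMul/ConvertSub;
// which layer gets added to the network is now selected by the argument.
bool ConvertElementwiseBinary(BinaryOperation binaryOperation)
{
    switch (binaryOperation)
    {
        case BinaryOperation::Add:     std::cout << "add ElementwiseBinary(Add) layer\n";     return true;
        case BinaryOperation::Div:     std::cout << "add ElementwiseBinary(Div) layer\n";     return true;
        case BinaryOperation::Maximum: std::cout << "add ElementwiseBinary(Maximum) layer\n"; return true;
        case BinaryOperation::Minimum: std::cout << "add ElementwiseBinary(Minimum) layer\n"; return true;
        case BinaryOperation::Mul:     std::cout << "add ElementwiseBinary(Mul) layer\n";     return true;
        case BinaryOperation::Sub:     std::cout << "add ElementwiseBinary(Sub) layer\n";     return true;
    }
    return false;
}

int main()
{
    // A caller that previously picked a dedicated Convert* function now just passes the enum.
    return ConvertElementwiseBinary(BinaryOperation::Add) ? 0 : 1;
}
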
diff --git a/ArmnnDevice.cpp b/ArmnnDevice.cpp
index ffb07c8..de22143 100644
--- a/ArmnnDevice.cpp
+++ b/ArmnnDevice.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -98,7 +98,16 @@ ArmnnDevice::ArmnnDevice(DriverOptions options)
}
else
{
- backends.push_back(backend);
+ if (m_Options.isAsyncModelExecutionEnabled() &&
+ armnn::HasCapability(armnn::BackendOptions::BackendOption{"AsyncExecution", false}, backend))
+ {
+ ALOGV("ArmnnDevice: ArmNN does not support AsyncExecution with the following backend: %s",
+ backend.Get().c_str());
+ }
+ else
+ {
+ backends.push_back(backend);
+ }
}
}
}
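
In the ArmnnDevice change above, a requested backend is dropped from the runtime's backend list when asynchronous model execution has been requested but the backend reports the "AsyncExecution" capability as false. Below is a self-contained sketch of that filter; ReportsAsyncExecutionDisabled is a hypothetical stand-in for the armnn::HasCapability(BackendOptions::BackendOption{"AsyncExecution", false}, backend) call used in the hunk.

#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in for the HasCapability check: returns true when the backend
// explicitly declares that it cannot run asynchronously.
bool ReportsAsyncExecutionDisabled(const std::string& backend)
{
    return backend == "GpuAcc";  // illustrative value only
}

std::vector<std::string> FilterBackends(const std::vector<std::string>& requested, bool asyncExecutionEnabled)
{
    std::vector<std::string> backends;
    for (const auto& backend : requested)
    {
        if (asyncExecutionEnabled && ReportsAsyncExecutionDisabled(backend))
        {
            // Mirrors the ALOGV in the hunk: log and skip instead of adding the backend.
            std::cout << "AsyncExecution not supported by backend: " << backend << '\n';
            continue;
        }
        backends.push_back(backend);
    }
    return backends;
}

int main()
{
    for (const auto& backend : FilterBackends({"GpuAcc", "CpuAcc"}, /*asyncExecutionEnabled=*/true))
    {
        std::cout << "Using backend: " << backend << '\n';
    }
    return 0;
}
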
diff --git a/ArmnnDriverImpl.cpp b/ArmnnDriverImpl.cpp
index a9b41bd..dd60cc7 100644
--- a/ArmnnDriverImpl.cpp
+++ b/ArmnnDriverImpl.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017, 2023 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -23,6 +23,7 @@
#include <ValidateHal.h>
#include <log/log.h>
+#include <chrono>
using namespace std;
using namespace android;
@@ -70,6 +71,8 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
{
ALOGV("ArmnnDriverImpl::prepareModel()");
+ std::chrono::time_point<std::chrono::system_clock> prepareModelTimepoint = std::chrono::system_clock::now();
+
if (cb.get() == nullptr)
{
ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
@@ -100,21 +103,36 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
return V1_0::ErrorStatus::NONE;
}
+ // Serialize the network graph to a .armnn file if an output directory
+ // has been specified in the drivers' arguments.
+ std::vector<uint8_t> dataCacheData;
+ auto serializedNetworkFileName =
+ SerializeNetwork(*modelConverter.GetINetwork(),
+ options.GetRequestInputsAndOutputsDumpDir(),
+ dataCacheData,
+ false);
+
// Optimize the network
armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
- armnn::OptimizerOptions OptOptions;
- OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
+ armnn::OptimizerOptionsOpaque OptOptions;
+ OptOptions.SetReduceFp32ToFp16(float32ToFloat16);
armnn::BackendOptions gpuAcc("GpuAcc",
{
- { "FastMathEnabled", options.IsFastMathEnabled() }
+ { "FastMathEnabled", options.IsFastMathEnabled() },
+ { "SaveCachedNetwork", options.SaveCachedNetwork() },
+ { "CachedNetworkFilePath", options.GetCachedNetworkFilePath() },
+ { "MLGOTuningFilePath", options.GetClMLGOTunedParametersFile() }
+
});
+
armnn::BackendOptions cpuAcc("CpuAcc",
{
- { "FastMathEnabled", options.IsFastMathEnabled() }
+ { "FastMathEnabled", options.IsFastMathEnabled() },
+ { "NumberOfThreads", options.GetNumberOfThreads() }
});
- OptOptions.m_ModelOptions.push_back(gpuAcc);
- OptOptions.m_ModelOptions.push_back(cpuAcc);
+ OptOptions.AddModelOption(gpuAcc);
+ OptOptions.AddModelOption(cpuAcc);
std::vector<std::string> errMessages;
try
@@ -152,9 +170,14 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
// Load it into the runtime.
armnn::NetworkId netId = 0;
+ std::string msg;
+ armnn::INetworkProperties networkProperties(options.isAsyncModelExecutionEnabled(),
+ armnn::MemorySource::Undefined,
+ armnn::MemorySource::Undefined);
+
try
{
- if (runtime->LoadNetwork(netId, move(optNet)) != armnn::Status::Success)
+ if (runtime->LoadNetwork(netId, move(optNet), msg, networkProperties) != armnn::Status::Success)
{
return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
}
@@ -167,11 +190,12 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
return V1_0::ErrorStatus::NONE;
}
- // Now that we have a networkId for the graph rename the dump file to use it
- // so that we can associate the graph file and the input/output tensor dump files
- RenameGraphDotFile(dotGraphFileName,
- options.GetRequestInputsAndOutputsDumpDir(),
- netId);
+ // Now that we have a networkId for the graph rename the exported files to use it
+ // so that we can associate the graph file and the input/output tensor exported files
+ RenameExportedFiles(serializedNetworkFileName,
+ dotGraphFileName,
+ options.GetRequestInputsAndOutputsDumpDir(),
+ netId);
sp<ArmnnPreparedModel<HalPolicy>> preparedModel(
new ArmnnPreparedModel<HalPolicy>(
@@ -179,32 +203,43 @@ Return<V1_0::ErrorStatus> ArmnnDriverImpl<HalPolicy>::prepareModel(
runtime.get(),
model,
options.GetRequestInputsAndOutputsDumpDir(),
- options.IsGpuProfilingEnabled()));
-
- // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
- // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
- if (!preparedModel->ExecuteWithDummyInputs())
+ options.IsGpuProfilingEnabled(),
+ options.isAsyncModelExecutionEnabled(),
+ options.getNoOfArmnnThreads(),
+ options.isImportEnabled(),
+ options.isExportEnabled()));
+
+ if (std::find(options.GetBackends().begin(),
+ options.GetBackends().end(),
+ armnn::Compute::GpuAcc) != options.GetBackends().end())
{
- return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
- }
-
- if (clTunedParameters &&
- options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
- {
- // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
- try
+ // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
+ // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
+ if (!preparedModel->ExecuteWithDummyInputs())
{
- clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
}
- catch (std::exception& error)
+
+ if (clTunedParameters &&
+ options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
{
- ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
- options.GetClTunedParametersFile().c_str(), error.what());
+ // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file
+ try
+ {
+ clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
+ }
+ catch (std::exception& error)
+ {
+ ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
+ options.GetClTunedParametersFile().c_str(), error.what());
+ }
}
}
-
NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel);
+ ALOGV("ArmnnDriverImpl::prepareModel cache timing = %lld µs", std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - prepareModelTimepoint).count());
+
return V1_0::ErrorStatus::NONE;
}
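
prepareModel() above now records a std::chrono::system_clock time point at entry and logs the elapsed microseconds after the callback has been notified. A minimal, self-contained sketch of that timing pattern follows; std::printf stands in for ALOGV and a sleep stands in for the conversion, optimisation and LoadNetwork() work so the snippet builds off-device.

#include <chrono>
#include <cstdio>
#include <thread>

int main()
{
    // Matches the prepareModelTimepoint capture added at the top of prepareModel().
    const auto prepareModelTimepoint = std::chrono::system_clock::now();

    // ... model conversion, optimisation and LoadNetwork() would happen here ...
    std::this_thread::sleep_for(std::chrono::milliseconds(5));

    const auto elapsedUs = std::chrono::duration_cast<std::chrono::microseconds>(
        std::chrono::system_clock::now() - prepareModelTimepoint).count();
    std::printf("prepareModel cache timing = %lld us\n", static_cast<long long>(elapsedUs));
    return 0;
}
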
diff --git a/ArmnnDriverImpl.hpp b/ArmnnDriverImpl.hpp
index a6ac5f3..4785ef7 100644
--- a/ArmnnDriverImpl.hpp
+++ b/ArmnnDriverImpl.hpp
@@ -6,12 +6,15 @@
#pragma once
#include "DriverOptions.hpp"
-#include "NamespaceAdaptor.hpp"
#include <HalInterfaces.h>
#ifdef ARMNN_ANDROID_R
-using namespace android::nn;
+using namespace android::nn::hal;
+#endif
+
+#ifdef ARMNN_ANDROID_S
+using namespace android::hardware;
#endif
namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
diff --git a/ArmnnPreparedModel.cpp b/ArmnnPreparedModel.cpp
index cfa8a74..d87f9f8 100644
--- a/ArmnnPreparedModel.cpp
+++ b/ArmnnPreparedModel.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -8,13 +8,19 @@
#include "ArmnnPreparedModel.hpp"
#include "Utils.hpp"
+#include <armnn/Types.hpp>
+
#include <log/log.h>
#include <OperationsUtils.h>
#include <ValidateHal.h>
-#include <cassert>
+#include <chrono>
#include <cinttypes>
+#ifdef ARMNN_ANDROID_S
+#include <LegacyUtils.h>
+#endif
+
using namespace android;
namespace
@@ -86,6 +92,9 @@ RequestThread<ArmnnPreparedModel, HalVersion, CallbackContext_1_0>
ArmnnPreparedModel<HalVersion>::m_RequestThread;
template<typename HalVersion>
+std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel<HalVersion>::m_Threadpool(nullptr);
+
+template<typename HalVersion>
template <typename TensorBindingCollection>
void ArmnnPreparedModel<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
const TensorBindingCollection& tensorBindings)
@@ -108,16 +117,43 @@ ArmnnPreparedModel<HalVersion>::ArmnnPreparedModel(armnn::NetworkId networkId,
armnn::IRuntime* runtime,
const HalModel& model,
const std::string& requestInputsAndOutputsDumpDir,
- const bool gpuProfilingEnabled)
+ const bool gpuProfilingEnabled,
+ const bool asyncModelExecutionEnabled,
+ const unsigned int numberOfThreads,
+ const bool importEnabled,
+ const bool exportEnabled)
: m_NetworkId(networkId)
, m_Runtime(runtime)
, m_Model(model)
, m_RequestCount(0)
, m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
, m_GpuProfilingEnabled(gpuProfilingEnabled)
+ , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+ , m_EnableImport(importEnabled)
+ , m_EnableExport(exportEnabled)
{
// Enable profiling if required.
m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+ for (unsigned int i=0; i < numberOfThreads; ++i)
+ {
+ memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
+ }
+
+ if (!m_Threadpool)
+ {
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+ }
+ else
+ {
+ m_Threadpool->LoadMemHandles(memHandles);
+ }
+
+ m_WorkingMemHandle = memHandles.back();
+ }
}
template<typename HalVersion>
@@ -125,12 +161,21 @@ ArmnnPreparedModel<HalVersion>::~ArmnnPreparedModel()
{
// Get a hold of the profiler used by this model.
std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
+ if (profiler && m_GpuProfilingEnabled)
+ {
+ // Dump the profiling info to a file if required.
+ DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId,
+ profiler.get());
+ }
// Unload the network associated with this model.
m_Runtime->UnloadNetwork(m_NetworkId);
- // Dump the profiling info to a file if required.
- DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
+ // Unload the network memhandles from the threadpool
+ if (m_AsyncModelExecutionEnabled)
+ {
+ m_Threadpool->UnloadMemHandles(m_NetworkId);
+ }
}
template<typename HalVersion>
@@ -164,7 +209,11 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel<HalVersion>::execute(
// map the memory pool into shared pointers
// use a shared memory pools vector on the heap, as it is passed to the request thread
auto pMemPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();
+#if !defined(ARMNN_ANDROID_S)
if (!setRunTimePoolInfosFromHidlMemories(pMemPools.get(), request.pools))
+#else
+ if (!setRunTimePoolInfosFromCanonicalMemories(pMemPools.get(), uncheckedConvert(request.pools)))
+#endif
{
NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
return V1_0::ErrorStatus::GENERAL_FAILURE;
@@ -177,35 +226,26 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel<HalVersion>::execute(
for (unsigned int i = 0; i < request.inputs.size(); i++)
{
const auto& inputArg = request.inputs[i];
-
- const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
- const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, *pMemPools);
-
- uint32_t poolIndex = inputArg.location.poolIndex;
- if (poolIndex >= pMemPools->size())
+ armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ // pInputTensors (of type InputTensors) is composed of a vector of ConstTensors.
+ // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
+ inputTensorInfo.SetConstant();
+ auto result = ValidateRequestArgument<V1_0::ErrorStatus, V1_0::Request>(request,
+ inputTensorInfo,
+ inputArg,
+ "input");
+ if (result != V1_0::ErrorStatus::NONE)
{
- ALOGE("Cannot execute request. Error converting request input %u to tensor: wrong poolIndex", i);
- return V1_0::ErrorStatus::GENERAL_FAILURE;
+ return result;
}
- uint8_t* inputTensorBegin = static_cast<uint8_t*>(inputTensor.GetMemoryArea());
- if (inputTensorBegin == nullptr)
+ const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, *pMemPools);
+ if (inputTensor.GetMemoryArea() == nullptr)
{
ALOGE("Cannot execute request. Error converting request input %u to tensor", i);
return V1_0::ErrorStatus::GENERAL_FAILURE;
}
- const size_t inputTensorSize = inputTensorInfo.GetNumBytes();
- uint8_t* memoryPoolBegin = (*pMemPools)[poolIndex].getBuffer();
- uint32_t memoryPoolSize = (*pMemPools)[poolIndex].getSize();
- bool inputTensorIsOutOfMemoryRage = (inputTensorBegin + inputTensorSize) > (memoryPoolBegin + memoryPoolSize);
-
- if (inputTensorIsOutOfMemoryRage)
- {
- ALOGE("Cannot execute request. Error converting request input %u to tensor: out of Memory Pool", i);
- return V1_0::ErrorStatus::GENERAL_FAILURE;
- }
-
pInputTensors->emplace_back(i, inputTensor);
}
@@ -213,35 +253,24 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel<HalVersion>::execute(
for (unsigned int i = 0; i < request.outputs.size(); i++)
{
const auto& outputArg = request.outputs[i];
-
const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
- const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, *pMemPools);
+ auto result = ValidateRequestArgument<V1_0::ErrorStatus, V1_0::Request>(request,
+ outputTensorInfo,
+ outputArg,
+ "output");
- uint32_t poolIndex = outputArg.location.poolIndex;
- if (poolIndex >= pMemPools->size())
+ if (result != V1_0::ErrorStatus::NONE)
{
- ALOGE("Cannot execute request. Error converting request output %u to tensor: wrong poolIndex", i);
- return V1_0::ErrorStatus::GENERAL_FAILURE;
+ return result;
}
- uint8_t* outputTensorBegin = static_cast<uint8_t*>(outputTensor.GetMemoryArea());
- if (outputTensorBegin == nullptr)
+ const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, *pMemPools);
+ if (outputTensor.GetMemoryArea() == nullptr)
{
ALOGE("Cannot execute request. Error converting request output %u to tensor", i);
return V1_0::ErrorStatus::GENERAL_FAILURE;
}
- const size_t outputTensorSize = outputTensorInfo.GetNumBytes();
- uint8_t* memoryPoolBegin = (*pMemPools)[poolIndex].getBuffer();
- uint32_t memoryPoolSize = (*pMemPools)[poolIndex].getSize();
- bool outputTensorIsOutOfMemoryRage = (outputTensorBegin + outputTensorSize) > (memoryPoolBegin + memoryPoolSize);
-
- if (outputTensorIsOutOfMemoryRage)
- {
- ALOGE("Cannot execute request. Error converting request output %u to tensor: out of Memory Pool", i);
- return V1_0::ErrorStatus::GENERAL_FAILURE;
- }
-
pOutputTensors->emplace_back(i, outputTensor);
}
}
@@ -258,8 +287,6 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel<HalVersion>::execute(
return V1_0::ErrorStatus::GENERAL_FAILURE;
}
- ALOGV("ArmnnPreparedModel::execute(...) before PostMsg");
-
auto cb = [callback](V1_0::ErrorStatus errorStatus, std::string callingFunction)
{
NotifyCallbackAndCheck(callback, errorStatus, callingFunction);
@@ -267,7 +294,17 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel<HalVersion>::execute(
CallbackContext_1_0 armnnCb;
armnnCb.callback = cb;
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ ALOGV("ArmnnPreparedModel::execute(...) before ScheduleGraphForExecution");
+ ScheduleGraphForExecution(pMemPools, pInputTensors, pOutputTensors, armnnCb);
+ ALOGV("ArmnnPreparedModel::execute(...) after ScheduleGraphForExecution");
+ return V1_0::ErrorStatus::NONE;
+ }
+
// post the request for asynchronous execution
+ ALOGV("ArmnnPreparedModel::execute(...) before PostMsg");
m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, armnnCb);
ALOGV("ArmnnPreparedModel::execute(...) after PostMsg");
return V1_0::ErrorStatus::NONE; // successfully queued
@@ -281,13 +318,37 @@ void ArmnnPreparedModel<HalVersion>::ExecuteGraph(
CallbackContext_1_0 cb)
{
ALOGV("ArmnnPreparedModel::ExecuteGraph(...)");
+ // Capture the graph execution start time.
+ std::chrono::time_point<std::chrono::system_clock> graphExecutionStart = std::chrono::system_clock::now();
DumpTensorsIfRequired("Input", inputTensors);
// run it
try
{
- armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
+ armnn::Status status;
+ if (m_AsyncModelExecutionEnabled)
+ {
+ ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled true");
+ status = m_Runtime->Execute(*m_WorkingMemHandle, inputTensors, outputTensors);
+ }
+ else
+ {
+ ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled false");
+ // Create a vector of Input and Output Ids which can be imported. An empty vector means all will be copied.
+ std::vector<armnn::ImportedInputId> importedInputIds;
+ if (m_EnableImport)
+ {
+ importedInputIds = m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
+ }
+ std::vector<armnn::ImportedOutputId> importedOutputIds;
+ if (m_EnableExport)
+ {
+ importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
+ }
+ status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
+ importedInputIds, importedOutputIds);
+ }
if (status != armnn::Status::Success)
{
ALOGW("EnqueueWorkload failed");
@@ -317,13 +378,19 @@ void ArmnnPreparedModel<HalVersion>::ExecuteGraph(
{
// Type android::nn::RunTimePoolInfo has changed between Android P & Q and Android R, where
// update() has been removed and flush() added.
- #if defined(ARMNN_ANDROID_R) // Use the new Android implementation.
+ #if defined(ARMNN_ANDROID_R) || defined(ARMNN_ANDROID_S) // Use the new Android implementation.
pool.flush();
#else
pool.update();
#endif
}
+ // Log the total time in this call. This is a good number to compare to that printed out by
+ // RuntimeImpl::EnqueueWorkload. The difference should be the execution overhead of the driver.
+ ALOGI("ArmnnPreparedModel::ExecuteGraph Execution time = %lld µs",
+ std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - graphExecutionStart).count());
+
cb.callback(V1_0::ErrorStatus::NONE, "ExecuteGraph");
}
@@ -334,7 +401,11 @@ bool ArmnnPreparedModel<HalVersion>::ExecuteWithDummyInputs()
armnn::InputTensors inputTensors;
for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++)
{
- const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ // pInputTensors (of type InputTensors) is composed of a vector of ConstTensors.
+ // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
+ inputTensorInfo.SetConstant();
+
storage.emplace_back(inputTensorInfo.GetNumBytes());
const armnn::ConstTensor inputTensor(inputTensorInfo, storage.back().data());
@@ -353,7 +424,29 @@ bool ArmnnPreparedModel<HalVersion>::ExecuteWithDummyInputs()
try
{
- armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
+ armnn::Status status;
+ if (m_AsyncModelExecutionEnabled)
+ {
+ ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled true");
+ status = m_Runtime->Execute(*m_WorkingMemHandle, inputTensors, outputTensors);
+ }
+ else
+ {
+ ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled false");
+ // Create a vector of Input and Output Ids which can be imported. An empty vector means all will be copied.
+ std::vector<armnn::ImportedInputId> importedInputIds;
+ if (m_EnableImport)
+ {
+ importedInputIds = m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
+ }
+ std::vector<armnn::ImportedOutputId> importedOutputIds;
+ if (m_EnableExport)
+ {
+ importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
+ }
+ status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
+ importedInputIds, importedOutputIds);
+ }
if (status != armnn::Status::Success)
{
ALOGW("ExecuteWithDummyInputs: EnqueueWorkload failed");
@@ -373,11 +466,73 @@ bool ArmnnPreparedModel<HalVersion>::ExecuteWithDummyInputs()
return true;
}
+/// Schedule the graph prepared from the request for execution
+template<typename HalVersion>
+template<typename CallbackContext>
+void ArmnnPreparedModel<HalVersion>::ScheduleGraphForExecution(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext callbackContext)
+{
+ ALOGV("ArmnnPreparedModel::ScheduleGraphForExecution(...)");
+
+ DumpTensorsIfRequired("Input", *inputTensors);
+
+
+ auto tpCb = std::make_shared<
+ ArmnnThreadPoolCallback<CallbackContext_1_0>>(this,
+ pMemPools,
+ inputTensors,
+ outputTensors,
+ callbackContext);
+
+ m_Threadpool->Schedule(m_NetworkId,
+ *tpCb->m_InputTensors,
+ *tpCb->m_OutputTensors,
+ armnn::QosExecPriority::Medium,
+ tpCb);
+ ALOGV("ArmnnPreparedModel::ScheduleGraphForExecution end");
+}
+
+template<typename HalVersion>
+template <typename CallbackContext>
+void ArmnnPreparedModel<HalVersion>::ArmnnThreadPoolCallback<CallbackContext>::Notify(
+ armnn::Status status, armnn::InferenceTimingPair timeTaken)
+{
+ armnn::IgnoreUnused(status, timeTaken);
+ ALOGV("ArmnnPreparedModel::ArmnnThreadPoolCallback_1_2 Notify");
+
+ m_Model->DumpTensorsIfRequired("Output", *m_OutputTensors);
+
+ // Commit output buffers.
+ // Note that we update *all* pools, even if they aren't actually used as outputs -
+ // this is simpler and is what the CpuExecutor does.
+ for (android::nn::RunTimePoolInfo& pool : *m_MemPools)
+ {
+ // Type android::nn::RunTimePoolInfo has changed between Android P & Q and Android R, where
+ // update() has been removed and flush() added.
+ #if defined(ARMNN_ANDROID_R) || defined(ARMNN_ANDROID_S) // Use the new Android implementation.
+ pool.flush();
+ #else
+ pool.update();
+ #endif
+ }
+
+ m_CallbackContext.callback(V1_0::ErrorStatus::NONE, "ArmnnPreparedModel::ArmnnThreadPoolCallback_1_2 Notify");
+ return;
+}
+
///
/// Class template specializations
///
template class ArmnnPreparedModel<hal_1_0::HalPolicy>;
+template void ArmnnPreparedModel<hal_1_0::HalPolicy>::ScheduleGraphForExecution<CallbackContext_1_0>(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext_1_0 callbackContext);
#ifdef ARMNN_ANDROID_NN_V1_1
template class ArmnnPreparedModel<hal_1_1::HalPolicy>;
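
ExecuteGraph() above now forks on m_AsyncModelExecutionEnabled: the asynchronous path runs through the per-thread working memory handle, while the synchronous path may first import the input and output buffers (zero-copy where the backend allows it) and hand the imported ids to EnqueueWorkload. The sketch below reuses only the runtime calls that appear in the hunks and assumes the ArmNN headers the driver already includes plus a network loaded under networkId; it is an illustration of the dispatch, not the driver's function.

#include <armnn/ArmNN.hpp>
#include <armnn/Threadpool.hpp>

#include <vector>

armnn::Status RunInference(armnn::IRuntime* runtime,
                           armnn::NetworkId networkId,
                           armnn::IWorkingMemHandle* workingMemHandle,  // non-null => async path
                           armnn::InputTensors& inputTensors,
                           armnn::OutputTensors& outputTensors,
                           bool enableImport,
                           bool enableExport)
{
    if (workingMemHandle != nullptr)
    {
        // Asynchronous model execution: intermediate tensors live in the working memory handle.
        return runtime->Execute(*workingMemHandle, inputTensors, outputTensors);
    }

    // An empty id vector means the corresponding tensors are copied rather than imported.
    std::vector<armnn::ImportedInputId> importedInputIds;
    if (enableImport)
    {
        importedInputIds = runtime->ImportInputs(networkId, inputTensors, armnn::MemorySource::Malloc);
    }
    std::vector<armnn::ImportedOutputId> importedOutputIds;
    if (enableExport)
    {
        importedOutputIds = runtime->ImportOutputs(networkId, outputTensors, armnn::MemorySource::Malloc);
    }
    return runtime->EnqueueWorkload(networkId, inputTensors, outputTensors,
                                    importedInputIds, importedOutputIds);
}
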
diff --git a/ArmnnPreparedModel.hpp b/ArmnnPreparedModel.hpp
index 89f6226..b9d89d4 100644
--- a/ArmnnPreparedModel.hpp
+++ b/ArmnnPreparedModel.hpp
@@ -11,6 +11,7 @@
#include <NeuralNetworks.h>
#include <armnn/ArmNN.hpp>
+#include <armnn/Threadpool.hpp>
#include <string>
#include <vector>
@@ -38,7 +39,11 @@ public:
armnn::IRuntime* runtime,
const HalModel& model,
const std::string& requestInputsAndOutputsDumpDir,
- const bool gpuProfilingEnabled);
+ const bool gpuProfilingEnabled,
+ const bool asyncModelExecutionEnabled = false,
+ const unsigned int numberOfThreads = 1,
+ const bool importEnabled = false,
+ const bool exportEnabled = false);
virtual ~ArmnnPreparedModel();
@@ -56,18 +61,60 @@ public:
bool ExecuteWithDummyInputs();
private:
+
+ template<typename CallbackContext>
+ class ArmnnThreadPoolCallback : public armnn::IAsyncExecutionCallback
+ {
+ public:
+ ArmnnThreadPoolCallback(ArmnnPreparedModel<HalVersion>* model,
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext callbackContext) :
+ m_Model(model),
+ m_MemPools(pMemPools),
+ m_InputTensors(inputTensors),
+ m_OutputTensors(outputTensors),
+ m_CallbackContext(callbackContext)
+ {}
+
+ void Notify(armnn::Status status, armnn::InferenceTimingPair timeTaken) override;
+
+ ArmnnPreparedModel<HalVersion>* m_Model;
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> m_MemPools;
+ std::shared_ptr<armnn::InputTensors> m_InputTensors;
+ std::shared_ptr<armnn::OutputTensors> m_OutputTensors;
+ CallbackContext m_CallbackContext;
+ };
+
template <typename TensorBindingCollection>
void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings);
- armnn::NetworkId m_NetworkId;
- armnn::IRuntime* m_Runtime;
- HalModel m_Model;
+ /// schedule the graph prepared from the request for execution
+ template<typename CallbackContext>
+ void ScheduleGraphForExecution(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext m_CallbackContext);
+
+ armnn::NetworkId m_NetworkId;
+ armnn::IRuntime* m_Runtime;
+ HalModel m_Model;
// There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
// It is specific to this class, so it is declared as static here
- static RequestThread<ArmnnPreparedModel, HalVersion, CallbackContext_1_0> m_RequestThread;
- uint32_t m_RequestCount;
- const std::string& m_RequestInputsAndOutputsDumpDir;
- const bool m_GpuProfilingEnabled;
+ static RequestThread<ArmnnPreparedModel,
+ HalVersion,
+ CallbackContext_1_0> m_RequestThread;
+ uint32_t m_RequestCount;
+ const std::string& m_RequestInputsAndOutputsDumpDir;
+ const bool m_GpuProfilingEnabled;
+ // Static to allow sharing of threadpool between ArmnnPreparedModel instances
+ static std::unique_ptr<armnn::Threadpool> m_Threadpool;
+ std::shared_ptr<armnn::IWorkingMemHandle> m_WorkingMemHandle;
+ const bool m_AsyncModelExecutionEnabled;
+ const bool m_EnableImport;
+ const bool m_EnableExport;
};
}
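
The header above makes the armnn::Threadpool a static member shared by every ArmnnPreparedModel instance, while each instance keeps its own working memory handle. The sketch below shows the set-up the new constructors perform when async execution is enabled, using only the calls visible in the hunks (CreateWorkingMemHandle, the Threadpool constructor, LoadMemHandles); it assumes a network already loaded under networkId and the ArmNN headers the header already pulls in.

#include <armnn/ArmNN.hpp>
#include <armnn/Threadpool.hpp>

#include <memory>
#include <vector>

std::shared_ptr<armnn::IWorkingMemHandle> SetUpAsyncExecution(
    armnn::IRuntime* runtime,
    armnn::NetworkId networkId,
    unsigned int numberOfThreads,
    std::unique_ptr<armnn::Threadpool>& threadpool)  // shared across prepared models in the driver
{
    // One working memory handle per worker thread; each holds the intermediate
    // tensors of one in-flight inference.
    std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
    for (unsigned int i = 0; i < numberOfThreads; ++i)
    {
        memHandles.emplace_back(runtime->CreateWorkingMemHandle(networkId));
    }

    if (!threadpool)
    {
        // The first prepared model creates the shared pool...
        threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
    }
    else
    {
        // ...later models only register their handles with it.
        threadpool->LoadMemHandles(memHandles);
    }

    // The driver also keeps one handle around for its synchronous Execute() calls.
    return memHandles.back();
}
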
diff --git a/ArmnnPreparedModel_1_2.cpp b/ArmnnPreparedModel_1_2.cpp
index f5c73cf..a401b30 100644
--- a/ArmnnPreparedModel_1_2.cpp
+++ b/ArmnnPreparedModel_1_2.cpp
@@ -1,21 +1,28 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#define LOG_TAG "ArmnnDriver"
#include "ArmnnPreparedModel_1_2.hpp"
+
#include "Utils.hpp"
+#include <armnn/Types.hpp>
+
#include <log/log.h>
#include <OperationsUtils.h>
#include <ExecutionBurstServer.h>
#include <ValidateHal.h>
-#include <cassert>
+#include <chrono>
#include <cinttypes>
+#ifdef ARMNN_ANDROID_S
+#include <LegacyUtils.h>
+#endif
+
using namespace android;
using namespace android::hardware;
@@ -120,6 +127,9 @@ RequestThread<ArmnnPreparedModel_1_2, HalVersion, CallbackContext_1_2>
ArmnnPreparedModel_1_2<HalVersion>::m_RequestThread;
template<typename HalVersion>
+std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel_1_2<HalVersion>::m_Threadpool(nullptr);
+
+template<typename HalVersion>
template<typename TensorBindingCollection>
void ArmnnPreparedModel_1_2<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
const TensorBindingCollection& tensorBindings)
@@ -142,16 +152,88 @@ ArmnnPreparedModel_1_2<HalVersion>::ArmnnPreparedModel_1_2(armnn::NetworkId netw
armnn::IRuntime* runtime,
const V1_2::Model& model,
const std::string& requestInputsAndOutputsDumpDir,
- const bool gpuProfilingEnabled)
+ const bool gpuProfilingEnabled,
+ const bool asyncModelExecutionEnabled,
+ const unsigned int numberOfThreads,
+ const bool importEnabled,
+ const bool exportEnabled)
: m_NetworkId(networkId)
, m_Runtime(runtime)
, m_Model(model)
, m_RequestCount(0)
, m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
, m_GpuProfilingEnabled(gpuProfilingEnabled)
+ , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+ , m_EnableImport(importEnabled)
+ , m_EnableExport(exportEnabled)
+ , m_PreparedFromCache(false)
{
// Enable profiling if required.
m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+ for (unsigned int i=0; i < numberOfThreads; ++i)
+ {
+ memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
+ }
+
+ if (!m_Threadpool)
+ {
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+ }
+ else
+ {
+ m_Threadpool->LoadMemHandles(memHandles);
+ }
+
+ m_WorkingMemHandle = memHandles.back();
+ }
+}
+
+template<typename HalVersion>
+ArmnnPreparedModel_1_2<HalVersion>::ArmnnPreparedModel_1_2(armnn::NetworkId networkId,
+ armnn::IRuntime* runtime,
+ const std::string& requestInputsAndOutputsDumpDir,
+ const bool gpuProfilingEnabled,
+ const bool asyncModelExecutionEnabled,
+ const unsigned int numberOfThreads,
+ const bool importEnabled,
+ const bool exportEnabled,
+ const bool preparedFromCache)
+ : m_NetworkId(networkId)
+ , m_Runtime(runtime)
+ , m_RequestCount(0)
+ , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
+ , m_GpuProfilingEnabled(gpuProfilingEnabled)
+ , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+ , m_EnableImport(importEnabled)
+ , m_EnableExport(exportEnabled)
+ , m_PreparedFromCache(preparedFromCache)
+{
+ // Enable profiling if required.
+ m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+ for (unsigned int i=0; i < numberOfThreads; ++i)
+ {
+ memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
+ }
+
+ if (!m_Threadpool)
+ {
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+ }
+ else
+ {
+ m_Threadpool->LoadMemHandles(memHandles);
+ }
+
+ m_WorkingMemHandle = memHandles.back();
+ }
}
template<typename HalVersion>
@@ -159,12 +241,21 @@ ArmnnPreparedModel_1_2<HalVersion>::~ArmnnPreparedModel_1_2()
{
// Get a hold of the profiler used by this model.
std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
+ if (profiler && m_GpuProfilingEnabled)
+ {
+ // Dump the profiling info to a file if required.
+ DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId,
+ profiler.get());
+ }
// Unload the network associated with this model.
m_Runtime->UnloadNetwork(m_NetworkId);
- // Dump the profiling info to a file if required.
- DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
+ // Unload the network memhandles from the threadpool
+ if (m_AsyncModelExecutionEnabled)
+ {
+ m_Threadpool->UnloadMemHandles(m_NetworkId);
+ }
}
template<typename HalVersion>
@@ -221,32 +312,25 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::PrepareMemoryForIn
for (unsigned int i = 0; i < request.inputs.size(); i++)
{
const auto& inputArg = request.inputs[i];
-
- const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
- const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, memPools);
-
- uint32_t poolIndex = inputArg.location.poolIndex;
- if (poolIndex >= memPools.size())
- {
- ALOGE("Cannot execute request. Error converting request input %u to tensor: wrong poolIndex", i);
- return V1_0::ErrorStatus::GENERAL_FAILURE;
- }
-
- uint8_t* inputTensorBegin = static_cast<uint8_t*>(inputTensor.GetMemoryArea());
- if (inputTensorBegin == nullptr)
+ armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ // inputs (of type InputTensors) is composed of a vector of ConstTensors.
+ // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
+ inputTensorInfo.SetConstant();
+ auto result = ValidateRequestArgument<V1_0::ErrorStatus, V1_0::Request>(request,
+ inputTensorInfo,
+ inputArg,
+ "input");
+
+ if (result != V1_0::ErrorStatus::NONE)
{
- ALOGE("Cannot execute request. Error converting request input %u to tensor", i);
- return V1_0::ErrorStatus::GENERAL_FAILURE;
+ return result;
}
- const size_t inputTensorSize = inputTensorInfo.GetNumBytes();
- uint8_t* memoryPoolBegin = memPools[poolIndex].getBuffer();
- uint32_t memoryPoolSize = memPools[poolIndex].getSize();
- bool inputTensorIsOutOfMemoryRage = (inputTensorBegin + inputTensorSize) > (memoryPoolBegin + memoryPoolSize);
+ const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, memPools);
- if (inputTensorIsOutOfMemoryRage)
+ if (inputTensor.GetMemoryArea() == nullptr)
{
- ALOGE("Cannot execute request. Error converting request input %u to tensor: out of Memory Pool", i);
+ ALOGE("Cannot execute request. Error converting request input %u to tensor", i);
return V1_0::ErrorStatus::GENERAL_FAILURE;
}
@@ -267,11 +351,19 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::PrepareMemoryForOu
for (unsigned int i = 0; i < request.outputs.size(); i++)
{
const auto& outputArg = request.outputs[i];
+ armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
+ auto result = ValidateRequestArgument<V1_0::ErrorStatus, V1_0::Request>(request,
+ outputTensorInfo,
+ outputArg,
+ "output");
+
+ if (result != V1_0::ErrorStatus::NONE)
+ {
+ return result;
+ }
- const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, memPools);
- uint8_t* outputTensorBegin = static_cast<uint8_t*>(outputTensor.GetMemoryArea());
- if (outputTensorBegin == nullptr)
+ if (outputTensor.GetMemoryArea() == nullptr)
{
ALOGE("Cannot execute request. Error converting request output %u to tensor", i);
return V1_0::ErrorStatus::GENERAL_FAILURE;
@@ -285,29 +377,23 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::PrepareMemoryForOu
return V1_0::ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
}
- const size_t bufferSize = memPools.at(outputArg.location.poolIndex).getSize();
+#if !defined(ARMNN_ANDROID_S)
+ const size_t bufferSize = memPools.at(outputArg.location.poolIndex).getHidlMemory().size();
if (bufferSize < outputSize)
{
ALOGW("ArmnnPreparedModel_1_2::Execute failed: bufferSize < outputSize");
return V1_0::ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
}
-
- uint32_t poolIndex = outputArg.location.poolIndex;
- if (poolIndex >= memPools.size())
- {
- ALOGE("Cannot execute request. Error converting request output %u to tensor: wrong poolIndex", i);
- return V1_0::ErrorStatus::GENERAL_FAILURE;
- }
-
- uint8_t* memoryPoolBegin = memPools[poolIndex].getBuffer();
- uint32_t memoryPoolSize = memPools[poolIndex].getSize();
- bool outputTensorIsOutOfMemoryRage = (outputTensorBegin + outputSize) > (memoryPoolBegin + memoryPoolSize);
- if (outputTensorIsOutOfMemoryRage)
+#else
+ const size_t bufferSize = memPools.at(outputArg.location.poolIndex).getSize();
+ if (bufferSize < outputSize)
{
- ALOGE("Cannot execute request. Error converting request output %u to tensor: out of Memory Pool", i);
- return V1_0::ErrorStatus::GENERAL_FAILURE;
+ ALOGW("ArmnnPreparedModel_1_2::Execute failed bufferSize (%s) < outputSize (%s)",
+ std::to_string(bufferSize).c_str(), std::to_string(outputSize).c_str());
+ outputShapes[i].isSufficient = false;
+ return V1_0::ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
}
-
+#endif
outputs.emplace_back(i, outputTensor);
outputShapes[i] = ComputeShape(outputTensorInfo);
}
@@ -323,12 +409,15 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::PrepareMemoryForIO
const V1_0::Request& request,
CallbackAsync_1_2 callback)
{
+#if !defined(ARMNN_ANDROID_S)
if (!setRunTimePoolInfosFromHidlMemories(&memPools, request.pools))
+#else
+ if (!setRunTimePoolInfosFromCanonicalMemories(&memPools, uncheckedConvert(request.pools)))
+#endif
{
callback(V1_0::ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute");
return V1_0::ErrorStatus::GENERAL_FAILURE;
}
-
// add the inputs and outputs with their data
try
{
@@ -369,9 +458,12 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::PrepareMemoryForIO
template<typename HalVersion>
Return<void> ArmnnPreparedModel_1_2<HalVersion>::executeSynchronously(const V1_0::Request& request,
V1_2::MeasureTiming measureTiming,
- V1_2::IPreparedModel::executeSynchronously_cb cb)
+ executeSynchronously_cb cb)
{
- ALOGV("ArmnnPreparedModel_1_2::executeSynchronously(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_2::executeSynchronously(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
if (cb == nullptr)
@@ -387,7 +479,7 @@ Return<void> ArmnnPreparedModel_1_2<HalVersion>::executeSynchronously(const V1_0
driverStart = Now();
}
- if (!android::nn::validateRequest(request, m_Model))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model))
{
ALOGE("ArmnnPreparedModel_1_2::executeSynchronously invalid request model");
cb(V1_0::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming);
@@ -438,6 +530,8 @@ bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteGraph(
ALOGV("ArmnnPreparedModel_1_2::ExecuteGraph(...)");
TimePoint driverEnd, deviceStart, deviceEnd;
+ // Capture the graph execution start time.
+ std::chrono::time_point<std::chrono::system_clock> graphExecutionStart = std::chrono::system_clock::now();
DumpTensorsIfRequired("Input", inputTensors);
@@ -459,7 +553,30 @@ bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteGraph(
deviceStart = Now();
}
- armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
+ armnn::Status status;
+ if (m_AsyncModelExecutionEnabled)
+ {
+ ALOGW("ArmnnPreparedModel_1_2::ExecuteGraph m_AsyncModelExecutionEnabled true");
+ status = m_Runtime->Execute(*m_WorkingMemHandle, inputTensors, outputTensors);
+ }
+ else
+ {
+ ALOGW("ArmnnPreparedModel_1_2::ExecuteGraph m_AsyncModelExecutionEnabled false");
+
+ // Create a vector of Input and Output Ids which can be imported. An empty vector means all will be copied.
+ std::vector<armnn::ImportedInputId> importedInputIds;
+ if (m_EnableImport)
+ {
+ importedInputIds = m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
+ }
+ std::vector<armnn::ImportedOutputId> importedOutputIds;
+ if (m_EnableExport)
+ {
+ importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
+ }
+ status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
+ importedInputIds, importedOutputIds);
+ }
if (cb.ctx.measureTimings == V1_2::MeasureTiming::YES)
{
@@ -496,24 +613,33 @@ bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteGraph(
V1_2::Timing timing;
timing.timeOnDevice = MicrosecondsDuration(deviceEnd, deviceStart);
timing.timeInDriver = MicrosecondsDuration(driverEnd, cb.ctx.driverStart);
- ALOGV("ArmnnPreparedModel_1_2::execute timing - Device = %" PRIu64 " Driver = %" PRIu64, timing.timeOnDevice,
- timing.timeInDriver);
+ ALOGV("ArmnnPreparedModel_1_2::execute timing - Device = %lu Driver = %lu",
+ static_cast<unsigned long>(timing.timeOnDevice), static_cast<unsigned long>(timing.timeInDriver));
cb.callback(V1_0::ErrorStatus::NONE, outputShapes, timing, "ArmnnPreparedModel_1_2::ExecuteGraph");
} else {
cb.callback(V1_0::ErrorStatus::NONE, outputShapes, g_NoTiming, "ArmnnPreparedModel_1_2::ExecuteGraph");
}
+ // Log the total time in this call. This is a good number to compare to that printed out by
+ // RuntimeImpl::EnqueueWorkload. The difference should be the execution overhead of the driver.
+ ALOGI("ArmnnPreparedModel_1_2::ExecuteGraph Execution time = %lld µs",
+ std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - graphExecutionStart).count());
return true;
}
template<typename HalVersion>
-bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteWithDummyInputs()
+bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs)
{
std::vector<std::vector<char>> storage;
armnn::InputTensors inputTensors;
- for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++)
+ for (unsigned int i = 0; i < numInputs; i++)
{
- const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ // pInputTensors (of type InputTensors) is composed of a vector of ConstTensors.
+ // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
+ inputTensorInfo.SetConstant();
+
storage.emplace_back(inputTensorInfo.GetNumBytes());
const armnn::ConstTensor inputTensor(inputTensorInfo, storage.back().data());
@@ -521,7 +647,7 @@ bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteWithDummyInputs()
}
armnn::OutputTensors outputTensors;
- for (unsigned int i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++)
+ for (unsigned int i = 0; i < numOutputs; i++)
{
const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
storage.emplace_back(outputTensorInfo.GetNumBytes());
@@ -553,10 +679,13 @@ Return <V1_0::ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::Execute(const V1_
ctx.driverStart = Now();
}
- ALOGV("ArmnnPreparedModel_1_2::execute(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_2::execute(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
- if (!android::nn::validateRequest(request, m_Model))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model))
{
callback(V1_0::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute");
return V1_0::ErrorStatus::INVALID_ARGUMENT;
@@ -586,12 +715,21 @@ Return <V1_0::ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::Execute(const V1_
{}
}
- ALOGV("ArmnnPreparedModel_1_2::execute(...) before PostMsg");
// post the request for asynchronous execution
CallbackContext_1_2 cb;
cb.callback = callback;
cb.ctx = ctx;
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ ALOGV("ArmnnPreparedModel_1_2::execute(...) before ScheduleGraphForExecution");
+ ScheduleGraphForExecution(memPools, inputTensors, outputTensors, cb);
+ ALOGV("ArmnnPreparedModel_1_2::execute(...) after ScheduleGraphForExecution");
+ return V1_0::ErrorStatus::NONE;
+ }
+
+ ALOGV("ArmnnPreparedModel_1_2::execute(...) before PostMsg");
m_RequestThread.PostMsg(this, memPools, inputTensors, outputTensors, cb);
ALOGV("ArmnnPreparedModel_1_2::execute(...) after PostMsg");
return V1_0::ErrorStatus::NONE;
@@ -621,6 +759,84 @@ Return<void> ArmnnPreparedModel_1_2<HalVersion>::configureExecutionBurst(
return Void();
}
+/// Schedule the graph prepared from the request for execution
+template<typename HalVersion>
+template<typename CallbackContext>
+void ArmnnPreparedModel_1_2<HalVersion>::ScheduleGraphForExecution(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext callbackContext)
+{
+ ALOGV("ArmnnPreparedModel_1_2::ScheduleGraphForExecution(...)");
+
+ DumpTensorsIfRequired("Input", *inputTensors);
+
+ unsigned int outputTensorSize = outputTensors.get()->size();
+ std::vector<V1_2::OutputShape> outputShapes(outputTensorSize);
+ for (unsigned int i = 0; i < outputTensorSize; i++)
+ {
+ std::pair<int, armnn::Tensor> outputTensorPair = outputTensors.get()->at(i);
+ const armnn::Tensor outputTensor = outputTensorPair.second;
+ const armnn::TensorInfo outputTensorInfo = outputTensor.GetInfo();
+
+ outputShapes[i] = ComputeShape(outputTensorInfo);
+ }
+
+ auto tpCb = std::make_shared<
+ ArmnnThreadPoolCallback_1_2<CallbackContext_1_2>>(this,
+ pMemPools,
+ outputShapes,
+ inputTensors,
+ outputTensors,
+ callbackContext);
+
+ m_Threadpool->Schedule(m_NetworkId,
+ *tpCb->m_InputTensors,
+ *tpCb->m_OutputTensors,
+ armnn::QosExecPriority::Medium,
+ tpCb);
+ ALOGV("ArmnnPreparedModel_1_2::ScheduleGraphForExecution end");
+}
+
+template<typename HalVersion>
+template <typename CallbackContext>
+void ArmnnPreparedModel_1_2<HalVersion>::ArmnnThreadPoolCallback_1_2<CallbackContext>::Notify(
+ armnn::Status status, armnn::InferenceTimingPair timeTaken)
+{
+ ALOGV("ArmnnPreparedModel_1_2::ArmnnThreadPoolCallback_1_2 Notify");
+
+ TimePoint driverEnd;
+
+ CommitPools(*m_MemPools);
+
+ m_Model->DumpTensorsIfRequired("Output", *m_OutputTensors);
+
+ if (status != armnn::Status::Success)
+ {
+ ALOGW("ArmnnThreadPoolCallback::Notify EnqueueWorkload failed");
+ m_CallbackContext.callback(
+ V1_0::ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel::ExecuteGraph");
+ return;
+ }
+
+ if (m_CallbackContext.ctx.measureTimings == V1_2::MeasureTiming::YES)
+ {
+ driverEnd = std::chrono::steady_clock::now();
+ V1_2::Timing timing;
+ timing.timeOnDevice = MicrosecondsDuration(timeTaken.second, timeTaken.first);
+ timing.timeInDriver = MicrosecondsDuration(driverEnd, m_CallbackContext.ctx.driverStart);
+ ALOGV("ArmnnPreparedModel_1_2::execute timing - Device = %lu Driver = %lu",
+ static_cast<unsigned long>(timing.timeOnDevice), static_cast<unsigned long>(timing.timeInDriver));
+ m_CallbackContext.callback(
+ V1_0::ErrorStatus::NONE, m_OutputShapes, timing, "ArmnnPreparedModel_1_2::ExecuteGraph");
+ } else {
+ m_CallbackContext.callback(
+ V1_0::ErrorStatus::NONE, m_OutputShapes, g_NoTiming, "ArmnnPreparedModel_1_2::ExecuteGraph");
+ }
+ return;
+}
+
#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3)
template class ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>;
template bool ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>::ExecuteGraph<CallbackContext_1_2>(
@@ -628,6 +844,12 @@ template bool ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>::ExecuteGraph<CallbackC
armnn::InputTensors& pInputTensors,
armnn::OutputTensors& pOutputTensors,
CallbackContext_1_2 cb);
+
+template void ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>::ScheduleGraphForExecution<CallbackContext_1_2>(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext_1_2 callbackContext);
#endif
} // namespace armnn_driver
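
ScheduleGraphForExecution() and ArmnnThreadPoolCallback_1_2::Notify() above implement the hand-off to the shared ArmNN threadpool: the request is queued with a QoS priority and a callback object, and a worker thread calls Notify() once the inference has run. A pared-down sketch of that pair follows; it mirrors the Schedule() call and the IAsyncExecutionCallback override from the hunks, keeps only a log line in Notify(), and leaves out the pool commit, output-shape reporting and timing the driver performs there.

#define LOG_TAG "ArmnnDriver"

#include <armnn/ArmNN.hpp>
#include <armnn/Threadpool.hpp>

#include <log/log.h>

#include <memory>

// Minimal callback: the threadpool worker invokes Notify() when the scheduled inference completes.
class SketchCallback : public armnn::IAsyncExecutionCallback
{
public:
    void Notify(armnn::Status status, armnn::InferenceTimingPair timeTaken) override
    {
        (void)timeTaken;
        ALOGV("SketchCallback::Notify status: %s",
              status == armnn::Status::Success ? "Success" : "Failure");
        // The real callback commits the memory pools, dumps output tensors if requested
        // and forwards status, output shapes and timing to the HAL callback.
    }
};

void ScheduleOnce(armnn::Threadpool& threadpool,
                  armnn::NetworkId networkId,
                  armnn::InputTensors& inputTensors,
                  armnn::OutputTensors& outputTensors)
{
    auto callback = std::make_shared<SketchCallback>();
    threadpool.Schedule(networkId, inputTensors, outputTensors,
                        armnn::QosExecPriority::Medium, callback);
}
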
diff --git a/ArmnnPreparedModel_1_2.hpp b/ArmnnPreparedModel_1_2.hpp
index 049d347..57deb98 100644
--- a/ArmnnPreparedModel_1_2.hpp
+++ b/ArmnnPreparedModel_1_2.hpp
@@ -12,6 +12,7 @@
#include <NeuralNetworks.h>
#include <armnn/ArmNN.hpp>
+#include <armnn/Threadpool.hpp>
#include <string>
#include <vector>
@@ -44,22 +45,36 @@ public:
armnn::IRuntime* runtime,
const HalModel& model,
const std::string& requestInputsAndOutputsDumpDir,
- const bool gpuProfilingEnabled);
+ const bool gpuProfilingEnabled,
+ const bool asyncModelExecutionEnabled = false,
+ const unsigned int numberOfThreads = 1,
+ const bool importEnabled = false,
+ const bool exportEnabled = false);
+
+ ArmnnPreparedModel_1_2(armnn::NetworkId networkId,
+ armnn::IRuntime* runtime,
+ const std::string& requestInputsAndOutputsDumpDir,
+ const bool gpuProfilingEnabled,
+ const bool asyncModelExecutionEnabled = false,
+ const unsigned int numberOfThreads = 1,
+ const bool importEnabled = false,
+ const bool exportEnabled = false,
+ const bool preparedFromCache = false);
virtual ~ArmnnPreparedModel_1_2();
virtual Return<V1_0::ErrorStatus> execute(const V1_0::Request& request,
- const sp<V1_0::IExecutionCallback>& callback) override;
+ const ::android::sp<V1_0::IExecutionCallback>& callback) override;
virtual Return<V1_0::ErrorStatus> execute_1_2(const V1_0::Request& request, V1_2::MeasureTiming measure,
- const sp<V1_2::IExecutionCallback>& callback) override;
+ const ::android::sp<V1_2::IExecutionCallback>& callback) override;
virtual Return<void> executeSynchronously(const V1_0::Request &request,
V1_2::MeasureTiming measure,
V1_2::IPreparedModel::executeSynchronously_cb cb) override;
virtual Return<void> configureExecutionBurst(
- const sp<V1_2::IBurstCallback>& callback,
+ const ::android::sp<V1_2::IBurstCallback>& callback,
const android::hardware::MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
const android::hardware::MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
configureExecutionBurst_cb cb) override;
@@ -73,9 +88,38 @@ public:
/// Executes this model with dummy inputs (e.g. all zeroes).
/// \return false on failure, otherwise true
- bool ExecuteWithDummyInputs();
+ bool ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs);
private:
+
+ template<typename CallbackContext>
+ class ArmnnThreadPoolCallback_1_2 : public armnn::IAsyncExecutionCallback
+ {
+ public:
+ ArmnnThreadPoolCallback_1_2(ArmnnPreparedModel_1_2<HalVersion>* model,
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::vector<V1_2::OutputShape> outputShapes,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext callbackContext) :
+ m_Model(model),
+ m_MemPools(pMemPools),
+ m_OutputShapes(outputShapes),
+ m_InputTensors(inputTensors),
+ m_OutputTensors(outputTensors),
+ m_CallbackContext(callbackContext)
+ {}
+
+ void Notify(armnn::Status status, armnn::InferenceTimingPair timeTaken) override;
+
+ ArmnnPreparedModel_1_2<HalVersion>* m_Model;
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> m_MemPools;
+ std::vector<V1_2::OutputShape> m_OutputShapes;
+ std::shared_ptr<armnn::InputTensors> m_InputTensors;
+ std::shared_ptr<armnn::OutputTensors> m_OutputTensors;
+ CallbackContext m_CallbackContext;
+ };
+
Return<V1_0::ErrorStatus> Execute(const V1_0::Request& request,
V1_2::MeasureTiming measureTiming,
CallbackAsync_1_2 callback);
@@ -101,17 +145,32 @@ private:
template <typename TensorBindingCollection>
void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings);
- armnn::NetworkId m_NetworkId;
- armnn::IRuntime* m_Runtime;
- V1_2::Model m_Model;
+ /// schedule the graph prepared from the request for execution
+ template<typename CallbackContext>
+ void ScheduleGraphForExecution(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext m_CallbackContext);
+
+ armnn::NetworkId m_NetworkId;
+ armnn::IRuntime* m_Runtime;
+ V1_2::Model m_Model;
// There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
// It is specific to this class, so it is declared as static here
static RequestThread<ArmnnPreparedModel_1_2,
HalVersion,
- CallbackContext_1_2> m_RequestThread;
- uint32_t m_RequestCount;
- const std::string& m_RequestInputsAndOutputsDumpDir;
- const bool m_GpuProfilingEnabled;
+ CallbackContext_1_2> m_RequestThread;
+ uint32_t m_RequestCount;
+ const std::string& m_RequestInputsAndOutputsDumpDir;
+ const bool m_GpuProfilingEnabled;
+ // Static to allow sharing of threadpool between ArmnnPreparedModel instances
+ static std::unique_ptr<armnn::Threadpool> m_Threadpool;
+ std::shared_ptr<IWorkingMemHandle> m_WorkingMemHandle;
+ const bool m_AsyncModelExecutionEnabled;
+ const bool m_EnableImport;
+ const bool m_EnableExport;
+ const bool m_PreparedFromCache;
};
}
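
ExecuteWithDummyInputs() above now takes explicit input and output counts so the warm-up can also run for models prepared from the cache, where no V1_2::Model is kept to read the indexes from. The warm-up itself is sketched below using the calls visible in the hunks (GetInputTensorInfo, SetConstant, GetOutputTensorInfo, EnqueueWorkload); its purpose is to get CL kernels compiled, and tuned when tuning is enabled, before the first real request. Assumes a loaded network and the ArmNN headers the driver already uses; error handling is trimmed.

#include <armnn/ArmNN.hpp>

#include <vector>

bool WarmUpWithDummyInputs(armnn::IRuntime* runtime,
                           armnn::NetworkId networkId,
                           unsigned int numInputs,
                           unsigned int numOutputs)
{
    std::vector<std::vector<char>> storage;  // zero-filled buffers, kept alive for the run
    armnn::InputTensors inputTensors;
    for (unsigned int i = 0; i < numInputs; ++i)
    {
        armnn::TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(networkId, i);
        inputTensorInfo.SetConstant();       // InputTensors are made of ConstTensors
        storage.emplace_back(inputTensorInfo.GetNumBytes());
        inputTensors.emplace_back(i, armnn::ConstTensor(inputTensorInfo, storage.back().data()));
    }

    armnn::OutputTensors outputTensors;
    for (unsigned int i = 0; i < numOutputs; ++i)
    {
        const armnn::TensorInfo outputTensorInfo = runtime->GetOutputTensorInfo(networkId, i);
        storage.emplace_back(outputTensorInfo.GetNumBytes());
        outputTensors.emplace_back(i, armnn::Tensor(outputTensorInfo, storage.back().data()));
    }

    return runtime->EnqueueWorkload(networkId, inputTensors, outputTensors) == armnn::Status::Success;
}
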
diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp
index 353bdaf..7dc692f 100644
--- a/ArmnnPreparedModel_1_3.cpp
+++ b/ArmnnPreparedModel_1_3.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd. All rights reserved.
+// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
// Note: the ArmnnFencedExecutionCallback and code snippet in the executeFenced() function
@@ -12,6 +12,8 @@
#include "ArmnnPreparedModel_1_3.hpp"
#include "Utils.hpp"
+#include <armnn/Types.hpp>
+
#include <Utils.h>
#include <android/sync.h>
#include <log/log.h>
@@ -19,9 +21,13 @@
#include <ExecutionBurstServer.h>
#include <ValidateHal.h>
-#include <cassert>
+#include <chrono>
#include <cinttypes>
+#ifdef ARMNN_ANDROID_S
+#include <LegacyUtils.h>
+#endif
+
using namespace android;
using namespace android::hardware;
@@ -141,6 +147,9 @@ RequestThread_1_3<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3>
ArmnnPreparedModel_1_3<HalVersion>::m_RequestThread;
template<typename HalVersion>
+std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel_1_3<HalVersion>::m_Threadpool(nullptr);
+
+template<typename HalVersion>
template<typename TensorBindingCollection>
void ArmnnPreparedModel_1_3<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
const TensorBindingCollection& tensorBindings)
@@ -164,7 +173,11 @@ ArmnnPreparedModel_1_3<HalVersion>::ArmnnPreparedModel_1_3(armnn::NetworkId netw
const V1_3::Model& model,
const std::string& requestInputsAndOutputsDumpDir,
const bool gpuProfilingEnabled,
- V1_3::Priority priority)
+ V1_3::Priority priority,
+ const bool asyncModelExecutionEnabled,
+ const unsigned int numberOfThreads,
+ const bool importEnabled,
+ const bool exportEnabled)
: m_NetworkId(networkId)
, m_Runtime(runtime)
, m_Model(model)
@@ -172,9 +185,79 @@ ArmnnPreparedModel_1_3<HalVersion>::ArmnnPreparedModel_1_3(armnn::NetworkId netw
, m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
, m_GpuProfilingEnabled(gpuProfilingEnabled)
, m_ModelPriority(priority)
+ , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+ , m_EnableImport(importEnabled)
+ , m_EnableExport(exportEnabled)
+ , m_PreparedFromCache(false)
{
// Enable profiling if required.
m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+ for (unsigned int i=0; i < numberOfThreads; ++i)
+ {
+ memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
+ }
+
+ if (!m_Threadpool)
+ {
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+ }
+ else
+ {
+ m_Threadpool->LoadMemHandles(memHandles);
+ }
+
+ m_WorkingMemHandle = memHandles.back();
+ }
+}
+
+template<typename HalVersion>
+ArmnnPreparedModel_1_3<HalVersion>::ArmnnPreparedModel_1_3(armnn::NetworkId networkId,
+ armnn::IRuntime* runtime,
+ const std::string& requestInputsAndOutputsDumpDir,
+ const bool gpuProfilingEnabled,
+ V1_3::Priority priority,
+ const bool asyncModelExecutionEnabled,
+ const unsigned int numberOfThreads,
+ const bool importEnabled,
+ const bool exportEnabled,
+ const bool preparedFromCache)
+ : m_NetworkId(networkId)
+ , m_Runtime(runtime)
+ , m_RequestCount(0)
+ , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
+ , m_GpuProfilingEnabled(gpuProfilingEnabled)
+ , m_ModelPriority(priority)
+ , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
+ , m_EnableImport(importEnabled)
+ , m_EnableExport(exportEnabled)
+ , m_PreparedFromCache(preparedFromCache)
+{
+ // Enable profiling if required.
+ m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+ for (unsigned int i=0; i < numberOfThreads; ++i)
+ {
+ memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
+ }
+
+ if (!m_Threadpool)
+ {
+ m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
+ }
+ else
+ {
+ m_Threadpool->LoadMemHandles(memHandles);
+ }
+
+ m_WorkingMemHandle = memHandles.back();
+ }
}
template<typename HalVersion>
@@ -182,12 +265,21 @@ ArmnnPreparedModel_1_3<HalVersion>::~ArmnnPreparedModel_1_3()
{
// Get a hold of the profiler used by this model.
std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
+ if (profiler && m_GpuProfilingEnabled)
+ {
+ // Dump the profiling info to a file if required.
+ DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId,
+ profiler.get());
+ }
// Unload the network associated with this model.
m_Runtime->UnloadNetwork(m_NetworkId);
- // Dump the profiling info to a file if required.
- DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
+ // Unload the network memhandles from the threadpool
+ if (m_AsyncModelExecutionEnabled)
+ {
+ m_Threadpool->UnloadMemHandles(m_NetworkId);
+ }
}
template<typename HalVersion>
@@ -308,7 +400,7 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeFenced(const V1_3::Reque
ALOGW("ArmnnPreparedModel_1_3::executeFenced parameter loopTimeoutDuration is set but not supported.");
}
- if (!android::nn::validateRequest(request, m_Model, /*allowUnspecifiedOutput=*/false))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model, /*allowUnspecifiedOutput=*/false))
{
ALOGV("ArmnnPreparedModel_1_3::executeFenced outputs must be specified for fenced execution ");
cb(V1_3::ErrorStatus::INVALID_ARGUMENT, hidl_handle(nullptr), nullptr);
@@ -322,7 +414,10 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeFenced(const V1_3::Reque
ctx.driverStart = Now();
}
- ALOGV("ArmnnPreparedModel_1_3::executeFenced(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_3::executeFenced(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
if (!m_RequestInputsAndOutputsDumpDir.empty())
@@ -400,8 +495,9 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeFenced(const V1_3::Reque
{
fenceTiming.timeOnDevice = MicrosecondsDuration(ctx.deviceEnd, ctx.deviceStart);
fenceTiming.timeInDriver = MicrosecondsDuration(ctx.driverEnd, fenceExecutionStart);
- ALOGV("ArmnnPreparedModel_1_3::fenceFinishExecutionTiming - Device = %" PRIu64 " Driver = %" PRIu64,
- fenceTiming.timeOnDevice, fenceTiming.timeInDriver);
+ ALOGV("ArmnnPreparedModel_1_3::fenceFinishExecutionTiming - Device = %lu Driver = %lu",
+ static_cast<unsigned long>(fenceTiming.timeOnDevice),
+ static_cast<unsigned long>(fenceTiming.timeInDriver));
}
sp<ArmnnFencedExecutionCallback> armnnFencedExecutionCallback =
@@ -420,32 +516,25 @@ Return<V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::PrepareMemoryForIn
for (unsigned int i = 0; i < request.inputs.size(); i++)
{
const auto& inputArg = request.inputs[i];
-
- const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
- const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, memPools);
-
- uint32_t poolIndex = inputArg.location.poolIndex;
- if (poolIndex >= memPools.size())
+ armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ // inputs (of type InputTensors) is composed of a vector of ConstTensors.
+ // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
+ inputTensorInfo.SetConstant();
+ auto result = ValidateRequestArgument<V1_3::ErrorStatus, V1_3::Request>(request,
+ inputTensorInfo,
+ inputArg,
+ "input");
+
+ if (result != V1_3::ErrorStatus::NONE)
{
- ALOGE("Cannot execute request. Error converting request input %u to tensor: wrong poolIndex", i);
- return V1_3::ErrorStatus::GENERAL_FAILURE;
+ return result;
}
- uint8_t* inputTensorBegin = static_cast<uint8_t*>(inputTensor.GetMemoryArea());
- if (inputTensorBegin == nullptr)
- {
- ALOGE("Cannot execute request. Error converting request input %u to tensor", i);
- return V1_3::ErrorStatus::GENERAL_FAILURE;
- }
-
- const size_t inputTensorSize = inputTensorInfo.GetNumBytes();
- uint8_t* memoryPoolBegin = memPools[poolIndex].getBuffer();
- uint32_t memoryPoolSize = memPools[poolIndex].getSize();
- bool inputTensorIsOutOfMemoryRage = (inputTensorBegin + inputTensorSize) > (memoryPoolBegin + memoryPoolSize);
+ const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, memPools);
- if (inputTensorIsOutOfMemoryRage)
+ if (inputTensor.GetMemoryArea() == nullptr)
{
- ALOGE("Cannot execute request. Error converting request input %u to tensor: out of Memory Pool", i);
+ ALOGE("Cannot execute request. Error converting request input %u to tensor", i);
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
@@ -466,32 +555,25 @@ Return<V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::PrepareMemoryForOu
for (unsigned int i = 0; i < request.outputs.size(); i++)
{
const auto& outputArg = request.outputs[i];
-
armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
- const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, memPools);
- uint8_t* outputTensorBegin = static_cast<uint8_t*>(outputTensor.GetMemoryArea());
- if (outputTensorBegin == nullptr)
- {
- ALOGE("Cannot execute request. Error converting request output %u to tensor", i);
- return V1_3::ErrorStatus::GENERAL_FAILURE;
- }
+ auto result = ValidateRequestArgument<V1_3::ErrorStatus, V1_3::Request>(request,
+ outputTensorInfo,
+ outputArg,
+ "output");
- const size_t outputSize = outputTensorInfo.GetNumBytes();
- uint32_t poolIndex = outputArg.location.poolIndex;
- if (poolIndex >= memPools.size())
+ if (result != V1_3::ErrorStatus::NONE)
{
- ALOGE("Cannot execute request. Error converting request output %u to tensor: wrong poolIndex", i);
- return V1_3::ErrorStatus::GENERAL_FAILURE;
+ return result;
}
- uint8_t* memoryPoolBegin = memPools[poolIndex].getBuffer();
- uint32_t memoryPoolSize = memPools[poolIndex].getSize();
- bool outputTensorIsOutOfMemoryRage = (outputTensorBegin + outputSize) > (memoryPoolBegin + memoryPoolSize);
- if (outputTensorIsOutOfMemoryRage)
+ const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, memPools);
+
+ if (outputTensor.GetMemoryArea() == nullptr)
{
- ALOGE("Cannot execute request. Error converting request output %u to tensor: out of Memory Pool", i);
+ ALOGE("Cannot execute request. Error converting request output %u to tensor", i);
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
+ const size_t outputSize = outputTensorInfo.GetNumBytes();
unsigned int count = 0;
std::for_each(outputArg.dimensions.begin(), outputArg.dimensions.end(), [&](auto dim)
@@ -519,7 +601,12 @@ Return<V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::PrepareMemoryForOu
return V1_3::ErrorStatus::OUTPUT_INSUFFICIENT_SIZE;
}
- const size_t bufferSize = memPools.at(outputArg.location.poolIndex).getSize();
+ size_t bufferSize = 0;
+#if !defined(ARMNN_ANDROID_S)
+ bufferSize = memPools.at(outputArg.location.poolIndex).getHidlMemory().size();
+#else
+ bufferSize = memPools.at(outputArg.location.poolIndex).getSize();
+#endif
if (bufferSize < outputSize)
{
ALOGW("ArmnnPreparedModel_1_3::Execute failed bufferSize (%s) < outputSize (%s)",
@@ -539,7 +626,11 @@ std::tuple<V1_3::ErrorStatus, hidl_vec<V1_2::OutputShape>, V1_2::Timing, std::st
std::vector<android::nn::RunTimePoolInfo>& memPools,
const V1_3::Request& request)
{
+#if !defined(ARMNN_ANDROID_S)
+ if (!setRunTimePoolInfosFromMemoryPools(&memPools, request.pools))
+#else
if (!setRunTimePoolInfosFromMemoryPools(&memPools, uncheckedConvert(request.pools)))
+#endif
{
return {V1_3::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming, "ArmnnPreparedModel_1_3::execute"};
}
@@ -584,7 +675,7 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::ExecuteSynchronously(const V1_3
cbCtx.ctx.driverStart = Now();
}
- if (!android::nn::validateRequest(convertToV1_3(request), m_Model))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(convertToV1_3(request), m_Model))
{
ALOGE("ArmnnPreparedModel_1_3::ExecuteSynchronously invalid request model");
cbCtx.callback(V1_3::ErrorStatus::INVALID_ARGUMENT,
@@ -594,7 +685,7 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::ExecuteSynchronously(const V1_3
return Void();
}
- if (!android::nn::validateRequest(request, m_Model))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model))
{
ALOGE("ArmnnPreparedModel_1_3::ExecuteSynchronously invalid request model");
cbCtx.callback(V1_3::ErrorStatus::INVALID_ARGUMENT,
@@ -631,7 +722,10 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeSynchronously(const V1_0
V1_2::MeasureTiming measureTiming,
executeSynchronously_cb cb)
{
- ALOGV("ArmnnPreparedModel_1_3::executeSynchronously(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_3::executeSynchronously(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
if (cb == nullptr)
@@ -664,7 +758,10 @@ Return<void> ArmnnPreparedModel_1_3<HalVersion>::executeSynchronously_1_3(
const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
executeSynchronously_1_3_cb cb)
{
- ALOGV("ArmnnPreparedModel_1_3::executeSynchronously_1_3(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_3::executeSynchronously_1_3(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
if (cb == nullptr)
@@ -733,6 +830,8 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::ExecuteGraph(
CallbackContext cb)
{
ALOGV("ArmnnPreparedModel_1_3::ExecuteGraph(...)");
+ // Capture the graph execution start time.
+ std::chrono::time_point<std::chrono::system_clock> graphExecutionStart = std::chrono::system_clock::now();
DumpTensorsIfRequired("Input", inputTensors);
@@ -753,8 +852,29 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::ExecuteGraph(
{
cb.ctx.deviceStart = Now();
}
-
- armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
+ armnn::Status status;
+ if (m_AsyncModelExecutionEnabled)
+ {
+ ALOGW("ArmnnPreparedModel_1_3::ExecuteGraph m_AsyncModelExecutionEnabled true");
+ status = m_Runtime->Execute(*m_WorkingMemHandle, inputTensors, outputTensors);
+ }
+ else
+ {
+ ALOGW("ArmnnPreparedModel_1_3::ExecuteGraph m_AsyncModelExecutionEnabled false");
+ // Create a vector of Input and Output Ids which can be imported. An empty vector means all will be copied.
+ std::vector<armnn::ImportedInputId> importedInputIds;
+ if (m_EnableImport)
+ {
+ importedInputIds = m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
+ }
+ std::vector<armnn::ImportedOutputId> importedOutputIds;
+ if (m_EnableExport)
+ {
+ importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
+ }
+ status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
+ importedInputIds, importedOutputIds);
+ }
if (cb.ctx.measureTimings == V1_2::MeasureTiming::YES)
{
@@ -762,7 +882,7 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::ExecuteGraph(
}
if (status != armnn::Status::Success)
{
- ALOGW("EnqueueWorkload failed");
+ ALOGW("ArmnnPreparedModel_1_3::ExecuteGraph EnqueueWorkload failed");
cb.callback(V1_3::ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph");
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
@@ -790,24 +910,74 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::ExecuteGraph(
V1_2::Timing timing;
timing.timeOnDevice = MicrosecondsDuration(cb.ctx.deviceEnd, cb.ctx.deviceStart);
timing.timeInDriver = MicrosecondsDuration(cb.ctx.driverEnd, cb.ctx.driverStart);
- ALOGV("ArmnnPreparedModel_1_3::execute timing - Device = %" PRIu64 " Driver = %" PRIu64, timing.timeOnDevice,
- timing.timeInDriver);
+ ALOGV("ArmnnPreparedModel_1_3::execute timing - Device = %lu Driver = %lu",
+ static_cast<unsigned long>(timing.timeOnDevice), static_cast<unsigned long>(timing.timeInDriver));
cb.callback(V1_3::ErrorStatus::NONE, outputShapes, timing, "ArmnnPreparedModel_1_3::ExecuteGraph");
} else
{
cb.callback(V1_3::ErrorStatus::NONE, outputShapes, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph");
}
+ // Log the total time in this call. This is a good number to compare to that printed out by
+ // RuntimeImpl::EnqueueWorkload. The difference should be the execution overhead of the driver.
+ ALOGI("ArmnnPreparedModel_1_3::ExecuteGraph Execution time = %lld µs",
+ std::chrono::duration_cast<std::chrono::microseconds>
+ (std::chrono::system_clock::now() - graphExecutionStart).count());
return V1_3::ErrorStatus::NONE;
}
+/// Schedule the graph prepared from the request for execution
template<typename HalVersion>
-bool ArmnnPreparedModel_1_3<HalVersion>::ExecuteWithDummyInputs()
+template<typename CallbackContext>
+void ArmnnPreparedModel_1_3<HalVersion>::ScheduleGraphForExecution(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext callbackContext,
+ armnn::QosExecPriority priority)
+{
+ ALOGV("ArmnnPreparedModel_1_3::ScheduleGraphForExecution(...)");
+
+ DumpTensorsIfRequired("Input", *inputTensors);
+
+ unsigned int outputTensorSize = outputTensors.get()->size();
+ std::vector<V1_2::OutputShape> outputShapes(outputTensorSize);
+ for (unsigned int i = 0; i < outputTensorSize; i++)
+ {
+ std::pair<int, armnn::Tensor> outputTensorPair = outputTensors.get()->at(i);
+ const armnn::Tensor outputTensor = outputTensorPair.second;
+ const armnn::TensorInfo outputTensorInfo = outputTensor.GetInfo();
+
+ outputShapes[i] = ComputeShape(outputTensorInfo);
+ }
+
+ auto tpCb = std::make_shared<
+ ArmnnThreadPoolCallback_1_3<CallbackContext_1_3>>(this,
+ pMemPools,
+ outputShapes,
+ inputTensors,
+ outputTensors,
+ callbackContext);
+
+ m_Threadpool->Schedule(m_NetworkId,
+ *tpCb->m_InputTensors,
+ *tpCb->m_OutputTensors,
+ priority,
+ tpCb);
+ ALOGV("ArmnnPreparedModel_1_3::ScheduleGraphForExecution end");
+}
+
+template<typename HalVersion>
+bool ArmnnPreparedModel_1_3<HalVersion>::ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs)
{
std::vector<std::vector<char>> storage;
armnn::InputTensors inputTensors;
- for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++)
+ for (unsigned int i = 0; i < numInputs; i++)
{
- const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
+ // inputTensors (of type InputTensors) is composed of a vector of ConstTensors.
+ // Therefore, set all TensorInfo isConstant parameters of input Tensors to true.
+ inputTensorInfo.SetConstant();
+
storage.emplace_back(inputTensorInfo.GetNumBytes());
const armnn::ConstTensor inputTensor(inputTensorInfo, storage.back().data());
@@ -815,7 +985,7 @@ bool ArmnnPreparedModel_1_3<HalVersion>::ExecuteWithDummyInputs()
}
armnn::OutputTensors outputTensors;
- for (unsigned int i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++)
+ for (unsigned int i = 0; i < numOutputs; i++)
{
const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
storage.emplace_back(outputTensorInfo.GetNumBytes());
@@ -849,10 +1019,13 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::Execute(const V1_
ctx.driverStart = Now();
}
- ALOGV("ArmnnPreparedModel_1_3::execute(): %s", GetModelSummary(m_Model).c_str());
+ if (!m_PreparedFromCache)
+ {
+ ALOGV("ArmnnPreparedModel_1_3::execute(): %s", GetModelSummary(m_Model).c_str());
+ }
m_RequestCount++;
- if (!android::nn::validateRequest(request, m_Model))
+ if (!m_PreparedFromCache && !android::nn::validateRequest(request, m_Model))
{
callback(V1_3::ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming, "ArmnnPreparedModel_1_3::execute");
return V1_3::ErrorStatus::INVALID_ARGUMENT;
@@ -884,16 +1057,51 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::Execute(const V1_
return V1_3::ErrorStatus::NONE;
case V1_3::ErrorStatus::GENERAL_FAILURE:
return V1_3::ErrorStatus::GENERAL_FAILURE;
+ case V1_3::ErrorStatus::INVALID_ARGUMENT:
+ return V1_3::ErrorStatus::INVALID_ARGUMENT;
default:
{}
}
-
- ALOGV("ArmnnPreparedModel_1_3::execute(...) before PostMsg");
-
- // post the request for asynchronous execution
CallbackContext_1_3 cb;
cb.callback = callback;
cb.ctx = ctx;
+
+
+ enum class QosExecPriority
+ {
+ Low = 0,
+ Medium = 1,
+ High = 2
+ };
+
+
+ if (m_AsyncModelExecutionEnabled)
+ {
+ armnn::QosExecPriority priority;
+
+ switch (GetModelPriority()) {
+ case V1_3::Priority::LOW:
+ priority = armnn::QosExecPriority::Low;
+ break;
+ case V1_3::Priority::MEDIUM:
+ priority = armnn::QosExecPriority::Medium;
+ break;
+ case V1_3::Priority::HIGH:
+ priority = armnn::QosExecPriority::High;
+ break;
+ default:
+ priority = armnn::QosExecPriority::Medium;
+
+ }
+
+ ALOGV("ArmnnPreparedModel_1_3::execute(...) before ScheduleGraphForExecution");
+ ScheduleGraphForExecution(memPools, inputTensors, outputTensors, cb, priority);
+ ALOGV("ArmnnPreparedModel_1_3::execute(...) after ScheduleGraphForExecution");
+ return V1_3::ErrorStatus::NONE;
+ }
+
+ ALOGV("ArmnnPreparedModel_1_3::execute(...) before PostMsg");
+ // post the request for asynchronous execution
m_RequestThread.PostMsg(this, memPools, inputTensors, outputTensors, cb);
ALOGV("ArmnnPreparedModel_1_3::execute(...) after PostMsg");
return V1_3::ErrorStatus::NONE;
@@ -905,6 +1113,46 @@ V1_3::Priority ArmnnPreparedModel_1_3<HalVersion>::GetModelPriority()
return m_ModelPriority;
}
+template<typename HalVersion>
+template <typename CallbackContext>
+void ArmnnPreparedModel_1_3<HalVersion>::ArmnnThreadPoolCallback_1_3<CallbackContext>::Notify(
+ armnn::Status status, armnn::InferenceTimingPair timeTaken)
+{
+ ALOGV("ArmnnPreparedModel_1_3::ArmnnThreadPoolCallback_1_3<CallbackContext>::Notify");
+ CommitPools(*m_MemPools);
+
+ m_Model->DumpTensorsIfRequired("Output", *m_OutputTensors);
+
+ if (status != armnn::Status::Success)
+ {
+ ALOGW("ArmnnThreadPoolCallback_1_3::Notify EnqueueWorkload failed");
+ m_CallbackContext.callback(V1_3::ErrorStatus::GENERAL_FAILURE,
+ {},
+ g_NoTiming,
+ "ArmnnPreparedModel_1_3::ArmnnThreadPoolCallback_1_3");
+ return;
+ }
+
+ if (m_CallbackContext.ctx.measureTimings == V1_2::MeasureTiming::YES)
+ {
+ m_CallbackContext.ctx.deviceStart = timeTaken.first;
+ m_CallbackContext.ctx.deviceEnd = timeTaken.second;
+ m_CallbackContext.ctx.driverEnd = std::chrono::steady_clock::now();
+ V1_2::Timing timing;
+ timing.timeOnDevice = MicrosecondsDuration(m_CallbackContext.ctx.deviceEnd, m_CallbackContext.ctx.deviceStart);
+ timing.timeInDriver = MicrosecondsDuration(m_CallbackContext.ctx.driverEnd, m_CallbackContext.ctx.driverStart);
+ ALOGV("ArmnnPreparedModel_1_3::execute timing - Device = %lu Driver = %lu",
+ static_cast<unsigned long>(timing.timeOnDevice), static_cast<unsigned long>(timing.timeInDriver));
+ m_CallbackContext.callback(
+ V1_3::ErrorStatus::NONE, m_OutputShapes, timing, "ArmnnPreparedModel_1_3::ExecuteGraph");
+ } else
+ {
+ m_CallbackContext.callback(
+ V1_3::ErrorStatus::NONE, m_OutputShapes, g_NoTiming, "ArmnnPreparedModel_1_3::ExecuteGraph");
+ }
+ return;
+}
+
#ifdef ARMNN_ANDROID_NN_V1_3
template class ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>;
template Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>::ExecuteGraph<CallbackContext_1_3>(
@@ -912,6 +1160,13 @@ template Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>::
armnn::InputTensors& pInputTensors,
armnn::OutputTensors& pOutputTensors,
CallbackContext_1_3 cb);
+
+template void ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>::ScheduleGraphForExecution<CallbackContext_1_3>(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext_1_3 callbackContext,
+ armnn::QosExecPriority priority);
#endif
} // namespace armnn_driver
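
For reference, a condensed sketch of the asynchronous set-up introduced above: one working memory handle is created per worker thread and the handles are handed to a shared armnn::Threadpool. The free-function form and the name SetUpAsyncExecution are illustrative only; in the driver these steps live inside the ArmnnPreparedModel_1_3 constructors.

    #include <armnn/ArmNN.hpp>
    #include <armnn/Threadpool.hpp>

    #include <memory>
    #include <vector>

    // Illustrative helper mirroring the constructor logic above (not driver code).
    std::shared_ptr<armnn::IWorkingMemHandle> SetUpAsyncExecution(
        armnn::IRuntime* runtime,
        armnn::NetworkId networkId,
        unsigned int numberOfThreads,
        std::unique_ptr<armnn::Threadpool>& threadpool)
    {
        // One working memory handle per worker thread.
        std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
        for (unsigned int i = 0; i < numberOfThreads; ++i)
        {
            memHandles.emplace_back(runtime->CreateWorkingMemHandle(networkId));
        }

        if (!threadpool)
        {
            threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
        }
        else
        {
            // The threadpool is shared between prepared models; just register the new handles.
            threadpool->LoadMemHandles(memHandles);
        }

        // The last handle doubles as the model's own working memory, as in the constructors above.
        return memHandles.back();
    }
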
diff --git a/ArmnnPreparedModel_1_3.hpp b/ArmnnPreparedModel_1_3.hpp
index 5dcc202..6c1c5c2 100644
--- a/ArmnnPreparedModel_1_3.hpp
+++ b/ArmnnPreparedModel_1_3.hpp
@@ -12,6 +12,8 @@
#include <NeuralNetworks.h>
#include <armnn/ArmNN.hpp>
+#include <armnn/Threadpool.hpp>
+
#include <string>
#include <vector>
@@ -51,21 +53,36 @@ public:
const HalModel& model,
const std::string& requestInputsAndOutputsDumpDir,
const bool gpuProfilingEnabled,
- V1_3::Priority priority = V1_3::Priority::MEDIUM);
+ V1_3::Priority priority = V1_3::Priority::MEDIUM,
+ const bool asyncModelExecutionEnabled = false,
+ const unsigned int numberOfThreads = 1,
+ const bool importEnabled = false,
+ const bool exportEnabled = false);
+
+ ArmnnPreparedModel_1_3(armnn::NetworkId networkId,
+ armnn::IRuntime* runtime,
+ const std::string& requestInputsAndOutputsDumpDir,
+ const bool gpuProfilingEnabled,
+ V1_3::Priority priority = V1_3::Priority::MEDIUM,
+ const bool asyncModelExecutionEnabled = false,
+ const unsigned int numberOfThreads = 1,
+ const bool importEnabled = false,
+ const bool exportEnabled = false,
+ const bool preparedFromCache = false);
virtual ~ArmnnPreparedModel_1_3();
Return<V1_0::ErrorStatus> execute(const V1_0::Request& request,
- const sp<V1_0::IExecutionCallback>& callback) override;
+ const ::android::sp<V1_0::IExecutionCallback>& callback) override;
Return<V1_0::ErrorStatus> execute_1_2(const V1_0::Request& request, V1_2::MeasureTiming measure,
- const sp<V1_2::IExecutionCallback>& callback) override;
+ const ::android::sp<V1_2::IExecutionCallback>& callback) override;
Return<V1_3::ErrorStatus> execute_1_3(const V1_3::Request& request,
V1_2::MeasureTiming measure,
const V1_3::OptionalTimePoint&,
const V1_3::OptionalTimeoutDuration&,
- const sp<V1_3::IExecutionCallback>& callback) override;
+ const ::android::sp<V1_3::IExecutionCallback>& callback) override;
Return<void> executeSynchronously(const V1_0::Request &request,
V1_2::MeasureTiming measure,
@@ -86,7 +103,7 @@ public:
executeFenced_cb callback) override;
Return<void> configureExecutionBurst(
- const sp<V1_2::IBurstCallback>& callback,
+ const ::android::sp<V1_2::IBurstCallback>& callback,
const android::hardware::MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
const android::hardware::MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
configureExecutionBurst_cb cb) override;
@@ -104,11 +121,40 @@ public:
/// Executes this model with dummy inputs (e.g. all zeroes).
/// \return false on failure, otherwise true
- bool ExecuteWithDummyInputs();
+ bool ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs);
V1_3::Priority GetModelPriority();
private:
+
+ template<typename CallbackContext>
+ class ArmnnThreadPoolCallback_1_3 : public armnn::IAsyncExecutionCallback
+ {
+ public:
+ ArmnnThreadPoolCallback_1_3(ArmnnPreparedModel_1_3<HalVersion>* model,
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::vector<V1_2::OutputShape> outputShapes,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext callbackContext) :
+ m_Model(model),
+ m_MemPools(pMemPools),
+ m_OutputShapes(outputShapes),
+ m_InputTensors(inputTensors),
+ m_OutputTensors(outputTensors),
+ m_CallbackContext(callbackContext)
+ {}
+
+ void Notify(armnn::Status status, armnn::InferenceTimingPair timeTaken) override;
+
+ ArmnnPreparedModel_1_3<HalVersion>* m_Model;
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> m_MemPools;
+ std::vector<V1_2::OutputShape> m_OutputShapes;
+ std::shared_ptr<armnn::InputTensors> m_InputTensors;
+ std::shared_ptr<armnn::OutputTensors> m_OutputTensors;
+ CallbackContext m_CallbackContext;
+ };
+
Return <V1_3::ErrorStatus> Execute(const V1_3::Request& request,
V1_2::MeasureTiming measureTiming,
CallbackAsync_1_3 callback);
@@ -124,7 +170,7 @@ private:
const V1_3::Request& request,
const std::vector<android::nn::RunTimePoolInfo>& memPools);
- std::tuple<V1_3::ErrorStatus, android::hardware::hidl_vec<V1_2::OutputShape>, V1_2::Timing, std::string> PrepareMemoryForIO(
+ std::tuple<V1_3::ErrorStatus, hidl_vec<V1_2::OutputShape>, V1_2::Timing, std::string> PrepareMemoryForIO(
armnn::InputTensors& inputs,
armnn::OutputTensors& outputs,
std::vector<android::nn::RunTimePoolInfo>& memPools,
@@ -133,16 +179,35 @@ private:
template <typename TensorBindingCollection>
void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings);
- armnn::NetworkId m_NetworkId;
- armnn::IRuntime* m_Runtime;
- V1_3::Model m_Model;
+ /// schedule the graph prepared from the request for execution
+ template<typename CallbackContext>
+ void ScheduleGraphForExecution(
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext callbackContext,
+ armnn::QosExecPriority priority);
+
+ armnn::NetworkId m_NetworkId;
+ armnn::IRuntime* m_Runtime;
+ V1_3::Model m_Model;
// There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
// It is specific to this class, so it is declared as static here
- static RequestThread_1_3<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3> m_RequestThread;
- uint32_t m_RequestCount;
- const std::string& m_RequestInputsAndOutputsDumpDir;
- const bool m_GpuProfilingEnabled;
- V1_3::Priority m_ModelPriority;
+ static RequestThread_1_3<ArmnnPreparedModel_1_3,
+ HalVersion,
+ CallbackContext_1_3> m_RequestThread;
+ uint32_t m_RequestCount;
+ const std::string& m_RequestInputsAndOutputsDumpDir;
+ const bool m_GpuProfilingEnabled;
+ V1_3::Priority m_ModelPriority;
+
+ // Static to allow sharing of threadpool between ArmnnPreparedModel instances
+ static std::unique_ptr<armnn::Threadpool> m_Threadpool;
+ std::shared_ptr<armnn::IWorkingMemHandle> m_WorkingMemHandle;
+ const bool m_AsyncModelExecutionEnabled;
+ const bool m_EnableImport;
+ const bool m_EnableExport;
+ const bool m_PreparedFromCache;
};
}
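
A hedged caller-side sketch of how the new model-less constructor and the reworked ExecuteWithDummyInputs overload fit together when a network is prepared from cache. This call site is not part of the patch; the helper name PrepareFromCacheSketch and the include paths are assumptions.

    #include "ArmnnPreparedModel_1_3.hpp"
    #include "1.3/HalPolicy.hpp"

    #include <memory>
    #include <string>

    namespace armnn_driver
    {

    // Illustrative only: no V1_3::Model is retained when preparing from cache, so the
    // cache-only constructor is used and the later validateRequest()/GetModelSummary()
    // calls are skipped via preparedFromCache = true.
    std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> PrepareFromCacheSketch(
        armnn::NetworkId networkId,
        armnn::IRuntime* runtime,
        const std::string& dumpDir,
        unsigned int numInputs,
        unsigned int numOutputs)
    {
        auto preparedModel = std::make_unique<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>>(
            networkId,
            runtime,
            dumpDir,
            /*gpuProfilingEnabled=*/false,
            ::android::hardware::neuralnetworks::V1_3::Priority::MEDIUM,
            /*asyncModelExecutionEnabled=*/false,
            /*numberOfThreads=*/1,
            /*importEnabled=*/false,
            /*exportEnabled=*/false,
            /*preparedFromCache=*/true);

        // Warm the network once; the input and output counts are passed explicitly
        // because the model itself is not available to derive them from.
        if (!preparedModel->ExecuteWithDummyInputs(numInputs, numOutputs))
        {
            return nullptr;
        }
        return preparedModel;
    }

    } // armnn_driver
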
diff --git a/CacheDataHandler.cpp b/CacheDataHandler.cpp
new file mode 100644
index 0000000..5f3a307
--- /dev/null
+++ b/CacheDataHandler.cpp
@@ -0,0 +1,66 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "CacheDataHandler.hpp"
+
+#include <log/log.h>
+
+namespace armnn_driver
+{
+
+CacheDataHandler& CacheDataHandlerInstance()
+{
+ static CacheDataHandler instance;
+ return instance;
+}
+
+void CacheDataHandler::Register(const HidlToken token, const size_t hashValue, const size_t cacheSize)
+{
+ if (m_CacheDataMap.find(hashValue) != m_CacheDataMap.end()
+ && m_CacheDataMap.at(hashValue).GetToken() == token
+ && m_CacheDataMap.at(hashValue).GetCacheSize() == cacheSize)
+ {
+ ALOGV("CacheDataHandler::Register() Hash value has already been registered.");
+ return;
+ }
+ CacheHandle cacheHandle(token, cacheSize);
+ m_CacheDataMap.insert({hashValue, cacheHandle});
+}
+
+bool CacheDataHandler::Validate(const HidlToken token, const size_t hashValue, const size_t cacheSize) const
+{
+ return (m_CacheDataMap.find(hashValue) != m_CacheDataMap.end()
+ && m_CacheDataMap.at(hashValue).GetToken() == token
+ && m_CacheDataMap.at(hashValue).GetCacheSize() == cacheSize);
+}
+
+size_t CacheDataHandler::Hash(std::vector<uint8_t>& cacheData)
+{
+ std::size_t hash = cacheData.size();
+ for (auto& i : cacheData)
+ {
+ hash = ((hash << 5) - hash) + i;
+ }
+ return hash;
+}
+
+size_t CacheDataHandler::GetCacheSize(HidlToken token)
+{
+ for (auto i = m_CacheDataMap.begin(); i != m_CacheDataMap.end(); ++i)
+ {
+ if (i->second.GetToken() == token)
+ {
+ return i->second.GetCacheSize();
+ }
+ }
+ return 0;
+}
+
+void CacheDataHandler::Clear()
+{
+ m_CacheDataMap.clear();
+}
+
+} // armnn_driver
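
A standalone illustration of the rolling hash above: ((hash << 5) - hash) equals hash * 31, so this is the classic 31-based byte hash seeded with the buffer length. The helper name HashCacheData is an assumption.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Same computation as CacheDataHandler::Hash, written with the multiplication spelled out.
    std::size_t HashCacheData(const std::vector<std::uint8_t>& cacheData)
    {
        std::size_t hash = cacheData.size();
        for (auto byte : cacheData)
        {
            hash = hash * 31 + byte;   // equivalent to ((hash << 5) - hash) + byte
        }
        return hash;
    }

    // Example: {0x01, 0x02} starts at hash = 2, then (2 * 31 + 1) = 63, then (63 * 31 + 2) = 1955.
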
diff --git a/CacheDataHandler.hpp b/CacheDataHandler.hpp
new file mode 100644
index 0000000..5b1b295
--- /dev/null
+++ b/CacheDataHandler.hpp
@@ -0,0 +1,68 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <HalInterfaces.h>
+
+#include <vector>
+#include <unordered_map>
+
+#include <NeuralNetworks.h>
+
+namespace armnn_driver
+{
+
+using HidlToken = android::hardware::hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
+
+class CacheHandle
+{
+public:
+ CacheHandle(const HidlToken token, const size_t cacheSize)
+ : m_HidlToken(token), m_CacheSize(cacheSize) {}
+
+ ~CacheHandle() {};
+
+ HidlToken GetToken() const
+ {
+ return m_HidlToken;
+ }
+
+ size_t GetCacheSize() const
+ {
+ return m_CacheSize;
+ }
+
+private:
+ const HidlToken m_HidlToken;
+ const size_t m_CacheSize;
+};
+
+class CacheDataHandler
+{
+public:
+ CacheDataHandler() {}
+ ~CacheDataHandler() {}
+
+ void Register(const HidlToken token, const size_t hashValue, const size_t cacheSize);
+
+ bool Validate(const HidlToken token, const size_t hashValue, const size_t cacheSize) const;
+
+ size_t Hash(std::vector<uint8_t>& cacheData);
+
+ size_t GetCacheSize(HidlToken token);
+
+ void Clear();
+
+private:
+ CacheDataHandler(const CacheDataHandler&) = delete;
+ CacheDataHandler& operator=(const CacheDataHandler&) = delete;
+
+ std::unordered_map<size_t, CacheHandle> m_CacheDataMap;
+};
+
+CacheDataHandler& CacheDataHandlerInstance();
+
+} // armnn_driver
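
A hedged usage sketch for the registry (not part of the patch; the function name RegisterAndCheck is an assumption): the driver hashes the serialized network once, records the token/hash/size triple after compilation, and validates the same triple when later asked to prepare from cache.

    #include "CacheDataHandler.hpp"

    #include <cstdint>
    #include <vector>

    namespace armnn_driver
    {

    // Illustrative only: register a freshly written cache blob, then confirm that a
    // later prepare-from-cache request still matches the recorded token, hash and size.
    bool RegisterAndCheck(const HidlToken& token, std::vector<uint8_t>& cacheData)
    {
        const size_t hashValue = CacheDataHandlerInstance().Hash(cacheData);
        const size_t cacheSize = cacheData.size();

        CacheDataHandlerInstance().Register(token, hashValue, cacheSize);

        return CacheDataHandlerInstance().Validate(token, hashValue, cacheSize);
    }

    } // armnn_driver
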
diff --git a/ConversionUtils.cpp b/ConversionUtils.cpp
index b03ffbd..c691c55 100644
--- a/ConversionUtils.cpp
+++ b/ConversionUtils.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -31,7 +31,11 @@ bool LayerInputHandle::IsValid() const
void LayerInputHandle::Connect(armnn::IInputSlot& inputSlot)
{
- ARMNN_ASSERT(IsValid());
+ if (!IsValid())
+ {
+ throw armnn::RuntimeException("LayerInputHandle is invalid");
+ }
+
if (m_OutputSlot)
{
m_OutputSlot->Connect(inputSlot);
@@ -40,7 +44,10 @@ void LayerInputHandle::Connect(armnn::IInputSlot& inputSlot)
void LayerInputHandle::Disconnect(armnn::IInputSlot& inputSlot)
{
- ARMNN_ASSERT(IsValid());
+ if (!IsValid())
+ {
+ throw armnn::RuntimeException("LayerInputHandle is invalid");
+ }
if (m_OutputSlot)
{
m_OutputSlot->Disconnect(inputSlot);
@@ -52,17 +59,37 @@ const armnn::TensorInfo& LayerInputHandle::GetTensorInfo() const
return m_TensorInfo;
}
+void LayerInputHandle::SanitizeQuantizationScale(LayerInputHandle& weight,
+ LayerInputHandle& input)
+{
+ if (m_OutputSlot)
+ {
+ armnn::TensorInfo weightInfo = weight.GetTensorInfo();
+ armnn::TensorInfo inputInfo = input.GetTensorInfo();
+ armnn::TensorInfo biasInfo = GetTensorInfo();
+
+ SanitizeBiasQuantizationScale(biasInfo, weightInfo, inputInfo);
+
+ m_TensorInfo = biasInfo;
+ m_OutputSlot->SetTensorInfo(biasInfo);
+ }
+}
+
ConstTensorPin::ConstTensorPin(bool optional)
: m_Optional(optional)
{}
-ConstTensorPin::ConstTensorPin(const armnn::TensorInfo& tensorInfo,
+ConstTensorPin::ConstTensorPin(armnn::TensorInfo& tensorInfo,
const void* valueStart,
uint32_t numBytes,
const armnn::PermutationVector& mappings)
+ : m_Optional(false)
{
armnn::IgnoreUnused(numBytes);
- assert(tensorInfo.GetNumBytes() == numBytes);
+ if (tensorInfo.GetNumBytes() != numBytes)
+ {
+ ALOGW("The size of ConstTensor does not match its TensorInfo.");
+ }
const bool needsSwizzling = (mappings.GetSize() > 0);
if (needsSwizzling)
@@ -70,7 +97,7 @@ ConstTensorPin::ConstTensorPin(const armnn::TensorInfo& tensorInfo,
m_SwizzledTensorData.resize(tensorInfo.GetNumBytes());
SwizzleAndroidNn4dTensorToArmNn(tensorInfo, valueStart, m_SwizzledTensorData.data(), mappings);
- m_ConstTensor = armnn::ConstTensor(armnnUtils::Permuted(tensorInfo, mappings), m_SwizzledTensorData.data());
+ m_ConstTensor = armnn::ConstTensor(tensorInfo, m_SwizzledTensorData.data());
}
else
{
@@ -112,8 +139,11 @@ armnn::IConnectableLayer* ProcessActivation(const armnn::TensorInfo& tensorInfo,
armnn::IConnectableLayer* prevLayer,
ConversionData& data)
{
- ARMNN_ASSERT(prevLayer->GetNumOutputSlots() == 1);
-
+ if (prevLayer->GetNumOutputSlots() != 1)
+ {
+ Fail("%s: Incorrect Number of OutputSlots expected 1 was %i", __func__, prevLayer->GetNumOutputSlots());
+ return nullptr;
+ }
prevLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo);
armnn::IConnectableLayer* activationLayer = prevLayer;
@@ -161,10 +191,12 @@ armnn::IConnectableLayer* ProcessActivation(const armnn::TensorInfo& tensorInfo,
}
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsActivationSupported,
data.m_Backends,
isSupported,
+ setBackend,
prevLayer->GetOutputSlot(0).GetTensorInfo(),
tensorInfo,
activationDesc);
@@ -174,6 +206,7 @@ armnn::IConnectableLayer* ProcessActivation(const armnn::TensorInfo& tensorInfo,
}
activationLayer = data.m_Network->AddActivationLayer(activationDesc);
+ activationLayer->SetBackendId(setBackend);
prevLayer->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
activationLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo);
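
The pattern introduced above, and repeated throughout ConversionUtils.hpp below, is that the support check now also reports which backend accepted the layer, and that backend is pinned on the layer once it has been created. A condensed fragment of the ConvertToActivation flow, using only names from this patch, with the null check placed before the first dereference:

    bool isSupported = false;
    armnn::BackendId setBackend;
    FORWARD_LAYER_SUPPORT_FUNC(__func__,
                               IsActivationSupported,
                               data.m_Backends,
                               isSupported,
                               setBackend,
                               input.GetTensorInfo(),
                               outInfo,
                               activationDesc);
    if (!isSupported)
    {
        return false;
    }

    armnn::IConnectableLayer* layer = data.m_Network->AddActivationLayer(activationDesc);
    if (!layer)
    {
        return Fail("%s: Could not add the ActivationLayer", __func__);
    }
    layer->SetBackendId(setBackend);
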
diff --git a/ConversionUtils.hpp b/ConversionUtils.hpp
index 1747f61..232a81a 100644
--- a/ConversionUtils.hpp
+++ b/ConversionUtils.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -8,9 +8,7 @@
#include "Utils.hpp"
#include <armnn/ArmNN.hpp>
-#include <armnn/ILayerSupport.hpp>
#include <armnn/BackendHelper.hpp>
-#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnn/utility/NumericCast.hpp>
@@ -42,9 +40,14 @@ namespace armnn_driver
///
#ifdef ARMNN_ANDROID_R
-using OperandType = android::nn::OperandType;
+using OperandType = android::nn::hal::OperandType;
#endif
+#ifdef ARMNN_ANDROID_S
+#include <nnapi/Types.h>
+#endif
+
+
struct ConversionData
{
ConversionData(const std::vector<armnn::BackendId>& backends)
@@ -74,6 +77,9 @@ public:
const armnn::TensorInfo& GetTensorInfo() const;
+ void SanitizeQuantizationScale(LayerInputHandle& weight,
+ LayerInputHandle& input);
+
private:
armnn::IOutputSlot* m_OutputSlot;
bool m_Valid;
@@ -91,7 +97,7 @@ public:
// @param valueStart Start address of tensor data. Belongs to one of the memory pools associated with
// the model being converted.
// @param numBytes Number of bytes for the tensor data.
- ConstTensorPin(const armnn::TensorInfo& tensorInfo, const void* valueStart, uint32_t numBytes,
+ ConstTensorPin(armnn::TensorInfo& tensorInfo, const void* valueStart, uint32_t numBytes,
const armnn::PermutationVector& mappings);
ConstTensorPin(const ConstTensorPin& other) = delete;
@@ -138,19 +144,20 @@ static bool Fail(const char* formatStr, Args&&... args)
// Convenience macro to call an Is*Supported function and log caller name together with reason for lack of support.
// Called as: FORWARD_LAYER_SUPPORT_FUNC(__func__, Is*Supported, backends, a, b, c, d, e)
-#define FORWARD_LAYER_SUPPORT_FUNC(funcName, func, backends, supported, ...) \
+#define FORWARD_LAYER_SUPPORT_FUNC(funcName, func, backends, supported, setBackend, ...) \
try \
{ \
for (auto&& backendId : backends) \
{ \
auto layerSupportObject = armnn::GetILayerSupportByBackendId(backendId); \
- if (layerSupportObject) \
+ if (layerSupportObject.IsBackendRegistered()) \
{ \
std::string reasonIfUnsupported; \
supported = \
- layerSupportObject->func(__VA_ARGS__, armnn::Optional<std::string&>(reasonIfUnsupported)); \
+ layerSupportObject.func(__VA_ARGS__, armnn::Optional<std::string&>(reasonIfUnsupported)); \
if (supported) \
{ \
+ setBackend = backendId; \
break; \
} \
else \
@@ -279,7 +286,10 @@ armnn::IConnectableLayer& AddReshapeLayer(armnn::INetwork& network,
reshapeDescriptor.m_TargetShape = reshapeInfo.GetShape();
armnn::IConnectableLayer* reshapeLayer = network.AddReshapeLayer(reshapeDescriptor);
- ARMNN_ASSERT(reshapeLayer != nullptr);
+ if (!reshapeLayer)
+ {
+ throw armnn::RuntimeException("ReshapeLayer is null");
+ }
// Attach the input layer to the reshape layer
inputLayer.Connect(reshapeLayer->GetInputSlot(0));
@@ -293,7 +303,10 @@ bool BroadcastTensor(LayerInputHandle& input0,
armnn::IConnectableLayer* startLayer,
ConversionData& data)
{
- ARMNN_ASSERT(startLayer != nullptr);
+ if (!startLayer)
+ {
+ throw armnn::RuntimeException("StartLayer is null");
+ }
const armnn::TensorInfo& inputInfo0 = input0.GetTensorInfo();
const armnn::TensorInfo& inputInfo1 = input1.GetTensorInfo();
@@ -336,10 +349,12 @@ bool BroadcastTensor(LayerInputHandle& input0,
armnn::ReshapeDescriptor reshapeDescriptor;
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsReshapeSupported,
data.m_Backends,
isSupported,
+ setBackend,
smallInfo,
reshapedInfo,
reshapeDescriptor);
@@ -348,8 +363,13 @@ bool BroadcastTensor(LayerInputHandle& input0,
return false;
}
- ARMNN_ASSERT(data.m_Network != nullptr);
+ if (!data.m_Network)
+ {
+ throw armnn::RuntimeException("Network is null");
+ }
+
armnn::IConnectableLayer& reshapeLayer = AddReshapeLayer(*data.m_Network, smallInputHandle, reshapedInfo);
+ reshapeLayer.SetBackendId(setBackend);
if (input0IsSmaller)
{
@@ -418,7 +438,7 @@ void CalcPaddingTransposeConv(uint32_t output, uint32_t kernel, int32_t stride,
Shape GetOperandShape(const V1_0::Operand& operand)
{
Shape shape;
- shape.type = android::nn::OperandType(operand.type);
+ shape.type = OperandType(operand.type);
shape.dimensions = operand.dimensions;
shape.scale = operand.scale;
shape.offset = operand.zeroPoint;
@@ -430,7 +450,7 @@ Shape GetOperandShape(const V1_0::Operand& operand)
Shape GetOperandShape(const V1_2::Operand& operand)
{
Shape shape;
- shape.type = android::nn::OperandType(operand.type);
+ shape.type = OperandType(operand.type);
shape.dimensions = operand.dimensions;
shape.scale = operand.scale;
shape.offset = operand.zeroPoint;
@@ -474,7 +494,8 @@ void SanitizeBiasQuantizationScale(armnn::TensorInfo& biasInfo,
std::transform(biasScales.begin(), biasScales.end(), biasScales.begin(), UpdateBiasScaleValue);
biasInfo.SetQuantizationScales(biasScales);
- biasInfo.SetQuantizationDim(weightInfo.GetQuantizationDim());
+ // bias is expected to be a 1d tensor, set qdim=0
+ biasInfo.SetQuantizationDim(0);
ALOGV("Bias quantization params have been updated for per-axis quantization");
}
@@ -495,7 +516,7 @@ void SanitizeBiasQuantizationScale(armnn::TensorInfo& biasInfo,
// 4D Tensor Permutations
const armnn::PermutationVector IdentityPermutation4D({ 0U, 1U, 2U, 3U });
const armnn::PermutationVector IdentityPermutation3D({ 0U, 1U, 2U });
-const armnn::PermutationVector SwapDim1And2({ 0U, 2U, 1U, 3U });
+const armnn::PermutationVector SwapDim2And3({ 0U, 1U, 3U, 2U });
// 3D Permutation Vectors
const armnn::PermutationVector RotateTensorLeft({ 1U, 2U, 0U });
@@ -507,9 +528,10 @@ armnn::IConnectableLayer& AddTransposeLayer(armnn::INetwork& network, OSlot& inp
{
// Add swizzle layer
armnn::IConnectableLayer* const layer = network.AddTransposeLayer(mappings);
-
- ARMNN_ASSERT(layer != nullptr);
-
+ if (!layer)
+ {
+ throw armnn::RuntimeException("TransposeLayer is null");
+ }
// Connect input to swizzle layer
input.Connect(layer->GetInputSlot(0));
@@ -571,7 +593,8 @@ bool RequiresReshape(armnn::TensorShape & inputShape)
void SwizzleInputs(armnn::INetwork& network,
std::vector<LayerInputHandle>& inputs,
std::vector<armnn::TensorShape>& inputShapes,
- const armnn::PermutationVector& mapping)
+ const armnn::PermutationVector& mapping,
+ std::vector<armnn::BackendId>& setBackends)
{
if (!mapping.IsEqual(IdentityPermutation4D))
{
@@ -580,6 +603,7 @@ void SwizzleInputs(armnn::INetwork& network,
{
// add swizzle layer
armnn::IConnectableLayer& swizzleLayer = AddTransposeLayer(network, inputs[i], mapping);
+ swizzleLayer.SetBackendId(setBackends[i]);
auto& outputSlot = swizzleLayer.GetOutputSlot(0);
auto& outputInfo = outputSlot.GetTensorInfo();
// replace inputs with the swizzled ones
@@ -597,6 +621,7 @@ bool TransposeInputTensors(ConversionData& data,
// If we have a IdentityPermutation4D or IdentityPermutation3D then we are not permuting
if (!mapping.IsEqual(IdentityPermutation4D) && !mapping.IsEqual(IdentityPermutation3D))
{
+ std::vector<armnn::BackendId> setBackendsVec;
armnn::TensorInfo outputTransposeInfo;
size_t nInputs = inputs.size();
for (size_t i=0; i<nInputs; ++i)
@@ -607,20 +632,23 @@ bool TransposeInputTensors(ConversionData& data,
outputTransposeInfo = armnnUtils::TransposeTensorShape(inputs[i].GetTensorInfo(), mapping);
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsTransposeSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputs[i].GetTensorInfo(),
outputTransposeInfo,
transposeDesc);
+ setBackendsVec.push_back(setBackend);
if (!isSupported)
{
return false;
}
}
- SwizzleInputs(*data.m_Network, inputs, inputShapes, mapping);
+ SwizzleInputs(*data.m_Network, inputs, inputShapes, mapping, setBackendsVec);
}
return true;
}
@@ -631,15 +659,19 @@ bool CreateConcatPermutationParameters(const unsigned int numberOfDimensions,
std::pair<armnn::PermutationVector, armnn::PermutationVector> & permutationPair)
{
bool needPermute = false;
- ARMNN_ASSERT(numberOfDimensions >= 3);
+
+ if (numberOfDimensions < 3)
+ {
+ return Fail("%s: Invalid numberOfDimensions: %i < 3", __func__, numberOfDimensions);
+ }
// ArmNN uses Compute Library subtensors to perform concatenation
// This only works when concatenating along dimension 0, 1 or 3 for a 4-D tensor,
// or along dimension 0 or 2 for a 3-D tensor.
if (numberOfDimensions == 4 && concatDimension == 2)
{
- concatDimension = 1;
- permutationPair = std::make_pair(SwapDim1And2, SwapDim1And2);
+ concatDimension = 3;
+ permutationPair = std::make_pair(SwapDim2And3, SwapDim2And3);
needPermute = true;
}
else if (numberOfDimensions == 3 && concatDimension == 1)
@@ -697,13 +729,18 @@ const HalOperand* GetInputOperand(const HalOperation& operation,
{
if (failOnIndexOutOfBounds)
{
- Fail("%s: invalid input index: %i out of %i", __func__, inputIndex, operation.inputs.size());
+ Fail("%s: Invalid input index: %i out of %i", __func__, inputIndex, operation.inputs.size());
}
return nullptr;
}
// Model should have been validated beforehand
- ARMNN_ASSERT(operation.inputs[inputIndex] < getMainModel(model).operands.size());
+ if (operation.inputs[inputIndex] >= getMainModel(model).operands.size())
+ {
+ Fail("%s: invalid model index: %i >= %i", __func__, inputIndex, getMainModel(model).operands.size());
+ return nullptr;
+ }
+
return &getMainModel(model).operands[operation.inputs[inputIndex]];
}
@@ -722,8 +759,11 @@ const HalOperand* GetOutputOperand(const HalOperation& operation,
}
// Model should have been validated beforehand
- ARMNN_ASSERT(operation.outputs[outputIndex] < getMainModel(model).operands.size());
-
+ if (operation.outputs[outputIndex] >= getMainModel(model).operands.size())
+ {
+ Fail("%s: invalid model index: %i >= %i", __func__, outputIndex, getMainModel(model).operands.size());
+ return nullptr;
+ }
return &getMainModel(model).operands[operation.outputs[outputIndex]];
}
@@ -844,11 +884,9 @@ ConstTensorPin ConvertOperandToConstTensorPin(const HalOperand& operand,
}
armnn::TensorInfo tensorInfo = GetTensorInfoForOperand(operand);
- // Android datalayout might be different than armnn datalayout, e.g. the kernel for the depthwise convolution.
- if (tensorInfo.HasPerAxisQuantization())
- {
- tensorInfo.SetQuantizationDim(dimensionMappings[tensorInfo.GetQuantizationDim().value()]);
- }
+
+ // Make sure isConstant flag is set.
+ tensorInfo.SetConstant();
if (overrideTensorShape != nullptr)
{
@@ -1167,7 +1205,8 @@ template<typename HalPolicy,
LayerInputHandle ConvertToLayerInputHandle(const HalOperation& operation,
uint32_t inputIndex,
const HalModel& model,
- ConversionData& data)
+ ConversionData& data,
+ const armnn::PermutationVector& dimensionMappings = g_DontPermute)
{
using HalOperand = typename HalPolicy::Operand;
using HalOperandType = typename HalPolicy::OperandType;
@@ -1206,6 +1245,7 @@ LayerInputHandle ConvertToLayerInputHandle(const HalOperation& operation,
IsInputSupported,
data.m_Backends,
isInputSupported,
+ armnn::BackendId(),
operandTensorInfo);
if (!isInputSupported)
@@ -1230,14 +1270,18 @@ LayerInputHandle ConvertToLayerInputHandle(const HalOperation& operation,
case HalOperandLifeTime::CONSTANT_REFERENCE:
{
// The tensor has an already known constant value, and can be converted into an ArmNN Constant layer.
- ConstTensorPin tensorPin = ConvertOperandToConstTensorPin<HalPolicy>(*operand, model, data);
+ ConstTensorPin tensorPin =
+ ConvertOperandToConstTensorPin<HalPolicy>(*operand, model, data, dimensionMappings);
+
if (tensorPin.IsValid())
{
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsConstantSupported,
data.m_Backends,
isSupported,
+ setBackend,
tensorPin.GetConstTensor().GetInfo());
if (!isSupported)
{
@@ -1246,17 +1290,18 @@ LayerInputHandle ConvertToLayerInputHandle(const HalOperation& operation,
armnn::IConnectableLayer* constantLayer =
data.m_Network->AddConstantLayer(tensorPin.GetConstTensor());
+ constantLayer->SetBackendId(setBackend);
armnn::IOutputSlot& outputSlot = constantLayer->GetOutputSlot(0);
- outputSlot.SetTensorInfo(tensorPin.GetConstTensor().GetInfo());
+ armnn::TensorInfo constantTensorInfo = tensorPin.GetConstTensor().GetInfo();
+ outputSlot.SetTensorInfo(constantTensorInfo);
- return LayerInputHandle(true, &outputSlot, operandTensorInfo);
+ return LayerInputHandle(true, &outputSlot, constantTensorInfo);
}
else
{
Fail("%s: invalid operand tensor", __func__);
return LayerInputHandle();
}
- break;
}
default:
{
@@ -1280,7 +1325,8 @@ template<typename HalPolicy>
LayerInputHandle ConvertToLayerInputHandle(const ::android::hardware::neuralnetworks::V1_3::Operation& operation,
uint32_t inputIndex,
const::android::hardware::neuralnetworks::V1_3::Model& model,
- ConversionData& data)
+ ConversionData& data,
+ const armnn::PermutationVector& dimensionMappings = g_DontPermute)
{
using HalOperand = typename HalPolicy::Operand;
using HalOperandType = typename HalPolicy::OperandType;
@@ -1333,6 +1379,7 @@ LayerInputHandle ConvertToLayerInputHandle(const ::android::hardware::neuralnetw
IsInputSupported,
data.m_Backends,
isInputSupported,
+ armnn::BackendId(),
operandTensorInfo);
if (!isInputSupported)
@@ -1357,14 +1404,18 @@ LayerInputHandle ConvertToLayerInputHandle(const ::android::hardware::neuralnetw
case HalOperandLifeTime::CONSTANT_REFERENCE:
{
// The tensor has an already known constant value, and can be converted into an ArmNN Constant layer.
- ConstTensorPin tensorPin = ConvertOperandToConstTensorPin<HalPolicy>(*operand, model, data);
+ ConstTensorPin tensorPin =
+ ConvertOperandToConstTensorPin<HalPolicy>(*operand, model, data, dimensionMappings);
+
if (tensorPin.IsValid())
{
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsConstantSupported,
data.m_Backends,
isSupported,
+ setBackend,
tensorPin.GetConstTensor().GetInfo());
if (!isSupported)
{
@@ -1373,10 +1424,12 @@ LayerInputHandle ConvertToLayerInputHandle(const ::android::hardware::neuralnetw
armnn::IConnectableLayer* constantLayer =
data.m_Network->AddConstantLayer(tensorPin.GetConstTensor());
+ constantLayer->SetBackendId(setBackend);
armnn::IOutputSlot& outputSlot = constantLayer->GetOutputSlot(0);
- outputSlot.SetTensorInfo(tensorPin.GetConstTensor().GetInfo());
+ armnn::TensorInfo constantTensorInfo = tensorPin.GetConstTensor().GetInfo();
+ outputSlot.SetTensorInfo(constantTensorInfo);
- return LayerInputHandle(true, &outputSlot, operandTensorInfo);
+ return LayerInputHandle(true, &outputSlot, constantTensorInfo);
}
else
{
@@ -1440,7 +1493,7 @@ bool SetupAndTrackLayerOutputSlot(const HalOperation& operation,
// Type one dynamic tensors require the previous layer's output shape for inference
for (unsigned int inputSlotIndex = 0; inputSlotIndex < layer.GetNumInputSlots(); ++inputSlotIndex)
{
- if(!layer.GetInputSlot(inputSlotIndex).GetConnection())
+ if (!layer.GetInputSlot(inputSlotIndex).GetConnection())
{
return false;
}
@@ -1570,13 +1623,14 @@ bool ConvertToActivation(const HalOperation& operation,
const armnn::TensorInfo& outInfo = GetTensorInfoForOperand(*outputOperand);
bool isSupported = false;
-
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsActivationSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outInfo,
activationDesc);
@@ -1597,7 +1651,11 @@ bool ConvertToActivation(const HalOperation& operation,
}
armnn::IConnectableLayer* layer = data.m_Network->AddActivationLayer(activationDesc);
- ARMNN_ASSERT(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the ActivationLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -1783,13 +1841,14 @@ bool ConvertPooling2d(const HalOperation& operation,
}
bool isSupported = false;
-
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsPooling2dSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc);
@@ -1811,6 +1870,7 @@ bool ConvertPooling2d(const HalOperation& operation,
}
armnn::IConnectableLayer* pooling2dLayer = data.m_Network->AddPooling2dLayer(desc);
+ pooling2dLayer->SetBackendId(setBackend);
if (!pooling2dLayer)
{
return Fail("%s: AddPooling2dLayer failed", __func__);
@@ -1830,79 +1890,6 @@ bool ConvertPooling2d(const HalOperation& operation,
template<typename HalPolicy,
typename HalOperation = typename HalPolicy::Operation,
typename HalModel = typename HalPolicy::Model>
-bool ConvertAdd(const HalOperation& operation, const HalModel& model, ConversionData& data)
-{
- using HalOperand = typename HalPolicy::Operand;
-
- LayerInputHandle input0 = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
- LayerInputHandle input1 = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
-
- if (!input0.IsValid() || !input1.IsValid())
- {
- return Fail("%s: Operation has invalid inputs", __func__);
- }
-
- // The FuseActivation parameter is always the input index 2
- // and it should be optional
- ActivationFn activationFunction;
- if (!GetOptionalInputActivation<HalPolicy>(operation, 2, activationFunction, model, data))
- {
- return Fail("%s: Operation has invalid inputs", __func__);
- }
-
- const HalOperand* outputOperand = GetOutputOperand<HalPolicy>(operation, 0, model);
- if (!outputOperand)
- {
- return false;
- }
-
- const armnn::TensorInfo& inputInfo0 = input0.GetTensorInfo();
- const armnn::TensorInfo& inputInfo1 = input1.GetTensorInfo();
-
- const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*outputOperand);
-
- bool isSupported = false;
- auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
- {
- FORWARD_LAYER_SUPPORT_FUNC(__func__,
- IsAdditionSupported,
- data.m_Backends,
- isSupported,
- inputInfo0,
- inputInfo1,
- outputInfo);
- };
-
- if(!IsDynamicTensor(outputInfo))
- {
- validateFunc(outputInfo, isSupported);
- }
- else
- {
- isSupported = AreDynamicTensorsSupported();
- }
-
- if (!isSupported)
- {
- return false;
- }
-
- armnn::IConnectableLayer* const startLayer = data.m_Network->AddAdditionLayer();
-
- bool isReshapeSupported = BroadcastTensor(input0, input1, startLayer, data);
- if (!isReshapeSupported)
- {
- return false;
- }
-
- return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
- data, nullptr, validateFunc, activationFunction);
-
-}
-
-template<typename HalPolicy,
- typename HalOperation = typename HalPolicy::Operation,
- typename HalModel = typename HalPolicy::Model>
bool ConvertArgMinMax(const HalOperation& operation,
const HalModel& model,
ConversionData& data,
@@ -1952,13 +1939,14 @@ bool ConvertArgMinMax(const HalOperation& operation,
descriptor.m_Axis = axis;
bool isSupported = false;
-
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsArgMinMaxSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo0,
outputInfo,
descriptor);
@@ -1979,8 +1967,11 @@ bool ConvertArgMinMax(const HalOperation& operation,
}
armnn::IConnectableLayer* layer = data.m_Network->AddArgMinMaxLayer(descriptor);
- assert(layer != nullptr);
-
+ if (!layer)
+ {
+ return Fail("%s: Could not add the ArgMinMaxLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input0.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -2083,10 +2074,12 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
reshapeDescriptor.m_TargetShape = reshapeInfo.GetShape();
bool isSupported = false;
+ armnn::BackendId setBackendReshape;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsReshapeSupported,
data.m_Backends,
isSupported,
+ setBackendReshape,
operandInputHandle.GetTensorInfo(),
reshapeInfo,
reshapeDescriptor);
@@ -2096,6 +2089,7 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
return false;
}
armnn::IConnectableLayer& newReshape = AddReshapeLayer(*data.m_Network, operandInputHandle, reshapeInfo);
+ newReshape.SetBackendId(setBackendReshape);
// Point to the reshape operation rather then the input operation
operandShape = reshapeInfo.GetShape();
@@ -2111,7 +2105,11 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
}
}
- ARMNN_ASSERT(inputShapes.size() == inputHandles.size());
+ if (inputShapes.size() != inputHandles.size())
+ {
+ return Fail("%s: invalid model input shapes size doesn't match input handles size: %i != %i", __func__,
+ inputShapes.size(), inputHandles.size());
+ }
if (inputsHaveBeenReshaped)
{
@@ -2198,9 +2196,16 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
[](const LayerInputHandle& h)->const armnn::TensorInfo*{ return &h.GetTensorInfo(); });
bool isSupported = false;
+ armnn::BackendId setBackendConcat;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported){
- FORWARD_LAYER_SUPPORT_FUNC(__func__, IsConcatSupported, data.m_Backends, isSupported, inputTensorInfos,
- outputInfo, concatDescriptor);
+ FORWARD_LAYER_SUPPORT_FUNC(__func__,
+ IsConcatSupported,
+ data.m_Backends,
+ isSupported,
+ setBackendConcat,
+ inputTensorInfos,
+ outputInfo,
+ concatDescriptor);
};
if (!isDynamicTensor)
@@ -2218,15 +2223,24 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
}
armnn::IConnectableLayer* layer = data.m_Network->AddConcatLayer(concatDescriptor);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the ConcatLayer", __func__);
+ }
+ layer->SetBackendId(setBackendConcat);
layer->GetOutputSlot(0).SetTensorInfo(outputInfo);
// Connect inputs to the layer
const int numInputSlots = layer->GetNumInputSlots();
- assert(static_cast<std::size_t>(numInputSlots) == inputHandles.size());
+
+ if (static_cast<std::size_t>(numInputSlots) != inputHandles.size())
+ {
+ return Fail("%s: invalid model input slots size doesn't match input handles size: %i != %i", __func__,
+ static_cast<std::size_t>(numInputSlots), inputHandles.size());
+ }
for (int i = 0; i < numInputSlots; ++i)
{
// connect the input directly to the merge (concat) layer
- inputHandles[static_cast<unsigned int>(i)].Connect(layer->GetInputSlot(i));
+ inputHandles[static_cast<unsigned int>(i)].Connect(layer->GetInputSlot(static_cast<unsigned int>(i)));
}
// Transpose the output shape
@@ -2237,10 +2251,12 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
armnn::TensorInfo outputTransposeInfo = armnnUtils::TransposeTensorShape(inputTransposeInfo,
permutationPair.second);
isSupported = false;
+ armnn::BackendId setBackendTranspose;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsTransposeSupported,
data.m_Backends,
isSupported,
+ setBackendTranspose,
inputTransposeInfo,
outputTransposeInfo,
transposeDesc);
@@ -2251,6 +2267,7 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
// Add permutation layer and connect the output to it, the permutation becomes the output layer
armnn::IConnectableLayer& deswizzleLayer = AddTransposeLayer(*data.m_Network, layer->GetOutputSlot(0),
permutationPair.second);
+ deswizzleLayer.SetBackendId(setBackendTranspose);
layer = &deswizzleLayer;
return true;
@@ -2266,7 +2283,10 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
if (isDynamicTensor)
{
// Infer the output shapes of concat if outputs are type 1 dynamic
- ARMNN_ASSERT(layer->GetOutputSlot(0).IsTensorInfoSet());
+ if (!layer->GetOutputSlot(0).IsTensorInfoSet())
+ {
+ return Fail("%s: TensorInfo is not set", __func__);
+ }
if (!ValidateConcatOutputShape(inputShapes,
layer->GetOutputSlot(0).GetTensorInfo().GetShape(),
concatDim))
@@ -2293,11 +2313,13 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
armnn::TensorInfo concatInfo = layer->GetOutputSlot(0).GetTensorInfo();
isSupported = false;
+ armnn::BackendId setBackendReshape2;
auto validateReshapeFunc = [&](const armnn::TensorInfo& afterConcatInfo, bool& isSupported){
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsReshapeSupported,
data.m_Backends,
isSupported,
+ setBackendReshape2,
concatInfo,
afterConcatInfo,
reshapeDescriptor);
@@ -2317,6 +2339,7 @@ bool ConvertConcatenation(const HalOperation& operation, const HalModel& model,
return false;
}
layer = &AddReshapeLayer(*data.m_Network, layer->GetOutputSlot(0), afterConcatInfo);
+ layer->SetBackendId(setBackendReshape2);
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation,
0,
*layer,
@@ -2352,18 +2375,21 @@ bool ConvertConv2d(const HalOperation& operation, const HalModel& model, Convers
const armnn::TensorInfo& inputInfo = input.GetTensorInfo();
const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
- // ArmNN does not currently support non-fixed weights or bias
- const ConstTensorPin weightsPin = ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 1, model, data);
- const ConstTensorPin biasPin = ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 2, model, data);
+ LayerInputHandle weightsInput = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
+ if (!weightsInput.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
- if (!weightsPin.IsValid() || !biasPin.IsValid())
+ LayerInputHandle biasInput = ConvertToLayerInputHandle<HalPolicy>(operation, 2, model, data); // 1D
+ if (!biasInput.IsValid())
{
return Fail("%s: Operation has invalid inputs", __func__);
}
- armnn::ConstTensor weights = weightsPin.GetConstTensor();
- armnn::ConstTensor bias = biasPin.GetConstTensor();
- SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), inputInfo);
+ biasInput.SanitizeQuantizationScale(weightsInput, input);
+ armnn::TensorInfo weightsInfo = weightsInput.GetTensorInfo();
+ armnn::TensorInfo biasInfo = biasInput.GetTensorInfo();
armnn::Convolution2dDescriptor desc;
desc.m_DataLayout = armnn::DataLayout::NHWC;
@@ -2393,8 +2419,8 @@ bool ConvertConv2d(const HalOperation& operation, const HalModel& model, Convers
return Fail("%s: Operation has invalid inputs", __func__);
}
- const uint32_t kernelX = weights.GetShape()[2];
- const uint32_t kernelY = weights.GetShape()[1];
+ const uint32_t kernelX = weightsInfo.GetShape()[2];
+ const uint32_t kernelY = weightsInfo.GetShape()[1];
const uint32_t inputX = inputInfo.GetShape()[2];
const uint32_t inputY = inputInfo.GetShape()[1];
@@ -2407,19 +2433,21 @@ bool ConvertConv2d(const HalOperation& operation, const HalModel& model, Convers
}
desc.m_BiasEnabled = true;
- armnn::Optional<armnn::TensorInfo> biases(bias.GetInfo());
+ armnn::Optional<armnn::TensorInfo> biases(biasInfo);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsConvolution2dSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc,
- weights.GetInfo(),
+ weightsInfo,
biases);
};
@@ -2437,8 +2465,8 @@ bool ConvertConv2d(const HalOperation& operation, const HalModel& model, Convers
return false;
}
- armnn::IConnectableLayer* startLayer =
- data.m_Network->AddConvolution2dLayer(desc, weights, armnn::Optional<armnn::ConstTensor>(bias));
+ armnn::IConnectableLayer* startLayer = data.m_Network->AddConvolution2dLayer(desc);
+ startLayer->SetBackendId(setBackend);
if (!startLayer)
{
@@ -2447,6 +2475,10 @@ bool ConvertConv2d(const HalOperation& operation, const HalModel& model, Convers
input.Connect(startLayer->GetInputSlot(0));
+ // Connect weights and bias inputs
+ weightsInput.Connect(startLayer->GetInputSlot(1));
+ biasInput.Connect(startLayer->GetInputSlot(2));
+
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
data, nullptr, validateFunc, activation);
}
@@ -2495,12 +2527,14 @@ bool ConvertDepthToSpace(const HalOperation& operation, const HalModel& model, C
}
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsDepthToSpaceSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -2521,7 +2555,11 @@ bool ConvertDepthToSpace(const HalOperation& operation, const HalModel& model, C
}
armnn::IConnectableLayer* const layer = data.m_Network->AddDepthToSpaceLayer(descriptor);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the DepthToSpaceLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -2555,42 +2593,42 @@ bool ConvertDepthwiseConv2d(const HalOperation& operation, const HalModel& model
// ArmNN does not currently support non-fixed weights or bias
// Find the shape of the weights tensor. In AndroidNN this will be [ 1, H, W, I * M ]
const HalOperand* weightsOperand = GetInputOperand<HalPolicy>(operation, 1, model);
-
- if (weightsOperand == nullptr)
+ if (!weightsOperand)
+ {
+ return Fail("%s: Could not read weights", __func__);
+ }
+ // Basic sanity check on the weights shape.
+ // ANEURALNETWORKS_DEPTHWISE_CONV_2D specifies a 4-D tensor, of shape
+ // [1, filter_height, filter_width, depth_out]
+ if (weightsOperand->dimensions[0] != 1)
{
- return Fail("%s: Operand is invalid", __func__);
+ return Fail("%s: Filter operand dimension 0 is invalid, should be 1", __func__);
}
+
armnn::DepthwiseConvolution2dDescriptor desc;
desc.m_DataLayout = armnn::DataLayout::NHWC;
- // Reinterpret weight data as [ H, W, I, M ]
- armnn::TensorShape weightsShape({ weightsOperand->dimensions[1],
- weightsOperand->dimensions[2],
- inputInfo.GetShape()[3],
- weightsOperand->dimensions[3] / inputInfo.GetShape()[3] });
-
- // Swizzle weight data [ H, W, I, M ] -> [ M, I, H, W ]
- const armnn::PermutationVector HWIMToMIHW = { 2U, 3U, 1U, 0U };
-
- const ConstTensorPin weightsPin =
- ConvertOperationInputToConstTensorPin<HalPolicy>(operation,
- 1,
- model,
- data,
- HWIMToMIHW,
- &weightsShape);
+ LayerInputHandle weightsInput = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
+ if (!weightsInput.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
- // Bias is a 1D tensor
- const ConstTensorPin biasPin = ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 2, model, data);
+ const HalOperand* biasOperand = GetInputOperand<HalPolicy>(operation, 2, model);
+ if (!biasOperand)
+ {
+ return Fail("%s: Could not read bias", __func__);
+ }
- if (!weightsPin.IsValid() || !biasPin.IsValid())
+ LayerInputHandle biasInput = ConvertToLayerInputHandle<HalPolicy>(operation, 2, model, data); // 1D
+ if (!biasInput.IsValid())
{
return Fail("%s: Operation has invalid inputs", __func__);
}
- armnn::ConstTensor weights = weightsPin.GetConstTensor();
- armnn::ConstTensor bias = biasPin.GetConstTensor();
- SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), inputInfo);
+ biasInput.SanitizeQuantizationScale(weightsInput, input);
+ armnn::TensorInfo weightsInfo = weightsInput.GetTensorInfo();
+ armnn::TensorInfo biasInfo = biasInput.GetTensorInfo();
ActivationFn activation;
@@ -2618,8 +2656,8 @@ bool ConvertDepthwiseConv2d(const HalOperation& operation, const HalModel& model
return Fail("%s: Operation has invalid inputs", __func__);
}
- const uint32_t kernelX = weights.GetShape()[3];
- const uint32_t kernelY = weights.GetShape()[2];
+ const uint32_t kernelX = weightsInfo.GetShape()[2];
+ const uint32_t kernelY = weightsInfo.GetShape()[1];
const uint32_t inputX = inputInfo.GetShape()[2];
const uint32_t inputY = inputInfo.GetShape()[1];
@@ -2632,19 +2670,21 @@ bool ConvertDepthwiseConv2d(const HalOperation& operation, const HalModel& model
}
desc.m_BiasEnabled = true;
- armnn::Optional<armnn::TensorInfo> biases(bias.GetInfo());
+ armnn::Optional<armnn::TensorInfo> biases(biasInfo);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsDepthwiseConvolutionSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc,
- weights.GetInfo(),
+ weightsInfo,
biases);
};
@@ -2663,8 +2703,8 @@ bool ConvertDepthwiseConv2d(const HalOperation& operation, const HalModel& model
return false;
}
- armnn::IConnectableLayer* startLayer =
- data.m_Network->AddDepthwiseConvolution2dLayer(desc, weights, armnn::Optional<armnn::ConstTensor>(bias));
+ armnn::IConnectableLayer* startLayer = data.m_Network->AddDepthwiseConvolution2dLayer(desc);
+ startLayer->SetBackendId(setBackend);
if (!startLayer)
{
return Fail("%s: AddDepthwiseConvolution2dLayer failed", __func__);
@@ -2672,6 +2712,10 @@ bool ConvertDepthwiseConv2d(const HalOperation& operation, const HalModel& model
input.Connect(startLayer->GetInputSlot(0));
+ // Connect weights and bias inputs
+ weightsInput.Connect(startLayer->GetInputSlot(1));
+ biasInput.Connect(startLayer->GetInputSlot(2));
+
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
data, nullptr, validateFunc, activation);
}
@@ -2705,12 +2749,14 @@ bool ConvertDequantize(const HalOperation& operation, const HalModel& model, Con
const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*outputOperand);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsDequantizeSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo);
};
@@ -2730,7 +2776,11 @@ bool ConvertDequantize(const HalOperation& operation, const HalModel& model, Con
}
armnn::IConnectableLayer* const layer = data.m_Network->AddDequantizeLayer();
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the DequantizeLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -2739,10 +2789,16 @@ bool ConvertDequantize(const HalOperation& operation, const HalModel& model, Con
template<typename HalPolicy,
typename HalOperation = typename HalPolicy::Operation,
typename HalModel = typename HalPolicy::Model>
-bool ConvertDiv(const HalOperation& operation, const HalModel& model, ConversionData& data)
+bool ConvertElementwiseBinary(const HalOperation& operation,
+ const HalModel& model,
+ ConversionData& data,
+ armnn::BinaryOperation binaryOperation)
{
using HalOperand = typename HalPolicy::Operand;
+ ALOGV("HalPolicy::ConvertElementwiseBinary()");
+ ALOGV("binaryOperation = %s", GetBinaryOperationAsCString(binaryOperation));
+
LayerInputHandle input0 = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
LayerInputHandle input1 = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
@@ -2751,35 +2807,38 @@ bool ConvertDiv(const HalOperation& operation, const HalModel& model, Conversion
return Fail("%s: Operation has invalid inputs", __func__);
}
- // The FuseActivation parameter is always the input index 2
- // and it should be optional
+ // The FuseActivation parameter is always the input index 2, and it should be optional
ActivationFn activationFunction;
if (!GetOptionalInputActivation<HalPolicy>(operation, 2, activationFunction, model, data))
{
- return Fail("%s: Operation has invalid inputs", __func__);
+ return Fail("%s: Operation has invalid optional input: activation function", __func__);
}
const HalOperand* output = GetOutputOperand<HalPolicy>(operation, 0, model);
if (!output)
{
- return Fail("%s: Could not read output 0", __func__);
+ return Fail("%s: Could not read output", __func__);
}
const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+ armnn::ElementwiseBinaryDescriptor descriptor(binaryOperation);
+
bool isSupported = false;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
- IsDivisionSupported,
+ IsElementwiseBinarySupported,
data.m_Backends,
isSupported,
+ armnn::BackendId(),
input0.GetTensorInfo(),
input1.GetTensorInfo(),
- outputInfo);
+ outputInfo,
+ binaryOperation);
};
- if(!IsDynamicTensor(outputInfo))
+ if (!IsDynamicTensor(outputInfo))
{
validateFunc(outputInfo, isSupported);
}
@@ -2793,19 +2852,22 @@ bool ConvertDiv(const HalOperation& operation, const HalModel& model, Conversion
return false;
}
- armnn::IConnectableLayer* const startLayer = data.m_Network->AddDivisionLayer();
-
- bool isReshapeSupported = BroadcastTensor(input0, input1, startLayer, data);
+ armnn::IConnectableLayer* layer = data.m_Network->AddElementwiseBinaryLayer(descriptor);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the ElementwiseBinaryLayer", __func__);
+ }
+ bool isReshapeSupported = BroadcastTensor(input0, input1, layer, data);
if (!isReshapeSupported)
{
return false;
}
- return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
- data, nullptr, validateFunc, activationFunction);
-
+ return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc,
+ activationFunction);
}
+
template<typename HalPolicy,
typename HalOperation = typename HalPolicy::Operation,
typename HalModel = typename HalPolicy::Model>
@@ -2828,12 +2890,14 @@ bool ConvertFloor(const HalOperation& operation, const HalModel& model, Conversi
const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*outputOperand);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsFloorSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outputInfo);
};
@@ -2853,7 +2917,11 @@ bool ConvertFloor(const HalOperation& operation, const HalModel& model, Conversi
}
armnn::IConnectableLayer* layer = data.m_Network->AddFloorLayer();
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the FloorLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -2938,7 +3006,11 @@ DequantizeResult DequantizeIfRequired(size_t operand_index,
}
const HalOperand* operand = GetInputOperand<HalPolicy>(operationIt, 0, model);
- ARMNN_ASSERT(operand);
+
+ if (!operand)
+ {
+ return { nullptr, 0, armnn::TensorInfo(), DequantizeStatus::INVALID_OPERAND };
+ }
if (!IsQSymm8(*operand))
{
@@ -2962,8 +3034,12 @@ DequantizeResult DequantizeIfRequired(size_t operand_index,
for (size_t i = 0; i < dequantizedBufferLength; ++i)
{
float* dstPtr = dequantizedBuffer.get();
- ARMNN_ASSERT(dstPtr);
- *dstPtr++ = quantizedBuffer[i] * quantizationScale;
+
+ if (!dstPtr)
+ {
+ return { nullptr, 0, armnn::TensorInfo(), DequantizeStatus::INVALID_OPERAND };
+ }
+ dstPtr[i] = quantizedBuffer[i] * quantizationScale;
}
// Construct tensor info for dequantized ConstTensor
@@ -3035,34 +3111,50 @@ bool ConvertFullyConnected(const HalOperation& operation, const HalModel& model,
const armnn::TensorInfo& inputInfo = input.GetTensorInfo();
const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
- ConstTensorPin weightsPin = DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 1);
- ConstTensorPin biasPin = ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 2, model, data); // 1D
+ LayerInputHandle weightsInput = LayerInputHandle();
+ const HalOperand* weightsOperand = GetInputOperand<HalPolicy>(operation, 1, model);
+ if (!weightsOperand)
+ {
+ return Fail("%s: Could not read weights", __func__);
+ }
- if (!weightsPin.IsValid())
+ // If weights are constant a separate constant layer will be created to store data.
+ // Otherwise handle non const weights as inputs.
+ weightsInput = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
+ if (!weightsInput.IsValid())
{
- return Fail("%s: Operation has invalid weights", __func__);
+ return Fail("%s: Operation has invalid inputs", __func__);
}
- if (!biasPin.IsValid())
+ LayerInputHandle biasInput = LayerInputHandle();
+ const HalOperand* biasOperand = GetInputOperand<HalPolicy>(operation, 2, model);
+ if (!biasOperand)
{
- return Fail("%s: Operation has invalid bias", __func__);
+ return Fail("%s: Could not read bias", __func__);
}
- armnn::ConstTensor weights = weightsPin.GetConstTensor();
- armnn::ConstTensor bias = biasPin.GetConstTensor();
- armnn::TensorInfo reshapedInfo = inputInfo;
+ // If bias are constant a separate constant layer will be created to store data.
+ // Otherwise handle non const bias as inputs.
+ biasInput = ConvertToLayerInputHandle<HalPolicy>(operation, 2, model, data); // 1D
+ if (!biasInput.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+ armnn::TensorInfo weightsInfo = weightsInput.GetTensorInfo();
+ armnn::TensorInfo reshapedInfo = inputInfo;
try
{
- reshapedInfo.SetShape(FlattenFullyConnectedInput(inputInfo.GetShape(), weights.GetInfo().GetShape()));
+ reshapedInfo.SetShape(FlattenFullyConnectedInput(inputInfo.GetShape(), weightsInfo.GetShape()));
}
catch (const std::exception& e)
{
return Fail("%s: %s", __func__, e.what());
}
- // ensuring that the bias value is within 1% of the weights input (small float differences can exist)
- SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), reshapedInfo);
+ // Ensuring that the bias value is within 1% of the weights input (small float differences can exist)
+ armnn::TensorInfo biasInfo = biasInput.GetTensorInfo();
+ SanitizeBiasQuantizationScale(biasInfo, weightsInfo, reshapedInfo);
ActivationFn activationFunction;
if (!GetInputActivationFunction<HalPolicy>(operation, 3, activationFunction, model, data))
@@ -3073,12 +3165,14 @@ bool ConvertFullyConnected(const HalOperation& operation, const HalModel& model,
armnn::FullyConnectedDescriptor desc;
desc.m_TransposeWeightMatrix = true;
desc.m_BiasEnabled = true;
+ desc.m_ConstantWeights = IsOperandConstant<HalPolicy>(*weightsOperand);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
if (!VerifyFullyConnectedShapes(reshapedInfo.GetShape(),
- weights.GetInfo().GetShape(),
+ weightsInfo.GetShape(),
outputInfo.GetShape(),
desc.m_TransposeWeightMatrix))
{
@@ -3088,14 +3182,15 @@ bool ConvertFullyConnected(const HalOperation& operation, const HalModel& model,
}
FORWARD_LAYER_SUPPORT_FUNC(__func__,
- IsFullyConnectedSupported,
- data.m_Backends,
- isSupported,
- reshapedInfo,
- outputInfo,
- weights.GetInfo(),
- bias.GetInfo(),
- desc);
+ IsFullyConnectedSupported,
+ data.m_Backends,
+ isSupported,
+ setBackend,
+ reshapedInfo,
+ outputInfo,
+ weightsInfo,
+ biasInfo,
+ desc);
};
if(!IsDynamicTensor(outputInfo))
@@ -3112,8 +3207,9 @@ bool ConvertFullyConnected(const HalOperation& operation, const HalModel& model,
return false;
}
- armnn::IConnectableLayer* startLayer =
- data.m_Network->AddFullyConnectedLayer(desc, weights, armnn::Optional<armnn::ConstTensor>(bias));
+ // Add FullyConnected layer. Weights and bias will be connected as constant layers or non const inputs.
+ armnn::IConnectableLayer* startLayer = data.m_Network->AddFullyConnectedLayer(desc);
+ startLayer->SetBackendId(setBackend);
if (inputInfo.GetNumDimensions() > 2U)
{
@@ -3121,7 +3217,10 @@ bool ConvertFullyConnected(const HalOperation& operation, const HalModel& model,
reshapeDescriptor.m_TargetShape = reshapedInfo.GetShape();
armnn::IConnectableLayer* reshapeLayer = data.m_Network->AddReshapeLayer(reshapeDescriptor);
- assert(reshapeLayer != nullptr);
+ if (!reshapeLayer)
+ {
+ return Fail("%s: could not add the reshapeLayer", __func__);
+ }
input.Connect(reshapeLayer->GetInputSlot(0));
reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapedInfo);
reshapeLayer->GetOutputSlot(0).Connect(startLayer->GetInputSlot(0));
@@ -3131,6 +3230,10 @@ bool ConvertFullyConnected(const HalOperation& operation, const HalModel& model,
input.Connect(startLayer->GetInputSlot(0));
}
+ // Connect weights and bias inputs
+ weightsInput.Connect(startLayer->GetInputSlot(1));
+ biasInput.Connect(startLayer->GetInputSlot(2));
+
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
data, nullptr, validateFunc, activationFunction);
}
@@ -3171,12 +3274,14 @@ bool ConvertL2Normalization(const HalOperation& operation, const HalModel& model
desc.m_DataLayout = armnn::DataLayout::NHWC;
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsL2NormalizationSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc);
@@ -3197,7 +3302,11 @@ bool ConvertL2Normalization(const HalOperation& operation, const HalModel& model
}
armnn::IConnectableLayer* layer = data.m_Network->AddL2NormalizationLayer(desc);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the L2NormalizationLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -3257,12 +3366,14 @@ bool ConvertLocalResponseNormalization(const HalOperation& operation,
descriptor.m_NormSize = 1 + (2 * descriptor.m_NormSize);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsNormalizationSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -3282,9 +3393,12 @@ bool ConvertLocalResponseNormalization(const HalOperation& operation,
return false;
}
-
armnn::IConnectableLayer* layer = data.m_Network->AddNormalizationLayer(descriptor);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the NormalizationLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -3355,12 +3469,14 @@ bool ConvertMean(const HalOperation& operation, const HalModel& model, Conversio
descriptor.m_KeepDims = keepDims > 0;
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsMeanSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -3381,83 +3497,14 @@ bool ConvertMean(const HalOperation& operation, const HalModel& model, Conversio
}
armnn::IConnectableLayer* const layer = data.m_Network->AddMeanLayer(descriptor);
- assert(layer != nullptr);
- input.Connect(layer->GetInputSlot(0));
-
- return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
-}
-
-template<typename HalPolicy,
- typename HalOperation = typename HalPolicy::Operation,
- typename HalModel = typename HalPolicy::Model>
-bool ConvertMul(const HalOperation& operation, const HalModel& model, ConversionData& data)
-{
- using HalOperand = typename HalPolicy::Operand;
-
- LayerInputHandle input0 = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
- LayerInputHandle input1 = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
-
- if (!input0.IsValid() || !input1.IsValid())
- {
- return Fail("%s: Operation has invalid inputs", __func__);
- }
-
- // The FuseActivation parameter is always the input index 2
- // and it should be optional
- ActivationFn activationFunction;
- if (!GetOptionalInputActivation<HalPolicy>(operation, 2, activationFunction, model, data))
- {
- return Fail("%s: Operation has invalid inputs", __func__);
- }
-
- const HalOperand* outputOperand = GetOutputOperand<HalPolicy>(operation, 0, model);
-
- if (outputOperand == nullptr)
- {
- return false;
- }
-
- const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*outputOperand);
-
- bool isSupported = false;
- auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
- {
- FORWARD_LAYER_SUPPORT_FUNC(__func__,
- IsMultiplicationSupported,
- data.m_Backends,
- isSupported,
- input0.GetTensorInfo(),
- input1.GetTensorInfo(),
- outputInfo);
- };
-
- if(!IsDynamicTensor(outputInfo))
- {
- validateFunc(outputInfo, isSupported);
- }
- else
- {
- isSupported = AreDynamicTensorsSupported();
- }
-
- if (!isSupported)
- {
- return false;
- }
-
- armnn::IConnectableLayer* const startLayer = data.m_Network->AddMultiplicationLayer();
-
- const armnn::TensorInfo& inputTensorInfo0 = input0.GetTensorInfo();
- const armnn::TensorInfo& inputTensorInfo1 = input1.GetTensorInfo();
-
- bool isReshapeSupported = BroadcastTensor(input0, input1, startLayer, data);
- if (!isReshapeSupported)
+ if (!layer)
{
- return false;
+ return Fail("%s: Could not add the MeanLayer", __func__);
}
+ layer->SetBackendId(setBackend);
+ input.Connect(layer->GetInputSlot(0));
- return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
- data, nullptr, validateFunc, activationFunction);
+ return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
}
template<typename HalPolicy,
@@ -3501,12 +3548,14 @@ bool ConvertPad(HalOperation& operation, const HalModel& model, ConversionData&
const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsPadSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -3527,7 +3576,11 @@ bool ConvertPad(HalOperation& operation, const HalModel& model, ConversionData&
}
armnn::IConnectableLayer* const layer = data.m_Network->AddPadLayer(descriptor);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the PadLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -3586,12 +3639,14 @@ bool ConvertReshape(const HalOperation& operation, const HalModel& model, Conver
const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*outputOperand);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsReshapeSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outputInfo,
reshapeDescriptor);
@@ -3612,81 +3667,14 @@ bool ConvertReshape(const HalOperation& operation, const HalModel& model, Conver
}
armnn::IConnectableLayer* layer = data.m_Network->AddReshapeLayer(reshapeDescriptor);
- assert(layer != nullptr);
- input.Connect(layer->GetInputSlot(0));
-
- return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
-}
-
-template<typename HalPolicy,
- typename HalOperation = typename HalPolicy::Operation,
- typename HalModel = typename HalPolicy::Model>
-bool ConvertSub(const HalOperation& operation, const HalModel& model, ConversionData& data)
-{
- using HalOperand = typename HalPolicy::Operand;
-
- LayerInputHandle input0 = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
- LayerInputHandle input1 = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
-
- if (!input0.IsValid() || !input1.IsValid())
+ if (!layer)
{
- return Fail("%s: Operation has invalid inputs", __func__);
+ return Fail("%s: Could not add the ReshapeLayer", __func__);
}
+ layer->SetBackendId(setBackend);
+ input.Connect(layer->GetInputSlot(0));
- // The FuseActivation parameter is always the input index 2
- // and it should be optional
- ActivationFn activationFunction;
- if (!GetOptionalInputActivation<HalPolicy>(operation, 2, activationFunction, model, data))
- {
- return Fail("%s: Operation has invalid inputs", __func__);
- }
-
- const HalOperand* output = GetOutputOperand<HalPolicy>(operation, 0, model);
- if (!output)
- {
- return Fail("%s: Could not read output 0", __func__);
- }
-
- const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
-
- bool isSupported = false;
- auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
- {
- FORWARD_LAYER_SUPPORT_FUNC(__func__,
- IsSubtractionSupported,
- data.m_Backends,
- isSupported,
- input0.GetTensorInfo(),
- input1.GetTensorInfo(),
- outputInfo);
- };
-
- if(IsDynamicTensor(outputInfo))
- {
- isSupported = AreDynamicTensorsSupported();
- }
- else
- {
- validateFunc(outputInfo, isSupported);
- }
-
- if (!isSupported)
- {
- return false;
- }
-
- armnn::IConnectableLayer* const startLayer = data.m_Network->AddSubtractionLayer();
-
- const armnn::TensorInfo& inputTensorInfo0 = input0.GetTensorInfo();
- const armnn::TensorInfo& inputTensorInfo1 = input1.GetTensorInfo();
-
- bool isReshapeSupported = BroadcastTensor(input0, input1, startLayer, data);
- if (!isReshapeSupported)
- {
- return false;
- }
- return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
- data, nullptr, validateFunc, activationFunction);
+ return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
}
template<typename HalPolicy,
@@ -3724,13 +3712,13 @@ bool ConvertSqueeze(const HalOperation& operation, const HalModel& model, Conver
// if the operand index is out of bounds.
const HalOperand* axisOperand = GetInputOperand<HalPolicy>(operation, 1, model, false);
- const uint32_t dimensionSequence[] = { 0, 1, 2, 3 };
-
std::vector<int32_t> axis;
if (!axisOperand)
{
- axis.assign(dimensionSequence,
- dimensionSequence + rank);
+ for (unsigned int i = 0; i < rank; ++i)
+ {
+ axis.push_back(static_cast<int32_t>(i));
+ }
}
else if (!GetTensorInt32Values<HalPolicy>(*axisOperand, axis, model, data))
{
@@ -3757,10 +3745,12 @@ bool ConvertSqueeze(const HalOperation& operation, const HalModel& model, Conver
reshapeDesc.m_TargetShape = outputInfo.GetShape();
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsReshapeSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
reshapeDesc);
@@ -3771,7 +3761,11 @@ bool ConvertSqueeze(const HalOperation& operation, const HalModel& model, Conver
}
armnn::IConnectableLayer* const layer = data.m_Network->AddReshapeLayer(reshapeDesc);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the ReshapeLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data);
@@ -3857,12 +3851,14 @@ bool ConvertStridedSlice(const HalOperation& operation, const HalModel& model, C
}
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsStridedSliceSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -3887,8 +3883,6 @@ bool ConvertStridedSlice(const HalOperation& operation, const HalModel& model, C
for (unsigned int i = 0; i < inputShape.GetNumDimensions(); i++)
{
int stride = descriptor.m_Stride[i];
- int start = descriptor.GetStartForAxis(inputShape, i);
- int stop = descriptor.GetStopForAxis(inputShape, i, start);
if (descriptor.m_ShrinkAxisMask & (1 << i))
{
@@ -3908,7 +3902,11 @@ bool ConvertStridedSlice(const HalOperation& operation, const HalModel& model, C
}
armnn::IConnectableLayer* const layer = data.m_Network->AddStridedSliceLayer(descriptor);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the StridedSliceLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -3966,12 +3964,14 @@ bool ConvertTranspose(const HalOperation& operation, const HalModel& model, Conv
const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsTransposeSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
transposeDesc);
@@ -3992,7 +3992,11 @@ bool ConvertTranspose(const HalOperation& operation, const HalModel& model, Conv
}
armnn::IConnectableLayer* const layer = data.m_Network->AddTransposeLayer(transposeDesc);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the TransposeLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -4060,12 +4064,14 @@ bool ConvertBatchToSpaceNd(const HalOperation& operation,
batchToSpaceNdDesc.m_Crops = {{0, 0}, {0, 0}};
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsBatchToSpaceNdSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
batchToSpaceNdDesc);
@@ -4087,7 +4093,11 @@ bool ConvertBatchToSpaceNd(const HalOperation& operation,
}
armnn::IConnectableLayer* const layer = data.m_Network->AddBatchToSpaceNdLayer(batchToSpaceNdDesc);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the BatchToSpaceNdLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -4162,7 +4172,8 @@ bool ConvertSpaceToBatchNd(const HalOperation& operation, const HalModel& model,
return Fail("%s: Operation has invalid paddings operand, invalid padding values.", __func__);
}
- paddingList.emplace_back((unsigned int) paddingBeforeInput, (unsigned int) paddingAfterInput);
+ paddingList.emplace_back(static_cast<unsigned int>(paddingBeforeInput),
+ static_cast<unsigned int>(paddingAfterInput));
}
armnn::SpaceToBatchNdDescriptor descriptor;
@@ -4176,12 +4187,14 @@ bool ConvertSpaceToBatchNd(const HalOperation& operation, const HalModel& model,
}
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsSpaceToBatchNdSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -4202,7 +4215,11 @@ bool ConvertSpaceToBatchNd(const HalOperation& operation, const HalModel& model,
}
armnn::IConnectableLayer* const layer = data.m_Network->AddSpaceToBatchNdLayer(descriptor);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the SpaceToBatchNdLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
diff --git a/ConversionUtils_1_2.hpp b/ConversionUtils_1_2.hpp
index 779d88f..2ad14c2 100644
--- a/ConversionUtils_1_2.hpp
+++ b/ConversionUtils_1_2.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -23,6 +23,31 @@ using namespace armnn;
using namespace android::nn;
template<typename HalPolicy,
+ typename HalOperation = typename HalPolicy::Operation,
+ typename HalModel = typename HalPolicy::Model>
+bool IsWeightsValid(const HalOperation& operation,
+ uint32_t inputIndex,
+ const HalModel& model)
+{
+ using HalOperand = typename HalPolicy::Operand;
+ using HalOperandLifeTime = typename HalPolicy::OperandLifeTime;
+ const HalOperand* operand = GetInputOperand<HalPolicy>(operation, inputIndex, model);
+ if (!operand)
+ {
+ Fail("%s: failed to get input operand %i", __func__, inputIndex);
+ return false;
+ }
+
+ if (operand->lifetime != HalOperandLifeTime::CONSTANT_COPY
+ && operand->lifetime != HalOperandLifeTime::CONSTANT_REFERENCE
+ && operand->lifetime != HalOperandLifeTime::NO_VALUE)
+ {
+ return false;
+ }
+ return true;
+}
+
+template<typename HalPolicy,
typename HalOperation = typename HalPolicy::Operation,
typename HalModel = typename HalPolicy::Model>
bool IsQSymmDequantizeForWeights(const HalOperation& operation, const HalModel& model)
@@ -110,6 +135,157 @@ bool SetupAndTrackLayerOutputSlotAndOverrideTensorInfo(const HalOperation& opera
}
template<typename HalPolicy,
+ typename HalOperation = typename HalPolicy::Operation,
+ typename HalModel = typename HalPolicy::Model>
+bool ConvertCast(const HalOperation& operation,
+ const HalModel& model,
+ ConversionData& data)
+{
+ using HalOperand = typename HalPolicy::Operand;
+
+ ALOGV("HalPolicy::ConvertCast()");
+
+ LayerInputHandle input = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
+
+ if (!input.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ const HalOperand* output = GetOutputOperand<HalPolicy>(operation, 0, model);
+ if (!output)
+ {
+ return Fail("%s: Could not read output 0", __func__);
+ }
+
+ const TensorInfo& inputInfo = input.GetTensorInfo();
+ const TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+
+ bool isSupported = false;
+ armnn::BackendId setBackend;
+ auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
+ {
+ FORWARD_LAYER_SUPPORT_FUNC(__func__,
+ IsCastSupported,
+ data.m_Backends,
+ isSupported,
+ setBackend,
+ inputInfo,
+ outputInfo);
+ };
+
+ if(!IsDynamicTensor(outputInfo))
+ {
+ validateFunc(outputInfo, isSupported);
+ }
+ else
+ {
+ isSupported = AreDynamicTensorsSupported();
+ }
+
+ if (!isSupported)
+ {
+ return false;
+ }
+
+ IConnectableLayer* layer = data.m_Network->AddCastLayer();
+ if (!layer)
+ {
+ return Fail("%s: Could not add the CastLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
+ input.Connect(layer->GetInputSlot(0));
+
+ return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
+}
+
+template<typename HalPolicy,
+ typename HalOperation = typename HalPolicy::Operation,
+ typename HalModel = typename HalPolicy::Model>
+bool ConvertChannelShuffle(const HalOperation& operation,
+ const HalModel& model,
+ ConversionData& data)
+{
+ using HalOperand = typename HalPolicy::Operand;
+ using HalOperandType = typename HalPolicy::OperandType;
+
+ ALOGV("HalPolicy::ConvertChannelShuffle()");
+
+ LayerInputHandle input = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
+ if (!input.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+ auto inputDimensions = static_cast<int32_t>(input.GetTensorInfo().GetNumDimensions());
+
+ ChannelShuffleDescriptor descriptor;
+
+ int32_t groups;
+ if (!GetInputScalar<HalPolicy>(operation, 1, HalOperandType::INT32, groups, model, data))
+ {
+ return Fail("%s: Operation has invalid or unsupported number of groups operand", __func__);
+ }
+ descriptor.m_NumGroups = static_cast<uint32_t>(groups);
+
+ int32_t axis;
+ if (!GetInputScalar<HalPolicy>(operation, 2, HalOperandType::INT32, axis, model, data))
+ {
+ return Fail("%s: Operation has invalid or unsupported dimension channel shuffle operand", __func__);
+ }
+ if (((axis < -inputDimensions) && (axis < 0)) || ((axis >= inputDimensions) && (axis > 0)))
+ {
+ return Fail("%s: Operation has invalid dimension: %d. It is out of bounds [-%d, %d))", __func__, axis,
+ inputDimensions, inputDimensions);
+ }
+ int positiveAxis = (axis < 0) ? inputDimensions + axis : axis;
+ descriptor.m_Axis = static_cast<uint32_t>(positiveAxis);
+
+ const HalOperand* output = GetOutputOperand<HalPolicy>(operation, 0, model);
+ if (!output)
+ {
+ return Fail("%s: Could not read output 0", __func__);
+ }
+
+ const TensorInfo& inputInfo = input.GetTensorInfo();
+ const TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+
+ bool isSupported = false;
+ armnn::BackendId setBackend;
+ auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
+ {
+ FORWARD_LAYER_SUPPORT_FUNC(__func__,
+ IsChannelShuffleSupported,
+ data.m_Backends,
+ isSupported,
+ setBackend,
+ inputInfo,
+ outputInfo,
+ descriptor);
+ };
+
+ if(!IsDynamicTensor(outputInfo))
+ {
+ validateFunc(outputInfo, isSupported);
+ }
+ else
+ {
+ isSupported = AreDynamicTensorsSupported();
+ }
+
+ if (!isSupported)
+ {
+ return false;
+ }
+
+ IConnectableLayer* layer = data.m_Network->AddChannelShuffleLayer(descriptor);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the ChannelShuffleLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
+ input.Connect(layer->GetInputSlot(0));
+
+ return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
+}
+
+template<typename HalPolicy,
typename HalOperation = typename HalPolicy::Operation,
typename HalModel = typename HalPolicy::Model>
bool ConvertComparison_1_2(const HalOperation& operation,
@@ -143,12 +319,14 @@ bool ConvertComparison_1_2(const HalOperation& operation,
ComparisonDescriptor descriptor(comparisonOperation);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsComparisonSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo0,
inputInfo1,
outputInfo,
@@ -171,7 +349,11 @@ bool ConvertComparison_1_2(const HalOperation& operation,
}
IConnectableLayer* layer = data.m_Network->AddComparisonLayer(descriptor);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the ComparisonLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
bool isReshapeSupported = BroadcastTensor(input0, input1, layer, data);
if (!isReshapeSupported)
@@ -179,12 +361,6 @@ bool ConvertComparison_1_2(const HalOperation& operation,
return false;
}
- if(IsDynamicTensor(outputInfo))
- {
- input0.Connect(layer->GetInputSlot(0));
- input1.Connect(layer->GetInputSlot(1));
- }
-
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
}
@@ -237,26 +413,31 @@ bool ConvertConv2d_1_2(const HalOperation& operation, const HalModel& model, Con
// The NNAPI filter is always OHWI [depth_out, filter_height, filter_width, depth_in] but ArmNN expects the
// filter's height and width indices to match the input's height and width indices so we permute it to OIHW if
// the DataLayout is NCHW
- const ConstTensorPin weightsPin = (desc.m_DataLayout == DataLayout::NCHW) ?
- ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 1,
- model, data, OHWIToOIHW) :
- ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 1, model, data);
- const ConstTensorPin biasPin =
- ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 2, model, data);
- if (!weightsPin.IsValid())
+
+ if (!IsWeightsValid<HalPolicy>(operation, 1, model) && desc.m_DataLayout == DataLayout::NCHW)
{
- return Fail("%s: Operation has invalid weights", __func__);
+ return Fail("%s: Operation has unsupported weights HalOperandLifeTime", __func__);
}
- if (!biasPin.IsValid())
+ LayerInputHandle weightsInput = (desc.m_DataLayout == DataLayout::NCHW) ?
+ ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data, OHWIToOIHW) :
+ ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
+
+ if (!weightsInput.IsValid())
{
- return Fail("%s: Operation has invalid biases", __func__);
+ return Fail("%s: Operation has invalid inputs", __func__);
}
- ConstTensor weights = weightsPin.GetConstTensor();
- ConstTensor bias = biasPin.GetConstTensor();
- SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), inputInfo);
+ LayerInputHandle biasInput = ConvertToLayerInputHandle<HalPolicy>(operation, 2, model, data); // 1D
+ if (!biasInput.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ biasInput.SanitizeQuantizationScale(weightsInput, input);
+ armnn::TensorInfo weightsInfo = weightsInput.GetTensorInfo();
+ armnn::TensorInfo biasInfo = biasInput.GetTensorInfo();
ActivationFn activation;
@@ -275,8 +456,8 @@ bool ConvertConv2d_1_2(const HalOperation& operation, const HalModel& model, Con
armnnUtils::DataLayoutIndexed dataLayoutIndexed(desc.m_DataLayout);
unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
- const uint32_t kernelX = weights.GetShape()[widthIndex];
- const uint32_t kernelY = weights.GetShape()[heightIndex];
+ const uint32_t kernelX = weightsInfo.GetShape()[widthIndex];
+ const uint32_t kernelY = weightsInfo.GetShape()[heightIndex];
const uint32_t inputX = inputInfo.GetShape()[widthIndex];
const uint32_t inputY = inputInfo.GetShape()[heightIndex];
@@ -305,19 +486,21 @@ bool ConvertConv2d_1_2(const HalOperation& operation, const HalModel& model, Con
}
desc.m_BiasEnabled = true;
- Optional<TensorInfo> biases(bias.GetInfo());
+ Optional<TensorInfo> biases(biasInfo);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsConvolution2dSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc,
- weights.GetInfo(),
+ weightsInfo,
biases);
};
@@ -335,8 +518,8 @@ bool ConvertConv2d_1_2(const HalOperation& operation, const HalModel& model, Con
return false;
}
- IConnectableLayer* startLayer =
- data.m_Network->AddConvolution2dLayer(desc, weights, Optional<ConstTensor>(bias));
+ armnn::IConnectableLayer* startLayer = data.m_Network->AddConvolution2dLayer(desc);
+ startLayer->SetBackendId(setBackend);
if (!startLayer)
{
@@ -344,6 +527,8 @@ bool ConvertConv2d_1_2(const HalOperation& operation, const HalModel& model, Con
}
input.Connect(startLayer->GetInputSlot(0));
+ weightsInput.Connect(startLayer->GetInputSlot(1));
+ biasInput.Connect(startLayer->GetInputSlot(2));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
data, nullptr, validateFunc, activation);
@@ -379,12 +564,11 @@ bool ConvertDepthwiseConv2d_1_2(const HalOperation& operation, const HalModel& m
// ArmNN does not currently support non-fixed weights or bias
// Find the shape of the weights tensor. In AndroidNN this will be [ 1, H, W, I * M ]
const HalOperand* weightsOperand = GetInputOperand<HalPolicy>(operation, 1, model);
-
- if (weightsOperand == nullptr)
+ if (!weightsOperand)
{
- return Fail("%s: Operand is invalid", __func__);
+ return Fail("%s: Could not read weights", __func__);
}
- if ( weightsOperand->dimensions[0] != 1)
+ if (weightsOperand->dimensions[0] != 1)
{
return Fail("%s: Invalid weights; for depthwise convolution, dimension 0 must be 1 but it is %i",
__func__, weightsOperand->dimensions[0] );
@@ -403,44 +587,30 @@ bool ConvertDepthwiseConv2d_1_2(const HalOperation& operation, const HalModel& m
desc.m_DataLayout = OptionalDataLayout<HalPolicy>(operation, dataLayoutFlagIndex, model, data);
armnnUtils::DataLayoutIndexed dataLayoutIndexed(desc.m_DataLayout);
- unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
unsigned int widthIndex = dataLayoutIndexed.GetWidthIndex();
unsigned int heightIndex = dataLayoutIndexed.GetHeightIndex();
- // Reinterpret weight data as [ H, W, I, M ]
- TensorShape weightsShape({ weightsOperand->dimensions[1],
- weightsOperand->dimensions[2],
- inputInfo.GetShape()[channelsIndex],
- weightsOperand->dimensions[3] / inputInfo.GetShape()[channelsIndex] });
-
- // Swizzle weight data [ H, W, I, M ] -> [ M, I, H, W ]
- const PermutationVector HWIMToMIHW = { 2U, 3U, 1U, 0U };
-
- const ConstTensorPin weightsPin =
- ConvertOperationInputToConstTensorPin<HalPolicy>(operation,
- 1,
- model,
- data,
- HWIMToMIHW,
- &weightsShape);
-
- // Bias is a 1D tensor
- const ConstTensorPin biasPin =
- ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 2, model, data);
+ LayerInputHandle weightsInput = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
+ if (!weightsInput.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
- if (!weightsPin.IsValid())
+ const HalOperand* biasOperand = GetInputOperand<HalPolicy>(operation, 2, model);
+ if (!biasOperand)
{
- return Fail("%s: Operation has invalid weights", __func__);
+ return Fail("%s: Could not read bias", __func__);
}
- if (!biasPin.IsValid())
+ LayerInputHandle biasInput = ConvertToLayerInputHandle<HalPolicy>(operation, 2, model, data); // 1D
+ if (!biasInput.IsValid())
{
- return Fail("%s: Operation has invalid biases", __func__);
+ return Fail("%s: Operation has invalid inputs", __func__);
}
- ConstTensor weights = weightsPin.GetConstTensor();
- ConstTensor bias = biasPin.GetConstTensor();
- SanitizeBiasQuantizationScale(bias.GetInfo(), weights.GetInfo(), inputInfo);
+ biasInput.SanitizeQuantizationScale(weightsInput, input);
+ armnn::TensorInfo weightsInfo = weightsInput.GetTensorInfo();
+ armnn::TensorInfo biasInfo = biasInput.GetTensorInfo();
ActivationFn activation;
@@ -456,8 +626,8 @@ bool ConvertDepthwiseConv2d_1_2(const HalOperation& operation, const HalModel& m
return Fail("%s: Operation has invalid inputs (implicit padding)", __func__);
}
- const uint32_t kernelX = weights.GetShape()[3];
- const uint32_t kernelY = weights.GetShape()[2];
+ const uint32_t kernelX = weightsInfo.GetShape()[2];
+ const uint32_t kernelY = weightsInfo.GetShape()[1];
const uint32_t inputX = inputInfo.GetShape()[widthIndex];
const uint32_t inputY = inputInfo.GetShape()[heightIndex];
@@ -485,19 +655,21 @@ bool ConvertDepthwiseConv2d_1_2(const HalOperation& operation, const HalModel& m
}
desc.m_BiasEnabled = true;
- Optional<TensorInfo> biases(bias.GetInfo());
+ Optional<TensorInfo> biases(biasInfo);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsDepthwiseConvolutionSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc,
- weights.GetInfo(),
+ weightsInfo,
biases);
};
@@ -515,8 +687,8 @@ bool ConvertDepthwiseConv2d_1_2(const HalOperation& operation, const HalModel& m
return false;
}
- IConnectableLayer* startLayer =
- data.m_Network->AddDepthwiseConvolution2dLayer(desc, weights, Optional<ConstTensor>(bias));
+ armnn::IConnectableLayer* startLayer = data.m_Network->AddDepthwiseConvolution2dLayer(desc);
+ startLayer->SetBackendId(setBackend);
if (!startLayer)
{
@@ -525,6 +697,10 @@ bool ConvertDepthwiseConv2d_1_2(const HalOperation& operation, const HalModel& m
input.Connect(startLayer->GetInputSlot(0));
+ // Connect weights and bias inputs
+ weightsInput.Connect(startLayer->GetInputSlot(1));
+ biasInput.Connect(startLayer->GetInputSlot(2));
+
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *startLayer, model,
data, nullptr, validateFunc, activation);
}
@@ -578,13 +754,14 @@ bool ConvertElementwiseUnary(const HalOperation& operation,
ElementwiseUnaryDescriptor descriptor(unaryOperation);
bool isSupported = false;
-
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsElementwiseUnarySupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -605,7 +782,11 @@ bool ConvertElementwiseUnary(const HalOperation& operation,
}
IConnectableLayer* layer = data.m_Network->AddElementwiseUnaryLayer(descriptor);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the ElementwiseUnaryLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -653,21 +834,18 @@ bool ConvertExpandDims(const HalOperation& operation, const HalModel& model, Con
return Fail("%s: %s", __func__, e.what());
}
- if (targetShape != outputInfo.GetShape())
- {
- return Fail("%s: Shape of the output operand does not match the resolved expanded shape", __func__);
- }
-
ReshapeDescriptor reshapeDescriptor;
reshapeDescriptor.m_TargetShape = targetShape;
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsReshapeSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outputInfo,
reshapeDescriptor);
@@ -675,6 +853,10 @@ bool ConvertExpandDims(const HalOperation& operation, const HalModel& model, Con
if(!IsDynamicTensor(outputInfo))
{
+ if (targetShape != outputInfo.GetShape())
+ {
+ return Fail("%s: Shape of the output operand does not match the resolved expanded shape", __func__);
+ }
validateFunc(outputInfo, isSupported);
}
else
@@ -688,7 +870,11 @@ bool ConvertExpandDims(const HalOperation& operation, const HalModel& model, Con
}
IConnectableLayer* layer = data.m_Network->AddReshapeLayer(reshapeDescriptor);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the ReshapeLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -736,7 +922,8 @@ bool ConvertGather(const HalOperation& operation, const HalModel& model, Convers
{
return Fail("%s: Operation has invalid or unsupported axis operand", __func__);
}
- if (((axis < -inputDimensions) && (axis < 0)) || ((axis >= inputDimensions) && (axis > 0)))
+ int32_t inputDimensions_int = static_cast<int32_t>(inputDimensions);
+ if ((axis < -inputDimensions_int) || (inputDimensions_int <= axis))
{
return Fail("%s: Operation has invalid axis: %d. It is out of bounds [-%d, %d))", __func__, axis,
inputDimensions, inputDimensions);
@@ -746,12 +933,14 @@ bool ConvertGather(const HalOperation& operation, const HalModel& model, Convers
desc.m_Axis = axis;
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsGatherSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
indices.GetTensorInfo(),
outputInfo,
@@ -773,7 +962,11 @@ bool ConvertGather(const HalOperation& operation, const HalModel& model, Convers
}
IConnectableLayer* layer = data.m_Network->AddGatherLayer(desc);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the GatherLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
indices.Connect(layer->GetInputSlot(1));
@@ -841,7 +1034,6 @@ bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model,
const TensorShape& inputShape = inputInfo.GetShape();
const TensorShape& outputShape = outputInfo.GetShape();
const TensorShape& weightsShape = weights.GetShape();
- const TensorShape& biasesShape = biases.GetShape();
armnnUtils::DataLayoutIndexed dataLayoutIndexed(dataLayout);
const unsigned int channelsIndex = dataLayoutIndexed.GetChannelsIndex();
@@ -852,7 +1044,7 @@ bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model,
desc.m_DataLayout = dataLayout;
desc.m_BiasEnabled = true;
- int numGroups;
+ unsigned int numGroups;
ActivationFn activation;
if (operation.inputs.size() == 12)
@@ -940,10 +1132,12 @@ bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model,
}
bool isSupported = false;
+ armnn::BackendId setBackendSplit;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsSplitterSupported,
data.m_Backends,
isSupported,
+ setBackendSplit,
inputInfo,
splitterOutputInfos,
splitterDesc);
@@ -953,6 +1147,7 @@ bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model,
}
IConnectableLayer* splitterLayer = data.m_Network->AddSplitterLayer(splitterDesc);
+ splitterLayer->SetBackendId(setBackendSplit);
if (!splitterLayer)
{
return Fail("%s: Failed to add SplitterLayer", __func__);
@@ -1036,12 +1231,14 @@ bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model,
biasesDataOffset));
isSupported = false;
+ armnn::BackendId setBackendConv;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsConvolution2dSupported,
data.m_Backends,
isSupported,
+ setBackendConv,
groupInputInfo,
outputInfo,
desc,
@@ -1063,14 +1260,22 @@ bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model,
return false;
}
- IConnectableLayer* convLayer =
- data.m_Network->AddConvolution2dLayer(desc, groupWeights, Optional<ConstTensor>(groupBiases));
+ IConnectableLayer* weightsLayer = data.m_Network->AddConstantLayer(groupWeights);
+ IConnectableLayer* biasLayer = data.m_Network->AddConstantLayer(groupBiases);
+ IConnectableLayer* convLayer = data.m_Network->AddConvolution2dLayer(desc);
+
if (!convLayer)
{
return Fail("%s: AddConvolution2dLayer failed", __func__);
}
+ convLayer->SetBackendId(setBackendConv);
splitterLayer->GetOutputSlot(group).Connect(convLayer->GetInputSlot(0));
+ weightsLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(1));
+ biasLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(2));
+
+ weightsLayer->GetOutputSlot(0).SetTensorInfo(groupWeightsInfo);
+ biasLayer->GetOutputSlot(0).SetTensorInfo(groupBiasesInfo);
convLayer->GetOutputSlot(0).SetTensorInfo(groupOutputInfo);
if(isDynamic)
@@ -1108,10 +1313,12 @@ bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model,
}
isSupported = false;
+ armnn::BackendId setBackendConcat;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsConcatSupported,
data.m_Backends,
isSupported,
+ setBackendConcat,
std::vector<const TensorInfo*>(numGroups * channelMultiplier, &groupOutputInfo),
outputInfo,
concatDescriptor);
@@ -1122,6 +1329,7 @@ bool ConvertGroupedConv2d(const HalOperation& operation, const HalModel& model,
}
IConnectableLayer* concatLayer = data.m_Network->AddConcatLayer(concatDescriptor);
+ concatLayer->SetBackendId(setBackendConcat);
if (!concatLayer)
{
return Fail("%s: AddConcatLayer failed", __func__);
@@ -1209,12 +1417,14 @@ bool ConvertInstanceNormalization(const HalOperation& operation, const HalModel&
desc.m_DataLayout = OptionalDataLayout<HalPolicy>(operation, 4, model, data);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsInstanceNormalizationSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outputInfo,
desc);
@@ -1235,6 +1445,7 @@ bool ConvertInstanceNormalization(const HalOperation& operation, const HalModel&
}
IConnectableLayer* layer = data.m_Network->AddInstanceNormalizationLayer(desc);
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -1303,12 +1514,14 @@ bool ConvertLogSoftmax(const HalOperation& operation, const HalModel& model, Con
}
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsLogSoftmaxSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outputInfo,
descriptor);
@@ -1329,11 +1542,11 @@ bool ConvertLogSoftmax(const HalOperation& operation, const HalModel& model, Con
}
IConnectableLayer* layer = data.m_Network->AddLogSoftmaxLayer(descriptor);
if (!layer)
{
- return Fail("%s: AddLogSoftmaxLayer() returned nullptr", __func__);
+ return Fail("%s: Could not add the LogSoftmaxLayer", __func__);
}
-
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -1342,130 +1555,6 @@ bool ConvertLogSoftmax(const HalOperation& operation, const HalModel& model, Con
template<typename HalPolicy,
typename HalOperation = typename HalPolicy::Operation,
typename HalModel = typename HalPolicy::Model>
-bool ConvertMaximum(const HalOperation& operation, const HalModel& model, ConversionData& data)
-{
- using HalOperand = typename HalPolicy::Operand;
-
- ALOGV("HalPolicy::ConvertMaximum()");
-
- LayerInputHandle input0 = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
- LayerInputHandle input1 = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
-
- if (!input0.IsValid() || !input1.IsValid())
- {
- return Fail("%s: Operation has invalid inputs", __func__);
- }
-
- const HalOperand* outputOperand = GetOutputOperand<HalPolicy>(operation, 0, model);
- if (!outputOperand)
- {
- return Fail("%s: Could not read output", __func__);
- }
-
- const TensorInfo& outInfo = GetTensorInfoForOperand(*outputOperand);
-
- bool isSupported = false;
- auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported)
- {
- FORWARD_LAYER_SUPPORT_FUNC(__func__,
- IsMaximumSupported,
- data.m_Backends,
- isSupported,
- input0.GetTensorInfo(),
- input1.GetTensorInfo(),
- outInfo);
- };
-
- if(IsDynamicTensor(outInfo))
- {
- isSupported = AreDynamicTensorsSupported();
- }
- else
- {
- validateFunc(outInfo, isSupported);
- }
-
- if (!isSupported)
- {
- return false;
- }
-
- IConnectableLayer* layer = data.m_Network->AddMaximumLayer();
- assert(layer != nullptr);
- bool isReshapeSupported = BroadcastTensor(input0, input1, layer, data);
- if (!isReshapeSupported)
- {
- return false;
- }
-
- return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
-}
-
-template<typename HalPolicy,
- typename HalOperation = typename HalPolicy::Operation,
- typename HalModel = typename HalPolicy::Model>
-bool ConvertMinimum(const HalOperation& operation, const HalModel& model, ConversionData& data)
-{
- using HalOperand = typename HalPolicy::Operand;
-
- ALOGV("HalPolicy::ConvertMinimum()");
-
- LayerInputHandle input0 = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
- LayerInputHandle input1 = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
-
- if (!input0.IsValid() || !input1.IsValid())
- {
- return Fail("%s: Operation has invalid inputs", __func__);
- }
-
- const HalOperand* output = GetOutputOperand<HalPolicy>(operation, 0, model);
- if (!output)
- {
- return Fail("%s: Could not read output 0", __func__);
- }
-
- const TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
-
- bool isSupported = false;
- auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
- {
- FORWARD_LAYER_SUPPORT_FUNC(__func__,
- IsMinimumSupported,
- data.m_Backends,
- isSupported,
- input0.GetTensorInfo(),
- input1.GetTensorInfo(),
- outputInfo);
- };
-
- if(IsDynamicTensor(outputInfo))
- {
- isSupported = AreDynamicTensorsSupported();
- }
- else
- {
- validateFunc(outputInfo, isSupported);
- }
-
- if (!isSupported)
- {
- return false;
- }
-
- IConnectableLayer* const layer = data.m_Network->AddMinimumLayer();
- assert(layer != nullptr);
- bool isReshapeSupported = BroadcastTensor(input0, input1, layer, data);
- if (!isReshapeSupported)
- {
- return false;
- }
-
- return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
-}
-
-template<typename HalPolicy,
- typename HalOperation = typename HalPolicy::Operation,
- typename HalModel = typename HalPolicy::Model>
bool ConvertPadV2(const HalOperation& operation, const HalModel& model, ConversionData& data)
{
using HalOperand = typename HalPolicy::Operand;
@@ -1524,7 +1613,7 @@ bool ConvertPadV2(const HalOperation& operation, const HalModel& model, Conversi
return Fail("%s: Could not read input 2 (FLOAT32)", __func__);
}
}
- else if (operandType0 == HalOperandType::TENSOR_QUANT8_ASYMM && operandType2 == HalOperandType::INT32)
+ else if (isQuantizedOperand(operandType0) && operandType2 == HalOperandType::INT32)
{
int32_t intPadValue = 0;
if (!GetInputInt32<HalPolicy>(operation, 2, intPadValue, model, data))
@@ -1539,12 +1628,14 @@ bool ConvertPadV2(const HalOperation& operation, const HalModel& model, Conversi
}
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsPadSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -1565,7 +1656,11 @@ bool ConvertPadV2(const HalOperation& operation, const HalModel& model, Conversi
}
IConnectableLayer* const layer = data.m_Network->AddPadLayer(descriptor);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the PadLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -1600,12 +1695,14 @@ bool ConvertPrelu(const HalOperation& operation, const HalModel& model, Conversi
const TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsPreluSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
alphaInfo,
outputInfo);
@@ -1626,10 +1723,10 @@ bool ConvertPrelu(const HalOperation& operation, const HalModel& model, Conversi
}
IConnectableLayer* const layer = data.m_Network->AddPreluLayer();
-
if (!layer)
{
- return Fail("%s: AddPreluLayer failed", __func__);
+ return Fail("%s: Could not add the PreluLayer", __func__);
}
+ layer->SetBackendId(setBackend);
bool isReshapeSupported = BroadcastTensor(input, alpha, layer, data);
@@ -1665,12 +1762,14 @@ bool ConvertQuantize(const HalOperation& operation, const HalModel& model, Conve
const TensorInfo& outputInfo = GetTensorInfoForOperand(*outputOperand);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsQuantizeSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outputInfo);
};
@@ -1690,7 +1789,11 @@ bool ConvertQuantize(const HalOperation& operation, const HalModel& model, Conve
}
IConnectableLayer* const layer = data.m_Network->AddQuantizeLayer();
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the QuantizeLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -1884,12 +1987,14 @@ bool ConvertQuantized16BitLstm(const HalOperation& operation, const HalModel& mo
paramsInfo.m_OutputGateBias = &(params.m_OutputGateBias->GetInfo());
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsQuantizedLstmSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
previousCellStateInInfo,
previousOutputInInfo,
@@ -1916,6 +2021,7 @@ bool ConvertQuantized16BitLstm(const HalOperation& operation, const HalModel& mo
}
IConnectableLayer* const layer = data.m_Network->AddQuantizedLstmLayer(params, "QuantizedLstm");
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
previousCellStateIn.Connect(layer->GetInputSlot(1));
previousOutputIn.Connect(layer->GetInputSlot(2));
@@ -1937,6 +2043,98 @@ bool ConvertQuantized16BitLstm(const HalOperation& operation, const HalModel& mo
template<typename HalPolicy,
typename HalOperation = typename HalPolicy::Operation,
typename HalModel = typename HalPolicy::Model>
+bool ConvertReduce(const HalOperation& operation,
+ const HalModel& model,
+ ConversionData& data,
+ ReduceOperation reduceOperation)
+{
+ using HalOperand = typename HalPolicy::Operand;
+ using HalOperandType = typename HalPolicy::OperandType;
+
+ armnn::ReduceDescriptor descriptor;
+ descriptor.m_ReduceOperation = reduceOperation;
+
+ LayerInputHandle input = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
+ if (!input.IsValid())
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+ const armnn::TensorInfo& inputInfo = input.GetTensorInfo();
+
+ const HalOperand* output = GetOutputOperand<HalPolicy>(operation, 0, model);
+ if (!output)
+ {
+ return Fail("%s: Could not read output 0", __func__);
+ }
+ const armnn::TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+
+ const HalOperand* axisOperand = GetInputOperand<HalPolicy>(operation, 1, model);
+ if (!axisOperand)
+ {
+ return Fail("%s: Could not read input 1", __func__);
+ }
+ std::vector<int32_t> axis;
+ if (!GetTensorInt32Values<HalPolicy>(*axisOperand, axis, model, data))
+ {
+ return Fail("%s: Input 1 has invalid values", __func__);
+ }
+
+ // Convert the axis to unsigned int and remove duplicates.
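+ // Negative axis values wrap around: (i + rank) % rank maps each entry into [0, rank) before it is added to the set.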
+ unsigned int rank = inputInfo.GetNumDimensions();
+ std::set<unsigned int> uniqueAxis;
+ std::transform(axis.begin(), axis.end(),
+ std::inserter(uniqueAxis, uniqueAxis.begin()),
+ [rank](int i) -> unsigned int { return (i + rank) % rank; });
+ descriptor.m_vAxis.assign(uniqueAxis.begin(), uniqueAxis.end());
+
+ // Get the "keep dims" flag.
+ if (!GetInputScalar<HalPolicy>(operation, 2, HalOperandType::BOOL, descriptor.m_KeepDims, model, data))
+ {
+ return Fail("%s: Could not read input 2", __func__);
+ }
+
+ bool isSupported = false;
+ armnn::BackendId setBackend;
+ auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
+ {
+ FORWARD_LAYER_SUPPORT_FUNC(__func__,
+ IsReduceSupported,
+ data.m_Backends,
+ isSupported,
+ setBackend,
+ inputInfo,
+ outputInfo,
+ descriptor);
+ };
+
+ if(!IsDynamicTensor(outputInfo))
+ {
+ validateFunc(outputInfo, isSupported);
+ }
+ else
+ {
+ isSupported = AreDynamicTensorsSupported();
+ }
+
+ if (!isSupported)
+ {
+ return false;
+ }
+
+ armnn::IConnectableLayer* const layer = data.m_Network->AddReduceLayer(descriptor);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the ReduceLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
+ input.Connect(layer->GetInputSlot(0));
+
+ return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
+}
+
+template<typename HalPolicy,
+ typename HalOperation = typename HalPolicy::Operation,
+ typename HalModel = typename HalPolicy::Model>
bool ConvertResize(const HalOperation& operation,
const HalModel& model,
ConversionData& data,
@@ -2051,12 +2249,14 @@ bool ConvertResize(const HalOperation& operation,
descriptor.m_HalfPixelCenters = GetOptionalBool<HalPolicy>(operation, 5, model, data);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsResizeSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -2077,7 +2277,11 @@ bool ConvertResize(const HalOperation& operation,
}
IConnectableLayer* layer = data.m_Network->AddResizeLayer(descriptor);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the ResizeLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -2126,12 +2330,14 @@ bool ConvertSpaceToDepth(const HalOperation& operation, const HalModel& model, C
desc.m_DataLayout = OptionalDataLayout<HalPolicy>(operation, 2, model, data);
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsSpaceToDepthSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc);
@@ -2152,7 +2358,11 @@ bool ConvertSpaceToDepth(const HalOperation& operation, const HalModel& model, C
}
IConnectableLayer* const layer = data.m_Network->AddSpaceToDepthLayer(desc);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the SpaceToDepthLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -2216,12 +2426,14 @@ bool ConvertSoftmax(const HalOperation& operation, const HalModel& model, Conver
}
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsSoftmaxSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outputInfo,
desc);
@@ -2242,7 +2454,11 @@ bool ConvertSoftmax(const HalOperation& operation, const HalModel& model, Conver
}
IConnectableLayer* layer = data.m_Network->AddSoftmaxLayer(desc);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the SoftmaxLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
@@ -2389,7 +2605,7 @@ bool ConvertLstm(const HalOperation& operation, const HalModel& model, Conversio
// If set to 0.0 then clipping is disabled.
// 22: The clipping threshold: for the output from the projection layer, such that values are bound within
// [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
- ActivationFn activation;
+ ActivationFn activation = ActivationFn::kActivationNone;
float cellClip;
float projClip;
if (!GetInputActivationFunctionFromTensor<HalPolicy>(operation, 20, activation, model, data) ||
@@ -2603,12 +2819,14 @@ bool ConvertLstm(const HalOperation& operation, const HalModel& model, Conversio
}
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsLstmSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputStateInInfo,
cellStateInInfo,
@@ -2641,6 +2859,7 @@ bool ConvertLstm(const HalOperation& operation, const HalModel& model, Conversio
// Add the layer
IConnectableLayer* layer = data.m_Network->AddLstmLayer(desc, params, "Lstm");
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
outputStateIn.Connect(layer->GetInputSlot(1));
@@ -2840,12 +3059,14 @@ bool ConvertTransposeConv2d(const HalOperation& operation, const HalModel& model
Optional<TensorInfo> biases(bias.GetInfo());
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsTransposeConvolution2dSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
desc,
@@ -2868,6 +3089,7 @@ bool ConvertTransposeConv2d(const HalOperation& operation, const HalModel& model
IConnectableLayer* startLayer =
data.m_Network->AddTransposeConvolution2dLayer(desc, weights, Optional<ConstTensor>(bias));
+ startLayer->SetBackendId(setBackend);
if (!startLayer)
{
return Fail("%s: AddTransposeConvolution2dLayer failed", __func__);
@@ -2879,4 +3101,450 @@ bool ConvertTransposeConv2d(const HalOperation& operation, const HalModel& model
data, nullptr, validateFunc, activation);
}
+template<typename HalPolicy,
+ typename HalOperation = typename HalPolicy::Operation,
+ typename HalModel = typename HalPolicy::Model>
+bool ConvertUnidirectionalSequenceLstm(const HalOperation& operation,
+ const HalModel& model,
+ ConversionData& data)
+{
+ using HalOperand = typename HalPolicy::Operand;
+ using HalOperandType = typename HalPolicy::OperandType;
+
+ ALOGV("HalPolicy::ConvertUnidirectionalSequenceLstm()");
+
+ // Determine if the input OperandType is ANEURALNETWORKS_TENSOR_FLOAT32 or ANEURALNETWORKS_TENSOR_FLOAT16
+ HalOperandType inputType;
+ if (!GetOperandType<HalPolicy>(operation, 0, model, inputType))
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ // Inputs:
+ // 0: The input: A 3-D tensor of shape: If time-major: [max_time, batch_size, input_size] If batch-major:
+ // [batch_size, max_time, input_size] where “max_time” is the number of timesteps (sequence length), “batch_size”
+ // corresponds to the batching dimension, and “input_size” is the size of the input.
+ LayerInputHandle input = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
+ if (!input.IsValid())
+ {
+ return Fail("%s: Could not read input 0: input", __func__);
+ }
+ // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [batch_size, output_size].
+ LayerInputHandle outputStateIn = ConvertToLayerInputHandle<HalPolicy>(operation, 18, model, data);
+ if (!outputStateIn.IsValid())
+ {
+ return Fail("%s: Could not read input 18: outputStateIn", __func__);
+ }
+ // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [batch_size, num_units].
+ LayerInputHandle cellStateIn = ConvertToLayerInputHandle<HalPolicy>(operation, 19, model, data);
+ if (!cellStateIn.IsValid())
+ {
+ return Fail("%s: Could not read input 19: cellStateIn", __func__);
+ }
+
+ // Get the mandatory input tensors:
+ // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [num_units, input_size].
+ const ConstTensorPin inputToForgetWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 2));
+ // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [num_units, input_size].
+ const ConstTensorPin inputToCellWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 3));
+ // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [num_units, input_size].
+ const ConstTensorPin inputToOutputWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 4));
+ // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [num_units, output_size].
+ const ConstTensorPin recurrentToForgetWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 6));
+ // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ const ConstTensorPin recurrentToCellWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 7));
+ // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [num_units, output_size].
+ const ConstTensorPin recurrentToOutputWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 8));
+ // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [num_units].
+ const ConstTensorPin forgetGateBiasPin =
+ ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 13, model, data);
+ // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [num_units].
+ const ConstTensorPin cellBiasPin =
+ ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 14, model, data);
+ // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [num_units].
+ const ConstTensorPin outputGateBiasPin =
+ ConvertOperationInputToConstTensorPin<HalPolicy>(operation, 15, model, data);
+
+ if (!inputToForgetWeightsPin.IsValid() ||
+ !inputToCellWeightsPin.IsValid() ||
+ !inputToOutputWeightsPin.IsValid() ||
+ !recurrentToForgetWeightsPin.IsValid() ||
+ !recurrentToCellWeightsPin.IsValid() ||
+ !recurrentToOutputWeightsPin.IsValid() ||
+ !forgetGateBiasPin.IsValid() ||
+ !cellBiasPin.IsValid() ||
+ !outputGateBiasPin.IsValid())
+ {
+ return Fail("%s: Operation has invalid tensor inputs", __func__);
+ }
+
+ // Get the optional input tensors:
+ // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [num_units, input_size], where “num_units” corresponds to the number of cell units.
+ const ConstTensorPin inputToInputWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 1, true));
+ // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e.,
+ // “num_units”), or the second dimension of the “projection_weights”, if defined.
+ const ConstTensorPin recurrentToInputWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 5, true));
+ // 09: The cell-to-input weights: Optional.
+ // A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [num_units].
+ const ConstTensorPin cellToInputWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 9, true));
+ // 10: The cell-to-forget weights: Optional.
+ // A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [num_units].
+ const ConstTensorPin cellToForgetWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 10, true));
+ // 11: The cell-to-output weights: Optional.
+ // A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [num_units].
+ const ConstTensorPin cellToOutputWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 11, true));
+ // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [num_units].
+ const ConstTensorPin inputGateBiasPin =
+ ConvertOperationInputToConstTensorPin<HalPolicy>(operation,
+ 12,
+ model,
+ data,
+ g_DontPermute,
+ nullptr,
+ true);
+
+ // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [output_size, num_units].
+ const ConstTensorPin projectionWeightsPin =
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 16, true));
+ // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape [output_size].
+ const ConstTensorPin projectionBiasPin =
+ ConvertOperationInputToConstTensorPin<HalPolicy>(operation,
+ 17,
+ model,
+ data,
+ g_DontPermute,
+ nullptr,
+ true);
+
+ if ((!inputToInputWeightsPin.IsValid() && !inputToInputWeightsPin.IsOptional()) ||
+ (!recurrentToInputWeightsPin.IsValid() && !recurrentToInputWeightsPin.IsOptional()) ||
+ (!cellToInputWeightsPin.IsValid() && !cellToInputWeightsPin.IsOptional()) ||
+ (!cellToForgetWeightsPin.IsValid() && !cellToForgetWeightsPin.IsOptional()) ||
+ (!cellToOutputWeightsPin.IsValid() && !cellToOutputWeightsPin.IsOptional()) ||
+ (!inputGateBiasPin.IsValid() && !inputGateBiasPin.IsOptional()) ||
+ (!projectionWeightsPin.IsValid() && !projectionWeightsPin.IsOptional()) ||
+ (!projectionBiasPin.IsValid() && !projectionBiasPin.IsOptional()))
+ {
+ return Fail("%s: Operation has invalid tensor inputs", __func__);
+ }
+
+ // Get the mandatory input scalars (actually 1-D tensors of size 1):
+ // 20: The activation function: A value indicating the activation function:
+ // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid.
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip].
+ // If set to 0.0 then clipping is disabled.
+ // 22: The clipping threshold: for the output from the projection layer, such that values are bound within
+ // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+ // Determine data type of input tensor
+ ActivationFn activation = ActivationFn::kActivationNone;
+ LstmDescriptor desc;
+
+ if (inputType == HalOperandType::TENSOR_FLOAT32)
+ {
+ float cellClip;
+ float projClip;
+
+ if (!GetInputActivationFunctionFromTensor<HalPolicy>(operation, 20, activation, model, data) ||
+ !GetInputScalar<HalPolicy>(operation, 21, HalOperandType::FLOAT32, cellClip, model, data) ||
+ !GetInputScalar<HalPolicy>(operation, 22, HalOperandType::FLOAT32, projClip, model, data))
+ {
+ return Fail("%s: Operation has invalid scalar inputs", __func__);
+ }
+
+ desc.m_ClippingThresCell = cellClip;
+ desc.m_ClippingThresProj = projClip;
+ }
+
+ if (inputType == HalOperandType::TENSOR_FLOAT16)
+ {
+ Half cellClip;
+ Half projClip;
+
+ if (!GetInputActivationFunctionFromTensor<HalPolicy>(operation, 20, activation, model, data) ||
+ !GetInputScalar<HalPolicy>(operation, 21, HalOperandType::FLOAT16, cellClip, model, data) ||
+ !GetInputScalar<HalPolicy>(operation, 22, HalOperandType::FLOAT16, projClip, model, data))
+ {
+ return Fail("%s: Operation has invalid scalar inputs", __func__);
+ }
+
+ desc.m_ClippingThresCell = cellClip;
+ desc.m_ClippingThresProj = projClip;
+ }
+
+ // Determine if time-major or batch-major.
+ // 23: Time-major if true, batch-major if false.
+ bool isTimeMajor = GetOptionalBool<HalPolicy>(operation, 23, model, data);
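+ // When time-major, the sequence dimension comes first ([max_time, batch_size, input_size]); otherwise the layout is batch-major.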
+
+ // Get the normalization tensors
+ // 24: The input layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at input gate.
+ const ConstTensorPin inputLayerNormWeightsPin
+ (DequantizeAndMakeConstTensorPin<HalPolicy>(operation, model, data, 24, true));
+
+ // 25: The forget layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at forget gate.
+ const ConstTensorPin forgetLayerNormWeightsPin =
+ ConvertOperationInputToConstTensorPin<HalPolicy>(operation,
+ 25,
+ model,
+ data,
+ g_DontPermute,
+ nullptr,
+ true);
+
+ // 26: The cell layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at cell gate.
+ const ConstTensorPin cellLayerNormWeightsPin =
+ ConvertOperationInputToConstTensorPin<HalPolicy>(operation,
+ 26,
+ model,
+ data,
+ g_DontPermute,
+ nullptr,
+ true);
+
+ // 27: The output layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at output gate.
+ const ConstTensorPin outputLayerNormWeightsPin =
+ ConvertOperationInputToConstTensorPin<HalPolicy>(operation,
+ 27,
+ model,
+ data,
+ g_DontPermute,
+ nullptr,
+ true);
+
+ // Outputs:
+ // 00: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16. Shape: if time-major:
+ // [max_time, batch_size, output_size] If batch-major: [batch_size, max_time, output_size]
+ const HalOperand* output = GetOutputOperand<HalPolicy>(operation, 0, model);
+ if (!output)
+ {
+ return Fail("%s: Could not read output: ", __func__);
+ }
+
+ //
+ // 01 & 02:
+ // hiddenStateOut and cellStateOut are not currently supported by our android versioning.
+ //
+
+ // set the params structure for the AddLstmLayer call
+ LstmInputParams params;
+ params.m_InputToInputWeights = inputToInputWeightsPin.GetConstTensorPtr();
+ params.m_InputToForgetWeights = inputToForgetWeightsPin.GetConstTensorPtr();
+ params.m_InputToCellWeights = inputToCellWeightsPin.GetConstTensorPtr();
+ params.m_InputToOutputWeights = inputToOutputWeightsPin.GetConstTensorPtr();
+ params.m_RecurrentToInputWeights = recurrentToInputWeightsPin.GetConstTensorPtr();
+ params.m_RecurrentToForgetWeights = recurrentToForgetWeightsPin.GetConstTensorPtr();
+ params.m_RecurrentToCellWeights = recurrentToCellWeightsPin.GetConstTensorPtr();
+ params.m_RecurrentToOutputWeights = recurrentToOutputWeightsPin.GetConstTensorPtr();
+ params.m_CellToInputWeights = cellToInputWeightsPin.GetConstTensorPtr();
+ params.m_CellToForgetWeights = cellToForgetWeightsPin.GetConstTensorPtr();
+ params.m_CellToOutputWeights = cellToOutputWeightsPin.GetConstTensorPtr();
+ params.m_InputGateBias = inputGateBiasPin.GetConstTensorPtr();
+ params.m_ForgetGateBias = forgetGateBiasPin.GetConstTensorPtr();
+ params.m_CellBias = cellBiasPin.GetConstTensorPtr();
+ params.m_OutputGateBias = outputGateBiasPin.GetConstTensorPtr();
+ params.m_ProjectionWeights = projectionWeightsPin.GetConstTensorPtr();
+ params.m_ProjectionBias = projectionBiasPin.GetConstTensorPtr();
+ params.m_InputLayerNormWeights = inputLayerNormWeightsPin.GetConstTensorPtr();
+ params.m_ForgetLayerNormWeights = forgetLayerNormWeightsPin.GetConstTensorPtr();
+ params.m_CellLayerNormWeights = cellLayerNormWeightsPin.GetConstTensorPtr();
+ params.m_OutputLayerNormWeights = outputLayerNormWeightsPin.GetConstTensorPtr();
+
+ // set the layer descriptor
+ desc.m_ActivationFunc = activation;
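+ // CIFG, peephole, projection and layer normalisation support are inferred from which optional tensors were supplied.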
+ desc.m_CifgEnabled = (params.m_InputToInputWeights == nullptr ||
+ params.m_RecurrentToInputWeights == nullptr ||
+ params.m_InputGateBias == nullptr);
+ desc.m_PeepholeEnabled = (params.m_CellToForgetWeights != nullptr ||
+ params.m_CellToOutputWeights != nullptr);
+ desc.m_ProjectionEnabled = (params.m_ProjectionWeights != nullptr);
+ desc.m_LayerNormEnabled = (params.m_InputLayerNormWeights != nullptr ||
+ params.m_ForgetLayerNormWeights != nullptr ||
+ params.m_CellLayerNormWeights != nullptr ||
+ params.m_OutputLayerNormWeights != nullptr);
+ desc.m_TimeMajor = isTimeMajor;
+
+ // validate the optional input groups
+ if (desc.m_CifgEnabled &&
+ (params.m_InputToInputWeights != nullptr ||
+ params.m_RecurrentToInputWeights != nullptr ||
+ params.m_InputGateBias != nullptr))
+ {
+ return Fail("%s: All, or none, of input-to-input weights, recurrent-to-input weights,"
+ " and input gate bias must be provided", __func__);
+ }
+
+ if (!desc.m_ProjectionEnabled && params.m_ProjectionBias != nullptr)
+ {
+ return Fail("%s: projection bias should not be provided without projection weights", __func__);
+ }
+
+ if (desc.m_PeepholeEnabled &&
+ (params.m_CellToForgetWeights == nullptr ||
+ params.m_CellToOutputWeights == nullptr ||
+ (!desc.m_CifgEnabled && params.m_CellToInputWeights == nullptr)))
+ {
+ return Fail("%s: All, or none, of cell-to-forget weights and cell-to-output weights must be provided"
+ " and, if CIFG is not enabled, cell-to-input weights must also be provided", __func__);
+ }
+
+ if (desc.m_LayerNormEnabled &&
+ (params.m_ForgetLayerNormWeights == nullptr ||
+ params.m_CellLayerNormWeights == nullptr ||
+ params.m_OutputLayerNormWeights == nullptr ||
+ (!desc.m_CifgEnabled && params.m_InputLayerNormWeights == nullptr)))
+ {
+ return Fail("%s: All, or none, of forget-norm weights, cell-norm weights and output-norm weights must be"
+ " provided and, if CIFG is not enabled, input-norm weights must also be provided", __func__);
+ }
+
+ // Check if the layer is supported
+ // Inputs
+ const TensorInfo& inputInfo = input.GetTensorInfo();
+ const TensorInfo& outputStateInInfo = outputStateIn.GetTensorInfo();
+ const TensorInfo& cellStateInInfo = cellStateIn.GetTensorInfo();
+
+ // Outputs
+ const TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+
+ unsigned int batchSize = inputInfo.GetShape()[0];
+ unsigned int outputSize = outputInfo.GetShape()[2];
+ unsigned int numUnits = cellStateInInfo.GetShape()[1];
+
+ armnn::DataType dataType = inputInfo.GetDataType();
+ float qScale = inputInfo.GetQuantizationScale();
+ int qOffset = inputInfo.GetQuantizationOffset();
+
+ armnn::TensorInfo cellStateOutInfo({batchSize, numUnits}, cellStateInInfo.GetDataType(),
+ cellStateInInfo.GetQuantizationScale(), cellStateInInfo.GetQuantizationOffset());
+ armnn::TensorInfo outputStateOutInfo({batchSize, outputSize}, dataType, qScale, qOffset);
+
+ // Basic parameters
+ LstmInputParamsInfo paramsInfo;
+ paramsInfo.m_InputToForgetWeights = &(params.m_InputToForgetWeights->GetInfo());
+ paramsInfo.m_InputToCellWeights = &(params.m_InputToCellWeights->GetInfo());
+ paramsInfo.m_InputToOutputWeights = &(params.m_InputToOutputWeights->GetInfo());
+ paramsInfo.m_RecurrentToForgetWeights = &(params.m_RecurrentToForgetWeights->GetInfo());
+ paramsInfo.m_RecurrentToCellWeights = &(params.m_RecurrentToCellWeights->GetInfo());
+ paramsInfo.m_RecurrentToOutputWeights = &(params.m_RecurrentToOutputWeights->GetInfo());
+ paramsInfo.m_ForgetGateBias = &(params.m_ForgetGateBias->GetInfo());
+ paramsInfo.m_CellBias = &(params.m_CellBias->GetInfo());
+ paramsInfo.m_OutputGateBias = &(params.m_OutputGateBias->GetInfo());
+
+ // Optional parameters
+ if (!desc.m_CifgEnabled)
+ {
+ paramsInfo.m_InputToInputWeights = &(params.m_InputToInputWeights->GetInfo());
+ paramsInfo.m_RecurrentToInputWeights = &(params.m_RecurrentToInputWeights->GetInfo());
+ if (params.m_CellToInputWeights != nullptr)
+ {
+ paramsInfo.m_CellToInputWeights = &(params.m_CellToInputWeights->GetInfo());
+ }
+ paramsInfo.m_InputGateBias = &(params.m_InputGateBias->GetInfo());
+ }
+
+ if (desc.m_ProjectionEnabled)
+ {
+ paramsInfo.m_ProjectionWeights = &(params.m_ProjectionWeights->GetInfo());
+ if (params.m_ProjectionBias != nullptr)
+ {
+ paramsInfo.m_ProjectionBias = &(params.m_ProjectionBias->GetInfo());
+ }
+ }
+
+ if (desc.m_PeepholeEnabled)
+ {
+ paramsInfo.m_CellToForgetWeights = &(params.m_CellToForgetWeights->GetInfo());
+ paramsInfo.m_CellToOutputWeights = &(params.m_CellToOutputWeights->GetInfo());
+ }
+
+ if (desc.m_LayerNormEnabled)
+ {
+ if(!desc.m_CifgEnabled)
+ {
+ paramsInfo.m_InputLayerNormWeights = &(params.m_InputLayerNormWeights->GetInfo());
+ }
+ paramsInfo.m_ForgetLayerNormWeights = &(params.m_ForgetLayerNormWeights->GetInfo());
+ paramsInfo.m_CellLayerNormWeights = &(params.m_CellLayerNormWeights->GetInfo());
+ paramsInfo.m_OutputLayerNormWeights = &(params.m_OutputLayerNormWeights->GetInfo());
+ }
+
+ bool isSupported = false;
+ armnn::BackendId setBackend;
+ auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
+ {
+ FORWARD_LAYER_SUPPORT_FUNC(__func__,
+ IsUnidirectionalSequenceLstmSupported,
+ data.m_Backends,
+ isSupported,
+ setBackend,
+ inputInfo,
+ outputStateInInfo,
+ cellStateInInfo,
+ outputStateOutInfo,
+ cellStateOutInfo,
+ outputInfo,
+ desc,
+ paramsInfo);
+ };
+
+ bool isDynamic = false;
+ if (!IsDynamicTensor(outputInfo))
+ {
+ validateFunc(outputInfo, isSupported);
+ }
+ else
+ {
+ isDynamic = true;
+ isSupported = AreDynamicTensorsSupported();
+ }
+
+ if (!isSupported)
+ {
+ return false;
+ }
+
+ // Add the layer
+ IConnectableLayer* layer = data.m_Network->AddUnidirectionalSequenceLstmLayer(desc,
+ params,
+ "UnidirectionalSequenceLstm");
+ layer->SetBackendId(setBackend);
+
+ input.Connect(layer->GetInputSlot(0));
+ outputStateIn.Connect(layer->GetInputSlot(1));
+ cellStateIn.Connect(layer->GetInputSlot(2));
+
+ if (!isDynamic)
+ {
+ return (SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, 2, model, data));
+ }
+ else
+ {
+ return (SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, 2, model, data, nullptr,
+ validateFunc, ActivationFn::kActivationNone, true));
+ }
+}
+
} // armnn_driver namespace
\ No newline at end of file
diff --git a/ConversionUtils_1_3.hpp b/ConversionUtils_1_3.hpp
index a7f00fc..761b189 100644
--- a/ConversionUtils_1_3.hpp
+++ b/ConversionUtils_1_3.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd. All rights reserved.
+// Copyright © 2020,2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -131,10 +131,12 @@ bool ConvertFill(const HalOperation& operation, const HalModel& model, Conversio
}
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsFillSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputInfo,
descriptor);
@@ -144,7 +146,11 @@ bool ConvertFill(const HalOperation& operation, const HalModel& model, Conversio
}
IConnectableLayer* const layer = data.m_Network->AddFillLayer(descriptor);
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the FillLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data);
@@ -153,6 +159,84 @@ bool ConvertFill(const HalOperation& operation, const HalModel& model, Conversio
template<typename HalPolicy,
typename HalOperation = typename HalPolicy::Operation,
typename HalModel = typename HalPolicy::Model>
+bool ConvertLogicalBinary(const HalOperation& operation,
+ const HalModel& model,
+ ConversionData& data,
+ LogicalBinaryOperation logicalOperation)
+{
+ using HalOperand = typename HalPolicy::Operand;
+
+ ALOGV("HalPolicy::ConvertLogicalBinary()");
+ ALOGV("logicalOperation = %s", GetLogicalBinaryOperationAsCString(logicalOperation));
+
+ LayerInputHandle input0 = ConvertToLayerInputHandle<HalPolicy>(operation, 0, model, data);
+ LayerInputHandle input1 = ConvertToLayerInputHandle<HalPolicy>(operation, 1, model, data);
+
+ if (!(input0.IsValid() && input1.IsValid()))
+ {
+ return Fail("%s: Operation has invalid inputs", __func__);
+ }
+
+ const HalOperand* output = GetOutputOperand<HalPolicy>(operation, 0, model);
+ if (!output)
+ {
+ return Fail("%s: Could not read output 0", __func__);
+ }
+
+ const TensorInfo& inputInfo0 = input0.GetTensorInfo();
+ const TensorInfo& inputInfo1 = input1.GetTensorInfo();
+ const TensorInfo& outputInfo = GetTensorInfoForOperand(*output);
+
+ LogicalBinaryDescriptor descriptor(logicalOperation);
+
+ bool isSupported = false;
+ armnn::BackendId setBackend;
+ auto validateFunc = [&](const armnn::TensorInfo& outputInfo, bool& isSupported)
+ {
+ FORWARD_LAYER_SUPPORT_FUNC(__func__,
+ IsLogicalBinarySupported,
+ data.m_Backends,
+ isSupported,
+ setBackend,
+ inputInfo0,
+ inputInfo1,
+ outputInfo,
+ descriptor);
+ };
+
+ if(!IsDynamicTensor(outputInfo))
+ {
+ validateFunc(outputInfo, isSupported);
+ }
+ else
+ {
+ isSupported = AreDynamicTensorsSupported();
+ }
+
+ if (!isSupported)
+ {
+ return false;
+ }
+
+ IConnectableLayer* layer = data.m_Network->AddLogicalBinaryLayer(descriptor);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the LogicalBinaryLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
+
+ bool isReshapeSupported = BroadcastTensor(input0, input1, layer, data);
+ if (!isReshapeSupported)
+ {
+ return false;
+ }
+
+ return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, nullptr, validateFunc);
+}
+
+template<typename HalPolicy,
+ typename HalOperation = typename HalPolicy::Operation,
+ typename HalModel = typename HalPolicy::Model>
bool ConvertQuantizedLstm(const HalOperation& operation, const HalModel& model, ConversionData& data)
{
using HalOperand = typename HalPolicy::Operand;
@@ -601,12 +685,14 @@ bool ConvertQuantizedLstm(const HalOperation& operation, const HalModel& model,
// Check if the layer is supported
bool isSupported = false;
+ armnn::BackendId setBackend;
auto validateFunc = [&](const armnn::TensorInfo& cellStateOutInfo, bool& isSupported)
{
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsQLstmSupported,
data.m_Backends,
isSupported,
+ setBackend,
inputInfo,
outputStatePrevTimeStepInfo,
cellStatePrevTimeStepInfo,
@@ -637,6 +723,7 @@ bool ConvertQuantizedLstm(const HalOperation& operation, const HalModel& model,
// Add the layer
IConnectableLayer* layer = data.m_Network->AddQLstmLayer(desc, params, "QLstm");
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
outputStatePrevTimeStep.Connect(layer->GetInputSlot(1));
@@ -691,10 +778,12 @@ bool ConvertRank(const HalOperation& operation, const HalModel& model, Conversio
}
bool isSupported = false;
+ armnn::BackendId setBackend;
FORWARD_LAYER_SUPPORT_FUNC(__func__,
IsRankSupported,
data.m_Backends,
isSupported,
+ setBackend,
input.GetTensorInfo(),
outInfo);
if (!isSupported)
@@ -703,7 +792,11 @@ bool ConvertRank(const HalOperation& operation, const HalModel& model, Conversio
}
armnn::IConnectableLayer* layer = data.m_Network->AddRankLayer();
- assert(layer != nullptr);
+ if (!layer)
+ {
+ return Fail("%s: Could not add the RankLayer", __func__);
+ }
+ layer->SetBackendId(setBackend);
input.Connect(layer->GetInputSlot(0));
return SetupAndTrackLayerOutputSlot<HalPolicy>(operation, 0, *layer, model, data, &outInfo);
diff --git a/DriverOptions.cpp b/DriverOptions.cpp
index 6f777e3..42d932c 100644
--- a/DriverOptions.cpp
+++ b/DriverOptions.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -17,7 +17,6 @@
#include <cxxopts/cxxopts.hpp>
#include <algorithm>
-#include <cassert>
#include <functional>
#include <string>
#include <sstream>
@@ -36,6 +35,13 @@ DriverOptions::DriverOptions(armnn::Compute computeDevice, bool fp16Enabled)
, m_EnableGpuProfiling(false)
, m_fp16Enabled(fp16Enabled)
, m_FastMathEnabled(false)
+ , m_ShouldExit(false)
+ , m_SaveCachedNetwork(false)
+ , m_NumberOfThreads(0)
+ , m_EnableAsyncModelExecution(false)
+ , m_ArmnnNumberOfThreads(1)
+ , m_EnableImport(false)
+ , m_EnableExport(false)
{
}
@@ -47,6 +53,13 @@ DriverOptions::DriverOptions(const std::vector<armnn::BackendId>& backends, bool
, m_EnableGpuProfiling(false)
, m_fp16Enabled(fp16Enabled)
, m_FastMathEnabled(false)
+ , m_ShouldExit(false)
+ , m_SaveCachedNetwork(false)
+ , m_NumberOfThreads(0)
+ , m_EnableAsyncModelExecution(false)
+ , m_ArmnnNumberOfThreads(1)
+ , m_EnableImport(false)
+ , m_EnableExport(false)
{
}
@@ -58,6 +71,12 @@ DriverOptions::DriverOptions(int argc, char** argv)
, m_fp16Enabled(false)
, m_FastMathEnabled(false)
, m_ShouldExit(false)
+ , m_SaveCachedNetwork(false)
+ , m_NumberOfThreads(0)
+ , m_EnableAsyncModelExecution(false)
+ , m_ArmnnNumberOfThreads(1)
+ , m_EnableImport(false)
+ , m_EnableExport(false)
{
std::string unsupportedOperationsAsString;
std::string clTunedParametersModeAsString;
@@ -98,6 +117,10 @@ DriverOptions::DriverOptions(int argc, char** argv)
"the file accordingly.",
cxxopts::value<std::string>(clTunedParametersModeAsString)->default_value("UseTunedParameters"))
+ ("g,mlgo-cl-tuned-parameters-file",
+ "If non-empty, the given file will be used to load/save MLGO CL tuned parameters. ",
+ cxxopts::value<std::string>(m_ClMLGOTunedParametersFile)->default_value(""))
+
("n,service-name",
"If non-empty, the driver service name to be registered",
cxxopts::value<std::string>(m_ServiceName)->default_value("armnn"))
@@ -112,6 +135,22 @@ DriverOptions::DriverOptions(int argc, char** argv)
("p,gpu-profiling", "Turns GPU profiling on",
cxxopts::value<bool>(m_EnableGpuProfiling)->default_value("false"))
+ ("q,cached-network-file", "If non-empty, the given file will be used to load/save cached network. "
+ "If save-cached-network option is given will save the cached network to given file."
+ "If save-cached-network option is not given will load the cached network from given "
+ "file.",
+ cxxopts::value<std::string>(m_CachedNetworkFilePath)->default_value(""))
+
+ ("s,save-cached-network", "Enables saving the cached network to the file given with cached-network-file option."
+ " See also --cached-network-file",
+ cxxopts::value<bool>(m_SaveCachedNetwork)->default_value("false"))
+
+ ("number-of-threads",
+ "Assign the number of threads used by the CpuAcc backend. "
+ "Input value must be between 1 and 64. "
+ "Default is set to 0 (Backend will decide number of threads to use).",
+ cxxopts::value<unsigned int>(m_NumberOfThreads)->default_value("0"))
+
("t,cl-tuned-parameters-file",
"If non-empty, the given file will be used to load/save CL tuned parameters. "
"See also --cl-tuned-parameters-mode",
@@ -126,7 +165,22 @@ DriverOptions::DriverOptions(int argc, char** argv)
cxxopts::value<bool>(m_VerboseLogging)->default_value("false"))
("V,version", "Show version information",
- cxxopts::value<bool>(showVersion)->default_value("false"));
+ cxxopts::value<bool>(showVersion)->default_value("false"))
+
+ ("A,asyncModelExecution", "Enable AsynModel Execution",
+ cxxopts::value<bool>(m_EnableAsyncModelExecution)->default_value("false"))
+
+ ("T,armnn-threads",
+ "Assign the number of threads used by ArmNN. "
+ "Input value must be at least 1. "
+ "Default is set to 1.",
+ cxxopts::value<unsigned int>(m_ArmnnNumberOfThreads)->default_value("1"))
+
+ ("I,enableImport", "Enable Importing of input buffers",
+ cxxopts::value<bool>(m_EnableImport)->default_value("false"))
+
+ ("E,enableExport", "Enable Exporting of output buffers",
+ cxxopts::value<bool>(m_EnableExport)->default_value("false"));
}
catch (const std::exception& e)
{
diff --git a/DriverOptions.hpp b/DriverOptions.hpp
index 1523652..ee68a94 100644
--- a/DriverOptions.hpp
+++ b/DriverOptions.hpp
@@ -28,6 +28,7 @@ public:
const std::string& GetServiceName() const { return m_ServiceName; }
const std::set<unsigned int>& GetForcedUnsupportedOperations() const { return m_ForcedUnsupportedOperations; }
const std::string& GetClTunedParametersFile() const { return m_ClTunedParametersFile; }
+ const std::string& GetClMLGOTunedParametersFile() const { return m_ClMLGOTunedParametersFile; }
armnn::IGpuAccTunedParameters::Mode GetClTunedParametersMode() const { return m_ClTunedParametersMode; }
armnn::IGpuAccTunedParameters::TuningLevel GetClTuningLevel() const { return m_ClTuningLevel; }
bool IsGpuProfilingEnabled() const { return m_EnableGpuProfiling; }
@@ -36,6 +37,13 @@ public:
void SetBackends(const std::vector<armnn::BackendId>& backends) { m_Backends = backends; }
bool ShouldExit() const { return m_ShouldExit; }
int GetExitCode() const { return m_ExitCode; }
+ const std::string& GetCachedNetworkFilePath() const { return m_CachedNetworkFilePath; }
+ bool SaveCachedNetwork() const { return m_SaveCachedNetwork; }
+ unsigned int GetNumberOfThreads() const { return m_NumberOfThreads; }
+ bool isAsyncModelExecutionEnabled() const { return m_EnableAsyncModelExecution; };
+ unsigned int getNoOfArmnnThreads() const { return m_ArmnnNumberOfThreads; };
+ bool isImportEnabled() const { return m_EnableImport; };
+ bool isExportEnabled() const { return m_EnableExport; };
private:
std::vector<armnn::BackendId> m_Backends;
@@ -44,6 +52,7 @@ private:
std::string m_ServiceName;
std::set<unsigned int> m_ForcedUnsupportedOperations;
std::string m_ClTunedParametersFile;
+ std::string m_ClMLGOTunedParametersFile;
armnn::IGpuAccTunedParameters::Mode m_ClTunedParametersMode;
armnn::IGpuAccTunedParameters::TuningLevel m_ClTuningLevel;
bool m_EnableGpuProfiling;
@@ -51,6 +60,13 @@ private:
bool m_FastMathEnabled;
bool m_ShouldExit;
int m_ExitCode;
+ std::string m_CachedNetworkFilePath;
+ bool m_SaveCachedNetwork;
+ unsigned int m_NumberOfThreads;
+ bool m_EnableAsyncModelExecution;
+ unsigned int m_ArmnnNumberOfThreads;
+ bool m_EnableImport;
+ bool m_EnableExport;
};
} // namespace armnn_driver
diff --git a/LICENSE.spdx b/LICENSE.spdx
new file mode 100644
index 0000000..985e90f
--- /dev/null
+++ b/LICENSE.spdx
@@ -0,0 +1,756 @@
+SPDXVersion: SPDX-2.1
+DataLicense: CC0-1.0
+SPDXID: SPDXRef-DOCUMENT
+DocumentName: android-nn-driver
+DocumentNamespace: http://spdx.org/spdxdocs/spdx-v2.1-52f0ad2d-5aaf-4639-824f-433fd3f75059
+Creator: Person: Anonymous ()
+Creator: Organization: Anonymous ()
+Creator: Tool: reuse-1.0.0
+Created: 2022-08-03T11:15:19Z
+CreatorComment: <text>This document was created automatically using available reuse information consistent with REUSE.</text>
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-9f8259108df712c403827fb71c15e65a
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-a43f0edf0ce5df76c5fa0b729273ea77
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-bf1cf5f58ec86ae7e11a3c3c8b474034
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-cecaaa5d4dda56381f9f7a9cd61ff7e9
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-f143870cf1668a0e1921bc40fdb66f54
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-00a23dbae66843f432e28118661839d4
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-3256f8866910230cf6a7a73811af0cf9
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-2cc451461228edab191f273bf18b1bb3
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-d98af2dbf0e85ae4bc5f7b43ea7a954a
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-c88a629746090e9a0d3827215e2bfd51
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-cae6fc1961249d6881aca965eb58e004
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-e46a1911e4d9059307bd22dc2d912cb1
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-ea85c3000a503cbd9bd1b1f84e9fad86
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-4a1090b46195ace5630ea4a007850de7
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-385bcf1658d8867efb2b32b1500f09d4
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-b521d1dfe6989e64400469fdebd831c9
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-8bc8b64290ab46c0fb3219b9ec81a869
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-8463e614db8ba40e6e1945c7b4b18295
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-b829f130ef058df6a26066043a87a7f2
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-df339a6ebf0b4933a48663cccf1cf20a
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-b850f00ee638309c5b8d15037fac6df6
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-ca9a486e79c5c263b3d3f536a4565d83
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-67139f126de1f029b3bd651cd63b1bd6
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-be18403f7bc7bf15c37c7f64298b3c3d
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-9f5b20dfa312b6e2f9a8e1bf2b0b3503
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-b825919ce48fdb7516cbc362e6393f29
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-edaddcd2dd335d3ece1304a6554ba51d
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-7856d093259efa6362ed229b843e3bf4
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-965719fd5d318ab9d90f4ef97d6e5b0f
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-531050aa1791fd697a5dc40c9a177dd4
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-6776eb54bb2ecc6d49e627dbe41622b7
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-b6f8185ee2082a086ab3c079d6b39244
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-8507d62cd47bf366b75f14567d47267e
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-ccf8d55a9e9058f0dcf7c363c703d32d
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-092945a8bc4261b051b787bd591a7714
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-655f5885a812698027dcabea1589d5f4
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-649cb81767be10c5da180796b9ddb4a7
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-9f7e9dd3a20f1713c3fdc385273c66fa
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-f32963290737441c8d622ba4d084b5c8
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-567e8339bfeba1cb3c6421c08837b35c
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-03ea74a5d4f4f43725bd95894fdec8dd
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-8a7249a37de6af18cd2d550cff6010f9
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-c71eb7532243e3ca46e017d669533dba
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-22c04f1ed16ebce9712667a1c38a372a
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-e11be25fc4287567b7a0fa7ac82f1bb9
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-1dcf36d546829f1e3054e855e00ba041
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-b712c2b9f228f16280dd638e9509a61d
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-e9e436995bae1a91cc8eca98e9b807fd
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-07efeb8174fbf736a199dc31e12c5e72
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-37f8670d1beb81082869bc84b163efab
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-342850c8fa79d64844c1094ab1610c1c
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-147dd65486ab91d9c3c92070f0bfdeb0
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-de4084667ef7b12bc58e12fdd2191c69
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-b2047de27559c407bef406aff2b40d94
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-9f4bb06fba519fb76f51f8943f6ebae9
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-30b64008afa71760eabc7da521c1889d
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-baf669b9e92e00c5362dbc82809d5bde
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-930a2fdcf68b3d60760bcd4b005da53a
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-ddc27b0006a37f1b787e8b6162daf3d8
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-37595def2f9768995c35e05cb7997d5b
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-815280de5c96988091018b623655b38f
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-d2adb23643a916d18c659c84820d7c6a
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-29371c36e8e4df94cfddca2fc1d126ba
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-cd71ee2ca480910d96b8b0d8d11f7ad0
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-e66a2501ad56e8b4c34ec503ea107499
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-f0b928f9591fbb77b0fd4f581e328118
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-ec80697484956fb568cffa59d1176c99
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-3ee20d00476f82587311b7af6755ba87
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-a2c1c0f69d8cae0e305c6588f5a1b7f1
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-a25ef944bedd8c0ebed511a2acf49ec5
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-6efb1f07d269b9e0980b06617c8a715b
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-3f448b727744f3cb9b0779c05699e288
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-dde9ae64190be94ab573edb16facee76
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-740f541502a5a7bdef15c23c22f921dd
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-815f4a4f014436097f35a2933a78c782
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-141d09b3dcfb086fa13ed4d1d7dc72c7
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-b608c8e94bd050f57df20a7b3c3578f7
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-9af5a132c2718f7f9cd37780855d5dc7
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-835e7b213035e712703efb1919160385
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-3ac1d54ace29e86b09ef0f7040ce499a
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-d4683c7840fdbbe5db1883d20f39e36f
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-106e0283ae070f56090f2d8df4aa23bf
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-390a1372ba683ef9a6008dacad3d7189
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-7ce28875cc79ab21c78d42ecdb2ba887
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-9ea405115778d8bc6828cb805c59d965
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-4c471c4784291b124a329da98dbb5721
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-5aa36b3d6a82f93da9cc87f793dd596d
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-2994f5b9a6418a8785a66cb0c974b6fd
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-2cd09106f5468654db71e97033eda634
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-233ae563722a5bc6f6fd82d5ca664e9b
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-2d49afc0410844dd4d5fb753d0d2c4d8
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-05f43bb75f39ece251d8a20052bcfd61
+Relationship: SPDXRef-DOCUMENT describes SPDXRef-94799b14d1b2f5eb08bf3204677984f1
+
+FileName: ./.gitignore
+SPDXID: SPDXRef-9f8259108df712c403827fb71c15e65a
+FileChecksum: SHA1: 20e5140c5f3d7d2bb9447e20e86fc36899042ebc
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2018, 2022 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./1.0/ArmnnDriver.hpp
+SPDXID: SPDXRef-a43f0edf0ce5df76c5fa0b729273ea77
+FileChecksum: SHA1: a8f69427c8291cd8237349d637a4e8be518109c0
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.0/ArmnnDriverImpl.cpp
+SPDXID: SPDXRef-bf1cf5f58ec86ae7e11a3c3c8b474034
+FileChecksum: SHA1: f59f11e39817ddf3707b0ac10e512357b4c453dc
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.0/ArmnnDriverImpl.hpp
+SPDXID: SPDXRef-cecaaa5d4dda56381f9f7a9cd61ff7e9
+FileChecksum: SHA1: 9da9087071f65ca982a6107f2459cbd25e90f30b
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.0/FullyConnected.hpp
+SPDXID: SPDXRef-f143870cf1668a0e1921bc40fdb66f54
+FileChecksum: SHA1: 637f01696648b3d45ff7fa923bdf9d1dc69d28af
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.0/HalPolicy.cpp
+SPDXID: SPDXRef-00a23dbae66843f432e28118661839d4
+FileChecksum: SHA1: 91f1c7cadf4a257ff227a828a399be98a418bbbe
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./1.0/HalPolicy.hpp
+SPDXID: SPDXRef-3256f8866910230cf6a7a73811af0cf9
+FileChecksum: SHA1: dcb315bcb1d3f3da0a9c3c0613b88927f478549f
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.1/ArmnnDriver.hpp
+SPDXID: SPDXRef-2cc451461228edab191f273bf18b1bb3
+FileChecksum: SHA1: 77f4cf04748296e452f8af5d3b039122fdd128d7
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.1/ArmnnDriverImpl.cpp
+SPDXID: SPDXRef-d98af2dbf0e85ae4bc5f7b43ea7a954a
+FileChecksum: SHA1: 0f1fd3966716c0b12250d126f9e81e7c4255250e
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.1/ArmnnDriverImpl.hpp
+SPDXID: SPDXRef-c88a629746090e9a0d3827215e2bfd51
+FileChecksum: SHA1: e3d2b504055de7abe3bf788e90f575dd1025bd85
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.1/HalPolicy.cpp
+SPDXID: SPDXRef-cae6fc1961249d6881aca965eb58e004
+FileChecksum: SHA1: 3ce067043eaeb91850116ca6d96bf2cc157e0b25
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.1/HalPolicy.hpp
+SPDXID: SPDXRef-e46a1911e4d9059307bd22dc2d912cb1
+FileChecksum: SHA1: 9622c84ff7b21205560a852f6937c9f6294726fe
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.2/ArmnnDriver.hpp
+SPDXID: SPDXRef-ea85c3000a503cbd9bd1b1f84e9fad86
+FileChecksum: SHA1: a3139ec9145218e6135ba01a28915b7eaa978030
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.2/ArmnnDriverImpl.cpp
+SPDXID: SPDXRef-4a1090b46195ace5630ea4a007850de7
+FileChecksum: SHA1: 5314d9584fde5478496ae4a5e21dae2e4cff9b30
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.2/ArmnnDriverImpl.hpp
+SPDXID: SPDXRef-385bcf1658d8867efb2b32b1500f09d4
+FileChecksum: SHA1: da57c0309a90ccccc2548a5f5980646e8034fbc8
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.2/HalPolicy.cpp
+SPDXID: SPDXRef-b521d1dfe6989e64400469fdebd831c9
+FileChecksum: SHA1: 3c36a47c20cbccaff4ab066cdd70415588e7b2dc
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.2/HalPolicy.hpp
+SPDXID: SPDXRef-8bc8b64290ab46c0fb3219b9ec81a869
+FileChecksum: SHA1: a5f76412d0382b3a1dfa1cd660d6bcaf3611005c
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.3/ArmnnDriver.hpp
+SPDXID: SPDXRef-8463e614db8ba40e6e1945c7b4b18295
+FileChecksum: SHA1: 2df533c72232ee6dd03485e24ea09f39d11936ab
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.3/ArmnnDriverImpl.cpp
+SPDXID: SPDXRef-b829f130ef058df6a26066043a87a7f2
+FileChecksum: SHA1: b2fc505c82f7818ac3c1724d9793c74f64499b4c
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.3/ArmnnDriverImpl.hpp
+SPDXID: SPDXRef-df339a6ebf0b4933a48663cccf1cf20a
+FileChecksum: SHA1: cfc89a2aeb1019f6073a9608284874bf7f5fad1b
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.3/HalPolicy.cpp
+SPDXID: SPDXRef-b850f00ee638309c5b8d15037fac6df6
+FileChecksum: SHA1: d40e4d1c8d2ddae7b405049a6c2925bf6920b066
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd. All rights reserved.</text>
+
+FileName: ./1.3/HalPolicy.hpp
+SPDXID: SPDXRef-ca9a486e79c5c263b3d3f536a4565d83
+FileChecksum: SHA1: d5e11e86d3e7e9b7c411137d02b177d47878af65
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd. All rights reserved.</text>
+
+FileName: ./Android.bp
+SPDXID: SPDXRef-67139f126de1f029b3bd651cd63b1bd6
+FileChecksum: SHA1: b6eee11f4b5dec7d0edc76cc6a7f3096c0626aeb
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 ARM Ltd. All rights reserved.</text>
+
+FileName: ./Android.mk
+SPDXID: SPDXRef-be18403f7bc7bf15c37c7f64298b3c3d
+FileChecksum: SHA1: d2c012577d70974378211d3ee8ea290a61b52fa7
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2022 ARM Ltd. and Contributors. All rights reserved.</text>
+
+FileName: ./ArmnnDevice.cpp
+SPDXID: SPDXRef-9f5b20dfa312b6e2f9a8e1bf2b0b3503
+FileChecksum: SHA1: 5164605fe75cbebe13c4ae3c026789212710f4db
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./ArmnnDevice.hpp
+SPDXID: SPDXRef-b825919ce48fdb7516cbc362e6393f29
+FileChecksum: SHA1: 6e49e8fe37b39ca1381a403e227c664cb5c0f07e
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./ArmnnDriver.hpp
+SPDXID: SPDXRef-edaddcd2dd335d3ece1304a6554ba51d
+FileChecksum: SHA1: 537df248b2cd55405b3df65d3808c3ef41fd0aa1
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./ArmnnDriverImpl.cpp
+SPDXID: SPDXRef-7856d093259efa6362ed229b843e3bf4
+FileChecksum: SHA1: 36395b80efda8d6dce9a9e5fe4e9f45083eedf36
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./ArmnnDriverImpl.hpp
+SPDXID: SPDXRef-965719fd5d318ab9d90f4ef97d6e5b0f
+FileChecksum: SHA1: 4e36f763d315c5db72d0cb0b4477411800abbd96
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./ArmnnPreparedModel.cpp
+SPDXID: SPDXRef-531050aa1791fd697a5dc40c9a177dd4
+FileChecksum: SHA1: 440468c755b83402a9f308a7fc1e9bc860077c08
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./ArmnnPreparedModel.hpp
+SPDXID: SPDXRef-6776eb54bb2ecc6d49e627dbe41622b7
+FileChecksum: SHA1: ba7af89983699d749ab6305415b7dcaba2b2c981
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./ArmnnPreparedModel_1_2.cpp
+SPDXID: SPDXRef-b6f8185ee2082a086ab3c079d6b39244
+FileChecksum: SHA1: e4d46acd9483cb285bc681a74c2f399593ea7f6c
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./ArmnnPreparedModel_1_2.hpp
+SPDXID: SPDXRef-8507d62cd47bf366b75f14567d47267e
+FileChecksum: SHA1: 742b7ac96b7c7e96ec8b956214dd3211098d70b8
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./ArmnnPreparedModel_1_3.cpp
+SPDXID: SPDXRef-ccf8d55a9e9058f0dcf7c363c703d32d
+FileChecksum: SHA1: c70810b740e24cff02d7caf4f7f8aa5aab37cfb6
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./ArmnnPreparedModel_1_3.hpp
+SPDXID: SPDXRef-092945a8bc4261b051b787bd591a7714
+FileChecksum: SHA1: 7b3b0fdfb2bb3fc4d0e5764171a3549a8c8978bd
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd. All rights reserved.</text>
+
+FileName: ./CacheDataHandler.cpp
+SPDXID: SPDXRef-655f5885a812698027dcabea1589d5f4
+FileChecksum: SHA1: 445629336054df90c9099b9b75797ed6811dd779
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2021 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./CacheDataHandler.hpp
+SPDXID: SPDXRef-649cb81767be10c5da180796b9ddb4a7
+FileChecksum: SHA1: 26ae481a852dd13093a1c9edf25ba183f2d35fc5
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2021 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./ConversionUtils.cpp
+SPDXID: SPDXRef-9f7e9dd3a20f1713c3fdc385273c66fa
+FileChecksum: SHA1: 0c8905ac2bb01f0be67c7ebe32d50554d88864a2
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./ConversionUtils.hpp
+SPDXID: SPDXRef-f32963290737441c8d622ba4d084b5c8
+FileChecksum: SHA1: 8084c3cbe39122af1061c98cfd2872bbb363f281
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./ConversionUtils_1_2.hpp
+SPDXID: SPDXRef-567e8339bfeba1cb3c6421c08837b35c
+FileChecksum: SHA1: bff1d8c18281b0d2a77b33a92f67acfb321c4a87
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./ConversionUtils_1_3.hpp
+SPDXID: SPDXRef-03ea74a5d4f4f43725bd95894fdec8dd
+FileChecksum: SHA1: af14ef930e27aae57ae6f0cb7031b3fac5eb3009
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./DriverOptions.cpp
+SPDXID: SPDXRef-8a7249a37de6af18cd2d550cff6010f9
+FileChecksum: SHA1: d141aea6e242783c111d0a6d37e619fa7397d10d
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./DriverOptions.hpp
+SPDXID: SPDXRef-c71eb7532243e3ca46e017d669533dba
+FileChecksum: SHA1: 9684228ad74e75018325e92902a4d3dc871ef6a8
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./ModelToINetworkConverter.cpp
+SPDXID: SPDXRef-22c04f1ed16ebce9712667a1c38a372a
+FileChecksum: SHA1: abede3be1ee6b689512d4720f9a64bea307aeb6f
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./ModelToINetworkConverter.hpp
+SPDXID: SPDXRef-e11be25fc4287567b7a0fa7ac82f1bb9
+FileChecksum: SHA1: 14a9c3b1b8196f05d82f514a584e5c57cf3f0f99
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./NnapiSupport.txt
+SPDXID: SPDXRef-1dcf36d546829f1e3054e855e00ba041
+FileChecksum: SHA1: 821af6773ff138f88246e2154b80c4c9ec8412d9
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2018-2022 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./README.md
+SPDXID: SPDXRef-b712c2b9f228f16280dd638e9509a61d
+FileChecksum: SHA1: af3dcb14667d2fd936c87704d362704f18224c76
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2018-2021 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./RequestThread.cpp
+SPDXID: SPDXRef-e9e436995bae1a91cc8eca98e9b807fd
+FileChecksum: SHA1: cb92a755b3081985429649b6a4baf5930d869b82
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./RequestThread.hpp
+SPDXID: SPDXRef-07efeb8174fbf736a199dc31e12c5e72
+FileChecksum: SHA1: 48c759a5bf72b34d9f8b4fb2b7e9722e923561d9
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./RequestThread_1_3.cpp
+SPDXID: SPDXRef-37f8670d1beb81082869bc84b163efab
+FileChecksum: SHA1: a1c383202dc7fea2c6a78d0065aeb2edf33eda10
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./RequestThread_1_3.hpp
+SPDXID: SPDXRef-342850c8fa79d64844c1094ab1610c1c
+FileChecksum: SHA1: f49a15fca5bfb1bdf4e5e4718ed01b3124047894
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd. All rights reserved.</text>
+
+FileName: ./SECURITY.md
+SPDXID: SPDXRef-147dd65486ab91d9c3c92070f0bfdeb0
+FileChecksum: SHA1: 5c7051d2c36aa1671d35aa1ef5f830be26929c47
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./SystemPropertiesUtils.hpp
+SPDXID: SPDXRef-de4084667ef7b12bc58e12fdd2191c69
+FileChecksum: SHA1: 6ad8daffbc795aad50bd00f316b40565323eeae1
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./Utils.cpp
+SPDXID: SPDXRef-b2047de27559c407bef406aff2b40d94
+FileChecksum: SHA1: 76c8badff7865de215abe395e79e8a059a97ccb5
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./Utils.hpp
+SPDXID: SPDXRef-9f4bb06fba519fb76f51f8943f6ebae9
+FileChecksum: SHA1: 615c59ebc06d526534dadab7e4a9589a22933de1
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./android-nn-driver.IVGCVSW-7090.patch
+SPDXID: SPDXRef-30b64008afa71760eabc7da521c1889d
+FileChecksum: SHA1: 35fbf73bed3f1eb2cad0de08475b88853cc918d7
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright (c) <year> <copyright holders>
+Copyright © 2018, 2022 Arm Ltd and Contributors. All rights reserved.
+Copyright © 2018-2022 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./android.hardware.neuralnetworks@1.0-service-armnn.rc
+SPDXID: SPDXRef-baf669b9e92e00c5362dbc82809d5bde
+FileChecksum: SHA1: 8fa18b080109a4e60839b5d1d635af28e9eef847
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2018, 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./android.hardware.neuralnetworks@1.1-service-armnn.rc
+SPDXID: SPDXRef-930a2fdcf68b3d60760bcd4b005da53a
+FileChecksum: SHA1: 7f085302dd79d18f958c0779d1918871e55f816a
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2018, 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./android.hardware.neuralnetworks@1.2-service-armnn.rc
+SPDXID: SPDXRef-ddc27b0006a37f1b787e8b6162daf3d8
+FileChecksum: SHA1: 14f30ecab9ddab0ca1589c140f7104835e83025f
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2019-2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./android.hardware.neuralnetworks@1.3-service-armnn.rc
+SPDXID: SPDXRef-37595def2f9768995c35e05cb7997d5b
+FileChecksum: SHA1: 759eff0251977e333141d4383ff08bb41debd395
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./docs/FAQ.md
+SPDXID: SPDXRef-815280de5c96988091018b623655b38f
+FileChecksum: SHA1: ed059961071be063db41ea7378030e9279256725
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2019-2022 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./docs/IntegratorGuide.md
+SPDXID: SPDXRef-d2adb23643a916d18c659c84820d7c6a
+FileChecksum: SHA1: 16ce5aa116603d2a009b209a91b57d73271d2c0b
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2019-2022 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./service.cpp
+SPDXID: SPDXRef-29371c36e8e4df94cfddca2fc1d126ba
+FileChecksum: SHA1: 763c8edce64123fcb36c182acbdaa8a7d1b44782
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd. All rights reserved.</text>
+
+FileName: ./setup.sh
+SPDXID: SPDXRef-cd71ee2ca480910d96b8b0d8d11f7ad0
+FileChecksum: SHA1: 531cfa9f242d7c718e8d07f56564c9782a046d4f
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2018, 2020-2022 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.0/Convolution2D.cpp
+SPDXID: SPDXRef-e66a2501ad56e8b4c34ec503ea107499
+FileChecksum: SHA1: 128550351b04d32459edd674f9c9d669f57f1b06
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.0/FullyConnectedReshape.cpp
+SPDXID: SPDXRef-f0b928f9591fbb77b0fd4f581e328118
+FileChecksum: SHA1: 1ae9abf76553388bc2e6245fed9b8a65a8f62706
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.0/Lstm.cpp
+SPDXID: SPDXRef-ec80697484956fb568cffa59d1176c99
+FileChecksum: SHA1: 5727d5f855f102a6b6704847adf4944b28f48aef
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.1/Convolution2D.cpp
+SPDXID: SPDXRef-3ee20d00476f82587311b7af6755ba87
+FileChecksum: SHA1: 03db94ba9ca8bd50266e46b9ad771ac776083414
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.1/Lstm.cpp
+SPDXID: SPDXRef-a2c1c0f69d8cae0e305c6588f5a1b7f1
+FileChecksum: SHA1: bd8899a7ce02d366cde07b1bc09548e841b0289e
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.1/Mean.cpp
+SPDXID: SPDXRef-a25ef944bedd8c0ebed511a2acf49ec5
+FileChecksum: SHA1: 55a55089a6b6a1dfadb25254caf0c1c00464e81b
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.1/Transpose.cpp
+SPDXID: SPDXRef-6efb1f07d269b9e0980b06617c8a715b
+FileChecksum: SHA1: 5ff1b93fad61526f720be64cdb1db99cde3f3867
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.2/Capabilities.cpp
+SPDXID: SPDXRef-3f448b727744f3cb9b0779c05699e288
+FileChecksum: SHA1: af172eb6b3911c53764662e84ba4b54948566951
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.2/Dilation.cpp
+SPDXID: SPDXRef-dde9ae64190be94ab573edb16facee76
+FileChecksum: SHA1: a42f16142851e48f6c25ba62dfb59b85cbbd707c
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.2/Lstm.cpp
+SPDXID: SPDXRef-740f541502a5a7bdef15c23c22f921dd
+FileChecksum: SHA1: 417055ce2712481721b4610e0794ff45c4fad5e0
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.2/UnidirectionalSequenceLstm.cpp
+SPDXID: SPDXRef-815f4a4f014436097f35a2933a78c782
+FileChecksum: SHA1: ef80888be3ffee282501ae7b24112c1fe5aef530
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2022 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.3/QLstm.cpp
+SPDXID: SPDXRef-141d09b3dcfb086fa13ed4d1d7dc72c7
+FileChecksum: SHA1: b153faa2047b69fa50c61d8b571ae6c56f30a6c1
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/1.3/QosTests.cpp
+SPDXID: SPDXRef-b608c8e94bd050f57df20a7b3c3578f7
+FileChecksum: SHA1: 581cd7da71d8c437eec87f7a6883639cf03c87d3
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2020 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/Android.mk
+SPDXID: SPDXRef-9af5a132c2718f7f9cd37780855d5dc7
+FileChecksum: SHA1: 39ed4f7eb099c95e3b322ab33884addd8e8679a1
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 ARM Ltd. All rights reserved.</text>
+
+FileName: ./test/Concat.cpp
+SPDXID: SPDXRef-835e7b213035e712703efb1919160385
+FileChecksum: SHA1: bf25dc393b3886450e887e3f00f1866be2ec6a91
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/Concurrent.cpp
+SPDXID: SPDXRef-3ac1d54ace29e86b09ef0f7040ce499a
+FileChecksum: SHA1: f67c8bbb69c38ce9fc29e89780f183be49e9c7b7
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/Convolution2D.hpp
+SPDXID: SPDXRef-d4683c7840fdbbe5db1883d20f39e36f
+FileChecksum: SHA1: b4bf0e39e5b1e754dbbb62d22ee8010edf14ed5e
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/Dilation.hpp
+SPDXID: SPDXRef-106e0283ae070f56090f2d8df4aa23bf
+FileChecksum: SHA1: 065c5e5c9dde792785f7f58eb1498f216efb8cf1
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/DriverTestHelpers.cpp
+SPDXID: SPDXRef-390a1372ba683ef9a6008dacad3d7189
+FileChecksum: SHA1: baf7aa187cfa11c4b3a69a00a69262e5ced445bf
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/DriverTestHelpers.hpp
+SPDXID: SPDXRef-7ce28875cc79ab21c78d42ecdb2ba887
+FileChecksum: SHA1: fa779ca557ae7bebcfefecb1faded7ac783f5924
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/FullyConnected.cpp
+SPDXID: SPDXRef-9ea405115778d8bc6828cb805c59d965
+FileChecksum: SHA1: 4a21a85071eaaac20d5705b1bceae1219bff607f
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/GenericLayerTests.cpp
+SPDXID: SPDXRef-4c471c4784291b124a329da98dbb5721
+FileChecksum: SHA1: 0587067149b5065ef082a788ca5194e88653ec36
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/Lstm.hpp
+SPDXID: SPDXRef-5aa36b3d6a82f93da9cc87f793dd596d
+FileChecksum: SHA1: 193309453c30280b2fbc96cf756d8b03e080b856
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/SystemProperties.cpp
+SPDXID: SPDXRef-2994f5b9a6418a8785a66cb0c974b6fd
+FileChecksum: SHA1: 34cc21f95a76e864d0d77e68bd374120ba50fd9d
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/TestTensor.cpp
+SPDXID: SPDXRef-2cd09106f5468654db71e97033eda634
+FileChecksum: SHA1: c2869d7fba41a2c04af6d3ee8813c28b66c3fdaf
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/TestTensor.hpp
+SPDXID: SPDXRef-233ae563722a5bc6f6fd82d5ca664e9b
+FileChecksum: SHA1: cf3e2de34c3b526aadba3fb64381f7b520c21c03
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/Tests.cpp
+SPDXID: SPDXRef-2d49afc0410844dd4d5fb753d0d2c4d8
+FileChecksum: SHA1: 90784980509b0d9b5cb27dc7486dae0c3f5818a2
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/UnidirectionalSequenceLstm.hpp
+SPDXID: SPDXRef-05f43bb75f39ece251d8a20052bcfd61
+FileChecksum: SHA1: e6eeadc76e06482371803a213b2ad9582456afac
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2022 Arm Ltd and Contributors. All rights reserved.</text>
+
+FileName: ./test/UtilsTests.cpp
+SPDXID: SPDXRef-94799b14d1b2f5eb08bf3204677984f1
+FileChecksum: SHA1: 42157f2abe26fd24b755df1695670408749327a6
+LicenseConcluded: NOASSERTION
+LicenseInfoInFile: MIT
+FileCopyrightText: <text>Copyright © 2017 Arm Ltd and Contributors. All rights reserved.</text>
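Each file entry above is SPDX tag-value data: a path, a SHA-1 checksum, and per-file license facts. As a rough illustration only (this code is not part of the commit, and emitSpdxEntry/Sha1Hex are hypothetical helpers), one way such an entry could be produced in C++ with OpenSSL's SHA1:

    // Illustration only: writes one SPDX tag-value file entry shaped like those above.
    // emitSpdxEntry and Sha1Hex are hypothetical helpers, not android-nn-driver code.
    #include <openssl/sha.h>   // SHA1()
    #include <cstdio>
    #include <fstream>
    #include <iterator>
    #include <ostream>
    #include <sstream>
    #include <string>

    static std::string Sha1Hex(const std::string& bytes)
    {
        unsigned char digest[SHA_DIGEST_LENGTH];
        SHA1(reinterpret_cast<const unsigned char*>(bytes.data()), bytes.size(), digest);
        std::ostringstream hex;
        for (unsigned char b : digest)
        {
            char buf[3];
            std::snprintf(buf, sizeof(buf), "%02x", b);
            hex << buf;
        }
        return hex.str();
    }

    void emitSpdxEntry(std::ostream& out, const std::string& path, const std::string& copyright)
    {
        std::ifstream in(path, std::ios::binary);
        const std::string bytes((std::istreambuf_iterator<char>(in)), std::istreambuf_iterator<char>());
        const std::string sha1 = Sha1Hex(bytes);
        out << "FileName: ./" << path << "\n"
            << "SPDXID: SPDXRef-" << sha1.substr(0, 32) << "\n"  // any stable unique id works here
            << "FileChecksum: SHA1: " << sha1 << "\n"
            << "LicenseConcluded: NOASSERTION\n"
            << "LicenseInfoInFile: MIT\n"
            << "FileCopyrightText: <text>" << copyright << "</text>\n\n";
    }

Called as emitSpdxEntry(std::cout, ".gitignore", "Copyright © 2018, 2022 Arm Ltd and Contributors. All rights reserved."), it prints a block with the same layout as the entries above.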
diff --git a/LICENSES/MIT.txt b/LICENSES/MIT.txt
new file mode 100644
index 0000000..2071b23
--- /dev/null
+++ b/LICENSES/MIT.txt
@@ -0,0 +1,9 @@
+MIT License
+
+Copyright (c) <year> <copyright holders>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/ModelToINetworkConverter.cpp b/ModelToINetworkConverter.cpp
index e8cf8a8..2dc302e 100644
--- a/ModelToINetworkConverter.cpp
+++ b/ModelToINetworkConverter.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -11,6 +11,10 @@
#include <log/log.h>
#include <type_traits>
+#ifdef ARMNN_ANDROID_S
+#include <LegacyUtils.h>
+#endif
+
namespace armnn_driver
{
@@ -31,7 +35,6 @@ ModelToINetworkConverter<HalPolicy>::ModelToINetworkConverter(const std::vector<
{
m_ConversionResult = ConversionResult::UnsupportedFeature;
ALOGE("%s: Unexpected exception: %s", __func__, e.what());
- assert(false);
}
}
@@ -46,13 +49,18 @@ void ModelToINetworkConverter<HalPolicy>::Convert()
// map the memory pool into shared pointers
m_Data.m_MemPools.clear();
+#if !defined(ARMNN_ANDROID_S)
if (!setRunTimePoolInfosFromHidlMemories(&m_Data.m_MemPools, m_Model.pools))
+#else
+ if (!setRunTimePoolInfosFromCanonicalMemories(&m_Data.m_MemPools, uncheckedConvert(m_Model.pools)))
+#endif
{
Fail("%s: Setting of run time pool infos from Hidl Memories has failed.", __func__);
m_ConversionResult = ConversionResult::ErrorMappingPools;
return;
}
+
uint32_t totalPoolSize = 0;
for (auto&& pool : m_Model.pools)
{
@@ -86,9 +94,9 @@ void ModelToINetworkConverter<HalPolicy>::Convert()
ALOGV("ModelToINetworkConverter::Convert(): getMainModel(m_Model).operands[inputIndex];");
const HalOperand& operand = getMainModel(m_Model).operands[inputIndex];
ALOGV("ModelToINetworkConverter::Convert(): GetTensorInfoForOperand(operand)");
- const armnn::TensorInfo& tensor = GetTensorInfoForOperand(operand);
- ALOGV("ModelToINetworkConverter::Convert(): m_Data.m_Network->AddInputLayer(i)");
- armnn::IConnectableLayer* layer = m_Data.m_Network->AddInputLayer(i);
+ const std::string layerName = "Input_" + std::to_string(i);
+ ALOGV("ModelToINetworkConverter::Convert(): m_Data.m_Network->AddInputLayer(i, layerName.c_str())");
+ armnn::IConnectableLayer* layer = m_Data.m_Network->AddInputLayer(i, layerName.c_str());
ALOGV("ModelToINetworkConverter::Convert(): layer->GetOutputSlot(0)");
armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0);
@@ -182,11 +190,15 @@ void ModelToINetworkConverter<HalPolicy>::Convert()
{
// outputs in android nn are represented by operands
uint32_t outputIndex = getMainModel(m_Model).outputIndexes[i];
- const HalOperand& operand = getMainModel(m_Model).operands[outputIndex];
- const armnn::TensorInfo& tensor = GetTensorInfoForOperand(operand);
- armnn::IConnectableLayer* layer = m_Data.m_Network->AddOutputLayer(i);
-
- assert(m_Data.m_OutputSlotForOperand[outputIndex]);
+ const std::string layerName = "Output_" + std::to_string(i);
+ armnn::IConnectableLayer* layer = m_Data.m_Network->AddOutputLayer(i, layerName.c_str());
+
+ if (!m_Data.m_OutputSlotForOperand[outputIndex])
+ {
+ Fail("%s: OutputSlot %i does not exist", __func__, outputIndex);
+ m_ConversionResult = ConversionResult::UnsupportedFeature;
+ break;
+ }
m_Data.m_OutputSlotForOperand[outputIndex]->Connect(layer->GetInputSlot(0));
}
}
@@ -202,7 +214,10 @@ template<typename HalPolicy>
bool ModelToINetworkConverter<HalPolicy>::IsOperationSupported(uint32_t operationIndex) const
{
std::map<uint32_t, bool>::const_iterator it = m_OperationSupported.find(operationIndex);
- assert(it != m_OperationSupported.end());
+ if (it == m_OperationSupported.end())
+ {
+ return Fail("%s: Unrecognised Operation Index: %i", __func__, operationIndex);
+ }
return it->second;
}
diff --git a/NnapiSupport.txt b/NnapiSupport.txt
index 2b6eaca..254e97b 100644
--- a/NnapiSupport.txt
+++ b/NnapiSupport.txt
@@ -14,73 +14,85 @@ For integration and usage documentation, please see README.md.
The following AndroidNN HAL 1.0, 1.1, 1.2 and 1.3 operations are currently supported:
AndroidNN operator Tensor type supported
-ABS (FLOAT32)
-ADD (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-ARGMAX (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-ARGMIN (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-AVERAGE_POOL_2D (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-BATCH_TO_SPACE_ND (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+ABS (FLOAT32, FLOAT16, INT32)
+ADD (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+ARGMAX (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+ARGMIN (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+AVERAGE_POOL_2D (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+BATCH_TO_SPACE_ND (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+CAST (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM)
CONCATENATION (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+CHANNEL_SHUFFLE (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
CONV_2D (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
DEPTH_TO_SPACE (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
DEPTHWISE_CONV_2D (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
DEQUANTIZE (FLOAT32 (output only), QUANT8_ASYMM and QUANT8_ASYMM_SIGNED (input only))
-DIV (FLOAT32, QUANT8_ASYMM)
-ELU (FLOAT32, QUANT8_ASYMM)
-EQUAL (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+DIV (FLOAT32, FLOAT16, INT32)
+ELU (FLOAT32, FLOAT16, QUANT8_ASYMM)
+EQUAL (BOOL8, FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
EXP (FLOAT32, FLOAT16)
-EXPAND_DIMS (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+EXPAND_DIMS (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
FILL (FLOAT32, FLOAT16, INT32)
-FLOOR (FLOAT32)
+FLOOR (FLOAT32, FLOAT16)
FULLY_CONNECTED (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-GREATER (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-GREATER_EQUAL (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-GROUPED_CONV_2D (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-HARD_SWISH (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-INSTANCE_NORMALIZATION (FLOAT32)
+GREATER (BOOL8, FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+GREATER_EQUAL (BOOL8, FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+GROUPED_CONV_2D (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+HARD_SWISH (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+INSTANCE_NORMALIZATION (FLOAT32, FLOAT16)
L2_NORMALIZATION (FLOAT32)
-L2_POOL_2D (FLOAT32, QUANT8_ASYMM)
-LESS (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-LESS_EQUAL (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+L2_POOL_2D (FLOAT32, FLOAT16)
+LESS (BOOL8, FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+LESS_EQUAL (BOOL8, FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
LOCAL_RESPONSE_NORMALIZATION (FLOAT32)
-LOGISTIC (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-LOG_SOFTMAX (FLOAT32)
+LOG (FLOAT32, FLOAT16)
+LOGICAL_AND (BOOL8)
+LOGICAL_NOT (BOOL8)
+LOGICAL_OR (BOOL8)
+LOGISTIC (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+LOG_SOFTMAX (FLOAT32, FLOAT16)
LSTM (FLOAT32)
-MAXIMUM (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-MAX_POOL_2D (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-MEAN (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-MINIMUM (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-MUL (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-NEG (FLOAT32)
-NOT_EQUAL (FLOAT32, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+MAXIMUM (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+MAX_POOL_2D (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+MEAN (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+MINIMUM (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+MUL (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+NEG (FLOAT32, FLOAT16)
+NOT_EQUAL (BOOL8, FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
PAD (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-PAD_V2 (FLOAT32, FLOAT16, QUANT8_ASYMM)
-PRELU (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+PAD_V2 (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+PRELU (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
QUANTIZE (FLOAT32 (input only), QUANT8_ASYMM and QUANT8_ASYMM_SIGNED (output only))
QUANTIZED_16BIT_LSTM (QUANT8_ASYMM)
QUANTIZED_LSTM (QUANT8_ASYMM)
-RELU (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-RELU1 (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-RELU6 (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+REDUCE_MAX (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+REDUCE_MIN (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+REDUCE_PROD (FLOAT32, FLOAT16)
+REDUCE_SUM (FLOAT32, FLOAT16)
+RELU (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+RELU1 (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+RELU6 (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
RESHAPE (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-RESIZE_BILINEAR (FLOAT32, QUANT8_ASYMM)
-RESIZE_NEAREST_NEIGHBOR (FLOAT32, QUANT8_ASYMM)
-RSQRT (FLOAT32)
+RESIZE_BILINEAR (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+RESIZE_NEAREST_NEIGHBOR (FLOAT32, FLOAT16, QUANT8_ASYMM)
+RSQRT (FLOAT32, FLOAT16)
+SIN (FLOAT32, FLOAT16)
SOFTMAX (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-SPACE_TO_BATCH_ND (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+SPACE_TO_BATCH_ND (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
SPACE_TO_DEPTH (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-SQRT (FLOAT32)
+SQRT (FLOAT32, FLOAT16)
SQUEEZE (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
STRIDED_SLICE (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-SUB (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
-TANH (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+SUB (FLOAT32, FLOAT16, INT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+TANH (FLOAT32, FLOAT16, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
TRANSPOSE (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
TRANSPOSE_CONV_2D (FLOAT32, QUANT8_ASYMM, QUANT8_ASYMM_SIGNED)
+UNIDIRECTIONAL_SEQUENCE_LSTM (FLOAT32, FLOAT16)
Where operations are not supported by the ArmNN Android NN Driver, the driver indicates this to the framework
appropriately and the framework implements those operations using a CPU implementation.
NOTE: By convention, only those tensor types have been listed above, which are fully supported across all
ArmNN backends.
- - FLOAT16 input tensors are partially supported on most HAL 1.2 operators on the GpuAcc and
- CpuRef backends, however not on CpuAcc.
\ No newline at end of file
+ - FLOAT16 input tensors are partially supported on most HAL 1.2 and 1.3 operators on the GpuAcc and
+ CpuRef backends, however not on CpuAcc.
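The table above is keyed by operator and tensor type; at runtime the driver reports support per operation so the framework can run anything unsupported on its own CPU implementation (the getSupportedOperations flow in the HAL). The following standalone sketch shows that shape only, using hypothetical Operation/Model types rather than the real HAL structures:

    // Standalone sketch: map each operation in a model to a supported flag;
    // the framework falls back to its CPU backend for anything reported false.
    // Operation, Model and IsOperationSupported are illustrative stand-ins.
    #include <cstddef>
    #include <string>
    #include <vector>

    struct Operation { std::string type; std::string tensorType; };
    struct Model     { std::vector<Operation> operations; };

    static bool IsOperationSupported(const Operation& op)
    {
        // e.g. consult a table like NnapiSupport.txt: operator name -> tensor types
        return op.type == "ADD" && (op.tensorType == "FLOAT32" || op.tensorType == "FLOAT16");
    }

    std::vector<bool> GetSupportedOperations(const Model& model)
    {
        std::vector<bool> supported(model.operations.size(), false);
        for (std::size_t i = 0; i < model.operations.size(); ++i)
        {
            supported[i] = IsOperationSupported(model.operations[i]);
        }
        return supported;   // one flag per operation, in model order
    }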
diff --git a/NnapiSupport.txt.license b/NnapiSupport.txt.license
new file mode 100644
index 0000000..739dc2d
--- /dev/null
+++ b/NnapiSupport.txt.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2018-2022 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/README.md b/README.md
index 35f2db0..9d5bfdd 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
-# ArmNN Android Neural Networks driver
+# Arm NN Android Neural Networks driver
-This directory contains the ArmNN driver for the Android Neural Networks API, implementing the android.hardware.neuralnetworks@1.0, android.hardware.neuralnetworks@1.1, android.hardware.neuralnetworks@1.2 and android.hardware.neuralnetworks@1.3 HALs.
+This directory contains the Arm NN driver for the Android Neural Networks API, implementing the android.hardware.neuralnetworks@1.0, android.hardware.neuralnetworks@1.1, android.hardware.neuralnetworks@1.2 and android.hardware.neuralnetworks@1.3 HALs.
For more information about supported operations and configurations, see [NnapiSupport.txt](NnapiSupport.txt)
diff --git a/README.md.license b/README.md.license
new file mode 100644
index 0000000..c3a8823
--- /dev/null
+++ b/README.md.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2018-2021 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/RequestThread.cpp b/RequestThread.cpp
index 927af92..783e351 100644
--- a/RequestThread.cpp
+++ b/RequestThread.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -17,8 +17,6 @@
#include "ArmnnPreparedModel_1_3.hpp"
#endif
-#include <armnn/utility/Assert.hpp>
-
#include <log/log.h>
using namespace android;
@@ -134,8 +132,7 @@ void RequestThread<PreparedModel, HalVersion, CallbackContext>::Process()
default:
// this should be unreachable
- ALOGE("RequestThread::Process() - invalid message type");
- ARMNN_ASSERT_MSG(false, "ArmNN: RequestThread: invalid message type");
+ throw armnn::RuntimeException("ArmNN: RequestThread: invalid message type");
}
}
}
diff --git a/RequestThread_1_3.cpp b/RequestThread_1_3.cpp
index 59fa70e..6133e29 100644
--- a/RequestThread_1_3.cpp
+++ b/RequestThread_1_3.cpp
@@ -1,15 +1,12 @@
//
-// Copyright © 2020 Arm Ltd. All rights reserved.
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#define LOG_TAG "ArmnnDriver"
-#include "RequestThread_1_3.hpp"
-
#include "ArmnnPreparedModel_1_3.hpp"
-
-#include <armnn/utility/Assert.hpp>
+#include "RequestThread_1_3.hpp"
#include <log/log.h>
@@ -178,8 +175,7 @@ void RequestThread_1_3<PreparedModel, HalVersion, CallbackContext>::Process()
default:
// this should be unreachable
- ALOGE("RequestThread_1_3::Process() - invalid message type");
- ARMNN_ASSERT_MSG(false, "ArmNN: RequestThread_1_3: invalid message type");
+ throw armnn::RuntimeException("ArmNN: RequestThread_1_3: invalid message type");
}
}
}
diff --git a/SECURITY.md.license b/SECURITY.md.license
new file mode 100644
index 0000000..344cf33
--- /dev/null
+++ b/SECURITY.md.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/Utils.cpp b/Utils.cpp
index 1517b2a..48f06a9 100644
--- a/Utils.cpp
+++ b/Utils.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2021,2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -8,22 +8,20 @@
#include "Utils.hpp"
#include "Half.hpp"
+#include <armnnSerializer/ISerializer.hpp>
+#include <armnnUtils/Filesystem.hpp>
#include <armnnUtils/Permute.hpp>
#include <armnn/Utils.hpp>
-#include <armnn/utility/Assert.hpp>
-#include <Filesystem.hpp>
#include <log/log.h>
-#include <cassert>
#include <cerrno>
#include <cinttypes>
#include <sstream>
#include <cstdio>
#include <time.h>
#include <string>
-
-
+#include <span>
using namespace android;
using namespace android::hardware;
@@ -33,15 +31,30 @@ namespace armnn_driver
{
const armnn::PermutationVector g_DontPermute{};
-namespace
-{
-
-void SwizzleAndroidNn4dTensorToArmNn(const armnn::TensorShape& inTensorShape, const void* input,
- void* output, size_t dataTypeSize, const armnn::PermutationVector& mappings)
+void SwizzleAndroidNn4dTensorToArmNn(armnn::TensorInfo& tensorInfo, const void* input, void* output,
+ const armnn::PermutationVector& mappings)
{
- assert(inTensorShape.GetNumDimensions() == 4U);
-
- armnnUtils::Permute(armnnUtils::Permuted(inTensorShape, mappings), mappings, input, output, dataTypeSize);
+ if (tensorInfo.GetNumDimensions() != 4U)
+ {
+ throw armnn::InvalidArgumentException("NumDimensions must be 4");
+ }
+ armnn::DataType dataType = tensorInfo.GetDataType();
+ switch (dataType)
+ {
+ case armnn::DataType::Float16:
+ case armnn::DataType::Float32:
+ case armnn::DataType::QAsymmU8:
+ case armnn::DataType::QSymmS16:
+ case armnn::DataType::QSymmS8:
+ case armnn::DataType::QAsymmS8:
+ // First swizzle tensor info
+ tensorInfo = armnnUtils::Permuted(tensorInfo, mappings);
+ // Then swizzle tensor data
+ armnnUtils::Permute(tensorInfo.GetShape(), mappings, input, output, armnn::GetDataTypeSize(dataType));
+ break;
+ default:
+ throw armnn::InvalidArgumentException("Unknown DataType for swizzling");
+ }
}
template<typename Dimensions>
@@ -60,32 +73,13 @@ auto GetDimensionsSpecificity(const Dimensions& dimensions)
return specificity;
}
-} // anonymous namespace
-
-void SwizzleAndroidNn4dTensorToArmNn(const armnn::TensorInfo& tensor, const void* input, void* output,
- const armnn::PermutationVector& mappings)
-{
- assert(tensor.GetNumDimensions() == 4U);
-
- armnn::DataType dataType = tensor.GetDataType();
- switch (dataType)
- {
- case armnn::DataType::Float16:
- case armnn::DataType::Float32:
- case armnn::DataType::QAsymmU8:
- case armnn::DataType::QSymmS8:
- case armnn::DataType::QAsymmS8:
- SwizzleAndroidNn4dTensorToArmNn(tensor.GetShape(), input, output, armnn::GetDataTypeSize(dataType), mappings);
- break;
- default:
- ALOGW("Unknown armnn::DataType for swizzling");
- assert(0);
- }
-}
void* GetMemoryFromPool(V1_0::DataLocation location, const std::vector<android::nn::RunTimePoolInfo>& memPools)
{
// find the location within the pool
- assert(location.poolIndex < memPools.size());
+ if (location.poolIndex >= memPools.size())
+ {
+ throw armnn::InvalidArgumentException("The poolIndex is greater than the memPools size.");
+ }
const android::nn::RunTimePoolInfo& memPool = memPools[location.poolIndex];
@@ -188,9 +182,10 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand)
if (perChannel)
{
- // ExtraParams is expected to be of type channelQuant
- ARMNN_ASSERT(operand.extraParams.getDiscriminator() ==
- V1_2::Operand::ExtraParams::hidl_discriminator::channelQuant);
+ if (operand.extraParams.getDiscriminator() != V1_2::Operand::ExtraParams::hidl_discriminator::channelQuant)
+ {
+ throw armnn::InvalidArgumentException("ExtraParams is expected to be of type channelQuant");
+ }
auto perAxisQuantParams = operand.extraParams.channelQuant();
@@ -277,9 +272,10 @@ armnn::TensorInfo GetTensorInfoForOperand(const V1_3::Operand& operand)
if (perChannel)
{
// ExtraParams is expected to be of type channelQuant
- ARMNN_ASSERT(operand.extraParams.getDiscriminator() ==
- V1_2::Operand::ExtraParams::hidl_discriminator::channelQuant);
-
+ if (operand.extraParams.getDiscriminator() != V1_2::Operand::ExtraParams::hidl_discriminator::channelQuant)
+ {
+ throw armnn::InvalidArgumentException("ExtraParams is expected to be of type channelQuant");
+ }
auto perAxisQuantParams = operand.extraParams.channelQuant();
ret.SetQuantizationScales(perAxisQuantParams.scales);
@@ -321,39 +317,27 @@ std::string GetOperandSummary(const V1_3::Operand& operand)
#endif
-using DumpElementFunction = void (*)(const armnn::ConstTensor& tensor,
+template <typename TensorType>
+using DumpElementFunction = void (*)(const TensorType& tensor,
unsigned int elementIndex,
std::ofstream& fileStream);
namespace
{
-template <typename ElementType, typename PrintableType = ElementType>
-void DumpTensorElement(const armnn::ConstTensor& tensor, unsigned int elementIndex, std::ofstream& fileStream)
+template <typename TensorType, typename ElementType, typename PrintableType = ElementType>
+void DumpTensorElement(const TensorType& tensor, unsigned int elementIndex, std::ofstream& fileStream)
{
const ElementType* elements = reinterpret_cast<const ElementType*>(tensor.GetMemoryArea());
- fileStream << static_cast<PrintableType>(elements[elementIndex]) << ",";
+ fileStream << static_cast<PrintableType>(elements[elementIndex]) << " ";
}
-constexpr const char* MemoryLayoutString(const armnn::ConstTensor& tensor)
-{
- const char* str = "";
-
- switch (tensor.GetNumDimensions())
- {
- case 4: { str = "(BHWC) "; break; }
- case 3: { str = "(HWC) "; break; }
- case 2: { str = "(HW) "; break; }
- default: { str = ""; break; }
- }
-
- return str;
-}
} // namespace
+template <typename TensorType>
void DumpTensor(const std::string& dumpDir,
const std::string& requestName,
const std::string& tensorName,
- const armnn::ConstTensor& tensor)
+ const TensorType& tensor)
{
// The dump directory must exist in advance.
fs::path dumpPath = dumpDir;
@@ -368,38 +352,38 @@ void DumpTensor(const std::string& dumpDir,
return;
}
- DumpElementFunction dumpElementFunction = nullptr;
+ DumpElementFunction<TensorType> dumpElementFunction = nullptr;
switch (tensor.GetDataType())
{
case armnn::DataType::Float32:
{
- dumpElementFunction = &DumpTensorElement<float>;
+ dumpElementFunction = &DumpTensorElement<TensorType, float>;
break;
}
case armnn::DataType::QAsymmU8:
{
- dumpElementFunction = &DumpTensorElement<uint8_t, uint32_t>;
+ dumpElementFunction = &DumpTensorElement<TensorType, uint8_t, uint32_t>;
break;
}
case armnn::DataType::Signed32:
{
- dumpElementFunction = &DumpTensorElement<int32_t>;
+ dumpElementFunction = &DumpTensorElement<TensorType, int32_t>;
break;
}
case armnn::DataType::Float16:
{
- dumpElementFunction = &DumpTensorElement<armnn::Half>;
+ dumpElementFunction = &DumpTensorElement<TensorType, armnn::Half>;
break;
}
case armnn::DataType::QAsymmS8:
{
- dumpElementFunction = &DumpTensorElement<int8_t, int32_t>;
+ dumpElementFunction = &DumpTensorElement<TensorType, int8_t, int32_t>;
break;
}
case armnn::DataType::Boolean:
{
- dumpElementFunction = &DumpTensorElement<bool>;
+ dumpElementFunction = &DumpTensorElement<TensorType, bool>;
break;
}
default:
@@ -411,55 +395,53 @@ void DumpTensor(const std::string& dumpDir,
if (dumpElementFunction != nullptr)
{
const unsigned int numDimensions = tensor.GetNumDimensions();
+ const armnn::TensorShape shape = tensor.GetShape();
- const unsigned int batch = (numDimensions == 4) ? tensor.GetShape()[numDimensions - 4] : 1;
-
- const unsigned int height = (numDimensions >= 3)
- ? tensor.GetShape()[numDimensions - 3]
- : (numDimensions >= 2) ? tensor.GetShape()[numDimensions - 2] : 1;
-
- const unsigned int width = (numDimensions >= 3)
- ? tensor.GetShape()[numDimensions - 2]
- : (numDimensions >= 1) ? tensor.GetShape()[numDimensions - 1] : 0;
-
- const unsigned int channels = (numDimensions >= 3) ? tensor.GetShape()[numDimensions - 1] : 1;
-
+ if (!shape.AreAllDimensionsSpecified())
+ {
+ fileStream << "Cannot dump tensor elements: not all dimensions are specified" << std::endl;
+ return;
+ }
fileStream << "# Number of elements " << tensor.GetNumElements() << std::endl;
- fileStream << "# Dimensions " << MemoryLayoutString(tensor);
- fileStream << "[" << tensor.GetShape()[0];
- for (unsigned int d = 1; d < numDimensions; d++)
+
+ if (numDimensions == 0)
+ {
+ fileStream << "# Shape []" << std::endl;
+ return;
+ }
+ fileStream << "# Shape [" << shape[0];
+ for (unsigned int d = 1; d < numDimensions; ++d)
{
- fileStream << "," << tensor.GetShape()[d];
+ fileStream << "," << shape[d];
}
fileStream << "]" << std::endl;
+ fileStream << "Each line contains the data of each of the elements of dimension0. In NCHW and NHWC, each line"
+ " will be a batch" << std::endl << std::endl;
- for (unsigned int e = 0, b = 0; b < batch; ++b)
+ // Split will create a new line after all elements of the first dimension
+ // (in a 4, 3, 2, 3 tensor, there will be 4 lines of 18 elements)
+ unsigned int split = 1;
+ if (numDimensions == 1)
{
- if (numDimensions >= 4)
+ split = shape[0];
+ }
+ else
+ {
+ for (unsigned int i = 1; i < numDimensions; ++i)
{
- fileStream << "# Batch " << b << std::endl;
+ split *= shape[i];
}
- for (unsigned int c = 0; c < channels; c++)
+ }
+
+ // Print all elements in the tensor
+ for (unsigned int elementIndex = 0; elementIndex < tensor.GetNumElements(); ++elementIndex)
+ {
+ (*dumpElementFunction)(tensor, elementIndex, fileStream);
+
+ if ( (elementIndex + 1) % split == 0 )
{
- if (numDimensions >= 3)
- {
- fileStream << "# Channel " << c << std::endl;
- }
- for (unsigned int h = 0; h < height; h++)
- {
- for (unsigned int w = 0; w < width; w++, e += channels)
- {
- (*dumpElementFunction)(tensor, e, fileStream);
- }
- fileStream << std::endl;
- }
- e -= channels - 1;
- if (c < channels)
- {
- e -= ((height * width) - 1) * channels;
- }
+ fileStream << std::endl;
}
- fileStream << std::endl;
}
fileStream << std::endl;
}
@@ -475,6 +457,17 @@ void DumpTensor(const std::string& dumpDir,
}
}
+
+template void DumpTensor<armnn::ConstTensor>(const std::string& dumpDir,
+ const std::string& requestName,
+ const std::string& tensorName,
+ const armnn::ConstTensor& tensor);
+
+template void DumpTensor<armnn::Tensor>(const std::string& dumpDir,
+ const std::string& requestName,
+ const std::string& tensorName,
+ const armnn::Tensor& tensor);
+
void DumpJsonProfilingIfRequired(bool gpuProfilingEnabled,
const std::string& dumpDir,
armnn::NetworkId networkId,
@@ -492,7 +485,11 @@ void DumpJsonProfilingIfRequired(bool gpuProfilingEnabled,
return;
}
- ARMNN_ASSERT(profiler);
+ if (!profiler)
+ {
+ ALOGW("profiler was null");
+ return;
+ }
// Set the name of the output profiling file.
fs::path dumpPath = dumpDir;
@@ -552,6 +549,63 @@ std::string ExportNetworkGraphToDotFile(const armnn::IOptimizedNetwork& optimize
return fileName;
}
+std::string SerializeNetwork(const armnn::INetwork& network,
+ const std::string& dumpDir,
+ std::vector<uint8_t>& dataCacheData,
+ bool dataCachingActive)
+{
+ std::string fileName;
+ bool bSerializeToFile = true;
+ if (dumpDir.empty())
+ {
+ bSerializeToFile = false;
+ }
+ else
+ {
+ std::string timestamp = GetFileTimestamp();
+ if (timestamp.empty())
+ {
+ bSerializeToFile = false;
+ }
+ }
+ if (!bSerializeToFile && !dataCachingActive)
+ {
+ return fileName;
+ }
+
+ auto serializer(armnnSerializer::ISerializer::Create());
+ // Serialize the Network
+ serializer->Serialize(network);
+ if (dataCachingActive)
+ {
+ std::stringstream stream;
+ auto serialized = serializer->SaveSerializedToStream(stream);
+ if (serialized)
+ {
+ std::string const serializedString{stream.str()};
+ std::copy(serializedString.begin(), serializedString.end(), std::back_inserter(dataCacheData));
+ }
+ }
+
+ if (bSerializeToFile)
+ {
+ // Set the name of the output .armnn file.
+ fs::path dumpPath = dumpDir;
+ std::string timestamp = GetFileTimestamp();
+ fs::path tempFilePath = dumpPath / (timestamp + "_network.armnn");
+ fileName = tempFilePath.string();
+
+ // Save serialized network to a file
+ std::ofstream serializedFile(fileName, std::ios::out | std::ios::binary);
+ auto serialized = serializer->SaveSerializedToStream(serializedFile);
+ if (!serialized)
+ {
+ ALOGW("An error occurred when serializing to file %s", fileName.c_str());
+ }
+ }
+ return fileName;
+}
+
bool IsDynamicTensor(const armnn::TensorInfo& tensorInfo)
{
if (tensorInfo.GetShape().GetDimensionality() == armnn::Dimensionality::NotSpecified)
@@ -575,6 +629,53 @@ bool AreDynamicTensorsSupported()
#endif
}
+bool isQuantizedOperand(const V1_0::OperandType& operandType)
+{
+ if (operandType == V1_0::OperandType::TENSOR_QUANT8_ASYMM)
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+
+#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) // Using ::android::hardware::neuralnetworks::V1_2
+bool isQuantizedOperand(const V1_2::OperandType& operandType)
+{
+ if (operandType == V1_2::OperandType::TENSOR_QUANT8_ASYMM ||
+ operandType == V1_2::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL ||
+ operandType == V1_2::OperandType::TENSOR_QUANT8_SYMM ||
+ operandType == V1_2::OperandType::TENSOR_QUANT16_SYMM )
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+#endif
+
+#ifdef ARMNN_ANDROID_NN_V1_3 // Using ::android::hardware::neuralnetworks::V1_3
+bool isQuantizedOperand(const V1_3::OperandType& operandType)
+{
+ if (operandType == V1_3::OperandType::TENSOR_QUANT8_ASYMM ||
+ operandType == V1_3::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL ||
+ operandType == V1_3::OperandType::TENSOR_QUANT8_SYMM ||
+ operandType == V1_3::OperandType::TENSOR_QUANT16_SYMM ||
+ operandType == V1_3::OperandType::TENSOR_QUANT8_ASYMM_SIGNED)
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+}
+#endif
+
std::string GetFileTimestamp()
{
// used to get a timestamp to name diagnostic files (the ArmNN serialized graph
@@ -593,25 +694,37 @@ std::string GetFileTimestamp()
return ss.str();
}
-void RenameGraphDotFile(const std::string& oldName, const std::string& dumpDir, const armnn::NetworkId networkId)
+void RenameExportedFiles(const std::string& existingSerializedFileName,
+ const std::string& existingDotFileName,
+ const std::string& dumpDir,
+ const armnn::NetworkId networkId)
{
if (dumpDir.empty())
{
return;
}
- if (oldName.empty())
+ RenameFile(existingSerializedFileName, std::string("_network.armnn"), dumpDir, networkId);
+ RenameFile(existingDotFileName, std::string("_networkgraph.dot"), dumpDir, networkId);
+}
+
+void RenameFile(const std::string& existingName,
+ const std::string& extension,
+ const std::string& dumpDir,
+ const armnn::NetworkId networkId)
+{
+ if (existingName.empty() || dumpDir.empty())
{
return;
}
- fs::path dumpPath = dumpDir;
- const fs::path newFileName = dumpPath / (std::to_string(networkId) + "_networkgraph.dot");
- int iRet = rename(oldName.c_str(), newFileName.c_str());
+ fs::path dumpPath = dumpDir;
+ const fs::path newFileName = dumpPath / (std::to_string(networkId) + extension);
+ int iRet = rename(existingName.c_str(), newFileName.c_str());
if (iRet != 0)
{
std::stringstream ss;
- ss << "rename of [" << oldName << "] to [" << newFileName << "] failed with errno " << std::to_string(errno)
- << " : " << std::strerror(errno);
+ ss << "rename of [" << existingName << "] to [" << newFileName << "] failed with errno "
+ << std::to_string(errno) << " : " << std::strerror(errno);
ALOGW(ss.str().c_str());
}
}
@@ -629,11 +742,74 @@ void CommitPools(std::vector<::android::nn::RunTimePoolInfo>& memPools)
{
// Type android::nn::RunTimePoolInfo has changed between Android P & Q and Android R, where
// update() has been removed and flush() added.
-#if defined(ARMNN_ANDROID_R) // Use the new Android implementation.
+#if defined(ARMNN_ANDROID_R) || defined(ARMNN_ANDROID_S) // Use the new Android implementation.
pool.flush();
#else
pool.update();
#endif
}
}
+
+size_t GetSize(const V1_0::Request& request, const V1_0::RequestArgument& requestArgument)
+{
+ return request.pools[requestArgument.location.poolIndex].size();
+}
+
+#ifdef ARMNN_ANDROID_NN_V1_3
+size_t GetSize(const V1_3::Request& request, const V1_0::RequestArgument& requestArgument)
+{
+ if (request.pools[requestArgument.location.poolIndex].getDiscriminator() ==
+ V1_3::Request::MemoryPool::hidl_discriminator::hidlMemory)
+ {
+ return request.pools[requestArgument.location.poolIndex].hidlMemory().size();
+ }
+ else
+ {
+ return 0;
+ }
+}
+#endif
+
+template <typename ErrorStatus, typename Request>
+ErrorStatus ValidateRequestArgument(const Request& request,
+ const armnn::TensorInfo& tensorInfo,
+ const V1_0::RequestArgument& requestArgument,
+ std::string descString)
+{
+ if (requestArgument.location.poolIndex >= request.pools.size())
+ {
+ std::string err = fmt::format("Invalid {} pool at index {} the pool index is greater than the number "
+ "of available pools {}",
+ descString, requestArgument.location.poolIndex, request.pools.size());
+ ALOGE(err.c_str());
+ return ErrorStatus::GENERAL_FAILURE;
+ }
+ const size_t size = GetSize(request, requestArgument);
+ size_t totalLength = tensorInfo.GetNumBytes();
+
+ if (static_cast<size_t>(requestArgument.location.offset) + totalLength > size)
+ {
+ std::string err = fmt::format("Invalid {} pool at index {} the offset {} and length {} are greater "
+ "than the pool size {}", descString, requestArgument.location.poolIndex,
+ requestArgument.location.offset, totalLength, size);
+ ALOGE(err.c_str());
+ return ErrorStatus::GENERAL_FAILURE;
+ }
+ return ErrorStatus::NONE;
+}
+
+template V1_0::ErrorStatus ValidateRequestArgument<V1_0::ErrorStatus, V1_0::Request>(
+ const V1_0::Request& request,
+ const armnn::TensorInfo& tensorInfo,
+ const V1_0::RequestArgument& requestArgument,
+ std::string descString);
+
+#ifdef ARMNN_ANDROID_NN_V1_3
+template V1_3::ErrorStatus ValidateRequestArgument<V1_3::ErrorStatus, V1_3::Request>(
+ const V1_3::Request& request,
+ const armnn::TensorInfo& tensorInfo,
+ const V1_0::RequestArgument& requestArgument,
+ std::string descString);
+#endif
+
} // namespace armnn_driver
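The new SerializeNetwork helper above serializes an INetwork once and reuses the result twice: when data caching is active the bytes are appended to `dataCacheData`, and when a dump directory is configured the same serializer writes a timestamped `_network.armnn` file. The following is a hedged sketch of one possible call site, relying only on the signature declared in Utils.hpp below; `MaybeSerialize` and its parameters are hypothetical caller-side names, not driver code.

```cpp
// Sketch only: one possible call site for the new SerializeNetwork helper.
#include "Utils.hpp"

#include <armnn/INetwork.hpp>

#include <string>
#include <vector>

std::string MaybeSerialize(const armnn::INetwork& network,
                           const std::string& dumpDir,   // empty string disables the file dump
                           bool cacheEnabled)            // false disables the data-cache copy
{
    std::vector<uint8_t> dataCacheData;

    // Returns the path of the dumped .armnn file, or an empty string if neither
    // a dump directory nor data caching applies.
    const std::string fileName =
        armnn_driver::SerializeNetwork(network, dumpDir, dataCacheData, cacheEnabled);

    // When cacheEnabled is true, dataCacheData now holds the serialized graph bytes
    // and can be written out through the NNAPI model-cache handles.
    return fileName;
}
```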
diff --git a/Utils.hpp b/Utils.hpp
index 0a872b7..81be984 100644
--- a/Utils.hpp
+++ b/Utils.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2021,2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -8,9 +8,10 @@
#include <CpuExecutor.h>
#include <HalInterfaces.h>
-#include <LegacyHalUtils.h>
#include <NeuralNetworks.h>
-#include "NamespaceAdaptor.hpp"
+#include <Utils.h>
+
+#include <fmt/format.h>
#include <vector>
#include <string>
@@ -31,9 +32,7 @@ namespace V1_3 = ::android::hardware::neuralnetworks::V1_3;
namespace armnn_driver
{
-#ifdef ARMNN_ANDROID_S
-using DataLocation = ::android::nn::DataLocation;
-#elif ARMNN_ANDROID_R
+#ifdef ARMNN_ANDROID_R
using DataLocation = ::android::nn::hal::DataLocation;
#endif
@@ -63,7 +62,7 @@ public:
};
/// Swizzles tensor data in @a input according to the dimension mappings.
-void SwizzleAndroidNn4dTensorToArmNn(const armnn::TensorInfo& tensor, const void* input, void* output,
+void SwizzleAndroidNn4dTensorToArmNn(armnn::TensorInfo& tensor, const void* input, void* output,
const armnn::PermutationVector& mappings);
/// Returns a pointer to a specific location in a pool
@@ -73,22 +72,25 @@ void* GetMemoryFromPool(V1_0::DataLocation location,
/// Can throw UnsupportedOperand
armnn::TensorInfo GetTensorInfoForOperand(const V1_0::Operand& operand);
-#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) // Using ::android::hardware::neuralnetworks::V1_2
-armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand);
-#endif
-
-#ifdef ARMNN_ANDROID_NN_V1_3 // Using ::android::hardware::neuralnetworks::V1_3
-armnn::TensorInfo GetTensorInfoForOperand(const V1_3::Operand& operand);
-#endif
-
std::string GetOperandSummary(const V1_0::Operand& operand);
+// Returns true for any quantized data type, false for the rest.
+bool isQuantizedOperand(const V1_0::OperandType& operandType);
+
#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3) // Using ::android::hardware::neuralnetworks::V1_2
+armnn::TensorInfo GetTensorInfoForOperand(const V1_2::Operand& operand);
+
std::string GetOperandSummary(const V1_2::Operand& operand);
+
+bool isQuantizedOperand(const V1_2::OperandType& operandType);
#endif
#ifdef ARMNN_ANDROID_NN_V1_3 // Using ::android::hardware::neuralnetworks::V1_3
+armnn::TensorInfo GetTensorInfoForOperand(const V1_3::Operand& operand);
+
std::string GetOperandSummary(const V1_3::Operand& operand);
+
+bool isQuantizedOperand(const V1_3::OperandType& operandType);
#endif
template <typename HalModel>
@@ -126,10 +128,11 @@ std::string GetModelSummary(const HalModel& model)
return result.str();
}
+template <typename TensorType>
void DumpTensor(const std::string& dumpDir,
const std::string& requestName,
const std::string& tensorName,
- const armnn::ConstTensor& tensor);
+ const TensorType& tensor);
void DumpJsonProfilingIfRequired(bool gpuProfilingEnabled,
const std::string& dumpDir,
@@ -139,7 +142,20 @@ void DumpJsonProfilingIfRequired(bool gpuProfilingEnabled,
std::string ExportNetworkGraphToDotFile(const armnn::IOptimizedNetwork& optimizedNetwork,
const std::string& dumpDir);
-void RenameGraphDotFile(const std::string& oldName, const std::string& dumpDir, const armnn::NetworkId networkId);
+std::string SerializeNetwork(const armnn::INetwork& network,
+ const std::string& dumpDir,
+ std::vector<uint8_t>& dataCacheData,
+ bool dataCachingActive = true);
+
+void RenameExportedFiles(const std::string& existingSerializedFileName,
+ const std::string& existingDotFileName,
+ const std::string& dumpDir,
+ const armnn::NetworkId networkId);
+
+void RenameFile(const std::string& existingName,
+ const std::string& extension,
+ const std::string& dumpDir,
+ const armnn::NetworkId networkId);
/// Checks if a tensor info represents a dynamic tensor
bool IsDynamicTensor(const armnn::TensorInfo& outputInfo);
@@ -180,4 +196,9 @@ inline V1_2::OutputShape ComputeShape(const armnn::TensorInfo& info)
void CommitPools(std::vector<::android::nn::RunTimePoolInfo>& memPools);
+template <typename ErrorStatus, typename Request>
+ErrorStatus ValidateRequestArgument(const Request& request,
+ const armnn::TensorInfo& tensorInfo,
+ const V1_0::RequestArgument& requestArgument,
+ std::string descString);
} // namespace armnn_driver
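The ValidateRequestArgument template declared just above rejects request arguments whose pool index is out of range or whose offset plus tensor size would overrun the pool. A hedged sketch of checking a single input with the V1_0 instantiation exported from Utils.cpp follows; `CheckFirstInput` and its arguments are hypothetical.

```cpp
// Sketch only, not driver code: validating one request input before mapping pools.
#include "Utils.hpp"

#include <armnn/Tensor.hpp>

V1_0::ErrorStatus CheckFirstInput(const V1_0::Request& request,
                                  const armnn::TensorInfo& inputInfo)
{
    const V1_0::RequestArgument& arg = request.inputs[0];

    // GENERAL_FAILURE is returned (and logged) for a bad pool index or for an
    // offset/length combination that would read past the end of the pool.
    return armnn_driver::ValidateRequestArgument<V1_0::ErrorStatus, V1_0::Request>(
        request, inputInfo, arg, "input");
}
```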
diff --git a/android.hardware.neuralnetworks@1.0-service-armnn.rc.license b/android.hardware.neuralnetworks@1.0-service-armnn.rc.license
new file mode 100644
index 0000000..ce68ac4
--- /dev/null
+++ b/android.hardware.neuralnetworks@1.0-service-armnn.rc.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2018, 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/android.hardware.neuralnetworks@1.1-service-armnn.rc.license b/android.hardware.neuralnetworks@1.1-service-armnn.rc.license
new file mode 100644
index 0000000..ce68ac4
--- /dev/null
+++ b/android.hardware.neuralnetworks@1.1-service-armnn.rc.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2018, 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/android.hardware.neuralnetworks@1.2-service-armnn.rc.license b/android.hardware.neuralnetworks@1.2-service-armnn.rc.license
new file mode 100644
index 0000000..8cbac67
--- /dev/null
+++ b/android.hardware.neuralnetworks@1.2-service-armnn.rc.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2019-2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/android.hardware.neuralnetworks@1.3-service-armnn.rc b/android.hardware.neuralnetworks@1.3-service-armnn.rc
index eaf3616..1dacf79 100644
--- a/android.hardware.neuralnetworks@1.3-service-armnn.rc
+++ b/android.hardware.neuralnetworks@1.3-service-armnn.rc
@@ -1,4 +1,4 @@
-service neuralnetworks_hal_service_armnn /vendor/bin/hw/android.hardware.neuralnetworks@1.3-service-armnn -c GpuAcc -n google-armnn
+service hal_neuralnetworks_armnn /vendor/bin/hw/android.hardware.neuralnetworks@1.3-service-armnn -c GpuAcc -n google-armnn
class hal
user system
group system
diff --git a/android.hardware.neuralnetworks@1.3-service-armnn.rc.license b/android.hardware.neuralnetworks@1.3-service-armnn.rc.license
new file mode 100644
index 0000000..344cf33
--- /dev/null
+++ b/android.hardware.neuralnetworks@1.3-service-armnn.rc.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/docs/FAQ.md b/docs/FAQ.md
index 8402571..9b6f099 100644
--- a/docs/FAQ.md
+++ b/docs/FAQ.md
@@ -17,32 +17,6 @@ https://android.googlesource.com/platform/test/vts-testcase/hal/+/f74899c6c09b52
An acceptable workaround is to increase the timeout defined in AndroidTest.xml, in a similar way to https://android.googlesource.com/platform/test/vts-testcase/hal/+/f74899c6c09b52703e6db0323dffb4ae52539db4.
-Problems seen when trying to build the android-nn-driver obtained from GitHub
------------------------------------------------------------------------------
-
-Some users have encountered difficulties when attempting to build copies of the android-nn-driver obtained from GitHub. The build reports missing module source paths from armnn, clframework or boost_1_64_0. These errors can look like this:
-
-'error: vendor/arm/android-nn-driver/Android.bp:45:1: variant "android_arm64_armv7": module "armnn-arm_compute" "module source path "vendor/arm/android-nn-driver/clframework/build/android-arm64v8a/src/core/CL" does not exist'
-
-These errors are due to missing dependencies or incompatiblities between the android-nn-driver and armnn or clframework versions. The android-nn-driver requires boost_1_64_0 to build unit tests. The versions of android-nn-driver, armnn and clframework will have to match for them to work together. For example, the 19.08 version of android-nn-driver, clframework and armnn will work together but none of them will work with earlier or later versions of the others.
-
-In order to ensure that the correct versions of boost, armnn and the clframework are obtained you can do the following:
-
-1. Delete or move any boost, armnn or clframework directories from the android-nn-driver directory.
-2. Run the setup.sh script in the android-nn-driver directory.
-
-This will download the correct versions of boost, armnn and the clframework and the android-nn-driver should build correctly. Alternatively you can go to the GitHub pages for android-nn-driver, armnn and computelibrary (clframework) and download versions with the same release tag.
-
-As an example, for 20.05 these would be:
-
-https://github.com/ARM-software/android-nn-driver/tree/v20.05
-https://github.com/ARM-software/armnn/tree/v20.05
-https://github.com/ARM-software/computelibrary/tree/v20.05
-
-The correct version of boost (1_64_0) can be downloaded from:
-
-https://www.boost.org/
-
Instance Normalization test failures
------------------------------------
@@ -51,7 +25,7 @@ There is a known issue in the Android NNAPI implementation of Instance Normaliza
VTS and CTS test failures
-------------------------
-With the release of the Android 10 R2 CTS some errors and crashes were discovered in the 19.08 and 19.11 releases of armnn, the android-nn-driver and ComputeLibrary. 19.08.01 and 19.11.01 releases of armnn, the android-nn-driver and ComputeLibrary were prepared that fix all these issues on CpuAcc and GpuAcc. If using 19.08 or 19.11 we recommend that you upgrade to the 19.08.01 or 19.11.01 releases. These issues have also been fixed in the 20.02 and later releases of armnn, the android-nn-driver and ComputeLibrary.
+With the Android 10 R2 CTS, some errors and crashes were discovered in the 19.08 and 19.11 releases of armnn, the android-nn-driver and ComputeLibrary. 19.08.01 and 19.11.01 releases of armnn, the android-nn-driver and ComputeLibrary were prepared that fix all these issues on CpuAcc and GpuAcc. If using 19.08 or 19.11 we recommend that you upgrade to the latest releases.
These fixes also required patches to be made to the Android Q test framework. You may encounter CTS and VTS test failures when attempting to build copies of the android-nn-driver against older versions of Android Q.
@@ -69,4 +43,3 @@ In order to fix these failures you will have to update to a version of Android Q
The Android 10 R3 CTS that can be downloaded from https://source.android.com/compatibility/cts/downloads contains all these patches.
There is a known issue that even with these patches CTS tests "TestRandomGraph/RandomGraphTest#LargeGraph_TENSOR_FLOAT16_Rank3/41" and "TestRandomGraph/RandomGraphTest#LargeGraph_TENSOR_FLOAT16_Rank2/20 " will still fail on CpuRef. These failures are caused by a LogSoftmax layer followed by a Floor layer which blows up the slight difference between fp16 to fp32. This issue only affects CpuRef with Android Q. These tests are not failing for Android R.
-
diff --git a/docs/FAQ.md.license b/docs/FAQ.md.license
new file mode 100644
index 0000000..68a3f51
--- /dev/null
+++ b/docs/FAQ.md.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2019-2022 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/docs/IntegratorGuide.md b/docs/IntegratorGuide.md
index 2d4ebdb..d3587fb 100644
--- a/docs/IntegratorGuide.md
+++ b/docs/IntegratorGuide.md
@@ -5,91 +5,72 @@ This document describes how to integrate the Arm NN Android NNAPI driver into an
### Prerequisites
-1. Android source tree for Android P (we have tested against Android P version 9.0.0_r3) , in the directory `<ANDROID_ROOT>`
-2. Android source tree for Android Q (we have tested against Android Q version 10.0.0_r39), in the directory `<ANDROID_ROOT>`
+1. Android source tree for Android Q (we have tested against Android Q version 10.0.0_r39), in the directory `<ANDROID_ROOT>`
2. Android source tree for Android R (we have tested against Android R version 11.0.0_r3), in the directory `<ANDROID_ROOT>`
-3. Mali OpenCL driver integrated into the Android source tree
+3. Android source tree for Android S (we have tested against Android S version 12.0.0_r1), in the directory `<ANDROID_ROOT>`
+4. Android source tree for Android T (we have tested against Android T pre-release tag - TP1A.220624.003), in the directory `<ANDROID_ROOT>`
+5. Mali OpenCL driver integrated into the Android source tree
### Procedure
1. Place this source directory at `<ANDROID_ROOT>/vendor/arm/android-nn-driver`
2. Run setup.sh
-3. Update the Android build environment to add the ArmNN driver. This ensures that the driver service
+3. Update the Android build environment to add the Arm NN driver. This ensures that the driver service
is built and copied to the `system/vendor/bin/hw` directory in the Android image.
To update the build environment, add to the contents of the variable `PRODUCT_PACKAGES`
within the device-specific makefile that is located in the `<ANDROID_ROOT>/device/<manufacturer>/<product>`
directory. This file is normally called `device.mk`:
-For Android P, Q or R, using NN API version (1.0), the following should be added to `device.mk`:
-<pre>
-PRODUCT_PACKAGES += android.hardware.neuralnetworks@1.0-service-armnn
-</pre>
-
-For Android P, Q or R, a new version of the NN API is available (1.1),
-thus the following should be added to `device.mk` instead:
-<pre>
-PRODUCT_PACKAGES += android.hardware.neuralnetworks@1.1-service-armnn
-</pre> `Android.mk` contains the module definition of both versions of the ArmNN driver.
+`Android.mk` contains the module definition of all versions (1.1, 1.2 and 1.3) of the Arm NN driver.
For Android Q, a new version of the NN API is available (1.2),
thus the following should be added to `device.mk` instead:
<pre>
PRODUCT_PACKAGES += android.hardware.neuralnetworks@1.2-service-armnn
-</pre> `Android.mk` contains the module definition of all three versions (1.0, 1.1 and 1.2) of the ArmNN driver.
+</pre>
-For Android R, new version of the NN API is available (1.3),
+For Android R, S and T, a new version of the NN API is available (1.3),
thus the following should be added to `device.mk` instead:
<pre>
PRODUCT_PACKAGES += android.hardware.neuralnetworks@1.3-service-armnn
</pre>
-`Android.mk` contains the module definition of all versions (1.0, 1.1, 1.2 and 1.3) of the ArmNN driver.
-Similarly, the Neon, CL or reference backend can be enabled/disabled by setting ARMNN_COMPUTE_CL_ENABLE,
+Similarly, the Neon, CL or Reference backend can be enabled/disabled by setting ARMNN_COMPUTE_CL_ENABLE,
ARMNN_COMPUTE_NEON_ENABLE or ARMNN_REF_ENABLE in `device.mk`:
<pre>
ARMNN_COMPUTE_CL_ENABLE := 1
</pre>
-For Android P, Q and R the vendor manifest.xml requires the Neural Network HAL information.
-For Android P use HAL version 1.1 as below. For Android Q substitute 1.2 where necessary. For Android R substitute 1.3 where necessary.
+For all Android versions the vendor manifest.xml requires the Neural Network HAL information.
+For Android Q use HAL version 1.2 as below. For later Android versions substitute 1.3 where necessary.
```xml
<hal format="hidl">
<name>android.hardware.neuralnetworks</name>
<transport>hwbinder</transport>
- <version>1.1</version>
+ <version>1.2</version>
<interface>
<name>IDevice</name>
<instance>armnn</instance>
</interface>
- <fqname>@1.1::IDevice/armnn</fqname>
+ <fqname>@1.2::IDevice/armnn</fqname>
</hal>
```
-4. Build Android as normal, i.e. run `make` in `<ANDROID_ROOT>`
-5. To confirm that the ArmNN driver has been built, check for driver service executable at
-
-Android P
-<pre>
-<ANDROID_ROOT>/out/target/product/<product>/system/vendor/bin/hw
-</pre>
-For example, if the ArmNN driver has been built with the NN API 1.0, check for the following file:
-<pre>
-<ANDROID_ROOT>/out/target/product/<product>/system/vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn
-</pre>
+4. Build Android as normal (https://source.android.com/setup/build/building)
+5. To confirm that the Arm NN driver has been built, check for the driver service executable at
-Android Q and later has a different path:
+Android Q
<pre>
<ANDROID_ROOT>/out/target/product/<product>/vendor/bin/hw
</pre>
### Testing
-1. Run the ArmNN driver service executable in the background.
+1. Run the Arm NN driver service executable in the background.
Use the corresponding version of the driver for the Android version you are running.
i.e
-android.hardware.neuralnetworks@1.1-service-armnn for Android P,
android.hardware.neuralnetworks@1.2-service-armnn for Android Q and
-android.hardware.neuralnetworks@1.3-service-armnn for Android R
+android.hardware.neuralnetworks@1.3-service-armnn for Android R, S and T
<pre>
It is also possible to use a specific backend by using the -c option.
The following is an example of using the CpuAcc backend for Android Q:
@@ -100,8 +81,8 @@ adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.2-service-armn
<pre>
adb shell /data/nativetest/NeuralNetworksTest_static/NeuralNetworksTest_static > NeuralNetworkTest.log
</pre>
-3. To confirm that the ArmNN driver is being used to service the Android Neural Networks API requests,
-check for messages in logcat with the `ArmnnDriver` tag.
+3. To confirm that the Arm NN driver is being used to service the Android Neural Networks API requests,
+check for messages in logcat with the `ArmnnDriver` tag. Please note that you need to add ARMNN_DRIVER_DEBUG := 1 to the 'device-vendor.mk' for the logcat messages to be visible.
### Using the GPU tuner
@@ -110,17 +91,17 @@ There are three levels of tuning: exhaustive, normal and rapid.
Exhaustive means that all lws values are tested.
Normal means that a reduced number of lws values are tested, but that generally is sufficient to have a performance close enough to the exhaustive approach.
Rapid means that only 3 lws values should be tested for each kernel.
-The recommended way of using it with ArmNN is to generate the tuning data during development of the Android image for a device, and use it in read-only mode during normal operation:
+The recommended way of using it with Arm NN is to generate the tuning data during development of the Android image for a device, and use it in read-only mode during normal operation:
-1. Run the ArmNN driver service executable in tuning mode. The path to the tuning data must be writable by the service.
-The following examples assume that the 1.0 version of the driver is being used:
+1. Run the Arm NN driver service executable in tuning mode. The path to the tuning data must be writable by the service.
+The following examples assume that the 1.2 version of the driver is being used:
<pre>
-adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn --cl-tuned-parameters-file &lt;PATH_TO_TUNING_DATA&gt; --cl-tuned-parameters-mode UpdateTunedParameters --cl-tuning-level exhaustive &
+adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.2-service-armnn --cl-tuned-parameters-file &lt;PATH_TO_TUNING_DATA&gt; --cl-tuned-parameters-mode UpdateTunedParameters --cl-tuning-level exhaustive &
</pre>
2. Run a representative set of Android NNAPI testing loads. In this mode of operation, each NNAPI workload will be slow the first time it is executed, as the tuning parameters are being selected. Subsequent executions will use the tuning data which has been generated.
3. Stop the service.
-4. Deploy the tuned parameters file to a location readable by the ArmNN driver service (for example, to a location within /vendor/etc).
+4. Deploy the tuned parameters file to a location readable by the Arm NN driver service (for example, to a location within /vendor/etc).
5. During normal operation, pass the location of the tuning data to the driver service (this would normally be done by passing arguments via Android init in the service .rc definition):
<pre>
-adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.0-service-armnn --cl-tuned-parameters-file &lt;PATH_TO_TUNING_DATA&gt; &
+adb shell /system/vendor/bin/hw/android.hardware.neuralnetworks@1.2-service-armnn --cl-tuned-parameters-file &lt;PATH_TO_TUNING_DATA&gt; &
</pre>
diff --git a/docs/IntegratorGuide.md.license b/docs/IntegratorGuide.md.license
new file mode 100644
index 0000000..68a3f51
--- /dev/null
+++ b/docs/IntegratorGuide.md.license
@@ -0,0 +1,4 @@
+#
+# Copyright © 2019-2022 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
diff --git a/setup.sh b/setup.sh
index 1dfc592..76ea9b7 100755
--- a/setup.sh
+++ b/setup.sh
@@ -1,5 +1,10 @@
#!/bin/bash
+#
+# Copyright © 2018, 2020-2022 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+
function AssertZeroExitCode {
EXITCODE=$?
if [ $EXITCODE -ne 0 ]; then
@@ -9,21 +14,45 @@ function AssertZeroExitCode {
fi
}
-if [ ! -d boost_1_64_0 ]; then
- echo "++ Downloading Boost"
+BUILD_DIR=build-x86_64
+FLATBUFFERS_DIR=$PWD/flatbuffers
+
+function BuildFlatbuffers {
+ pushd flatbuffers
+ rm -rf $BUILD_DIR
+ rm -f CMakeCache.txt
+ FLATBUFFERS_DIR=$PWD
+
+ mkdir -p $BUILD_DIR
+ cd $BUILD_DIR
+
+ echo "+++ Building Google Flatbuffers"
+ CMD="cmake -DFLATBUFFERS_BUILD_FLATC=1 -DCMAKE_INSTALL_PREFIX:PATH=$FLATBUFFERS_DIR .."
+ # Force -fPIC to allow relocatable linking.
+ CXXFLAGS="-fPIC" $CMD
+ AssertZeroExitCode "cmake Google Flatbuffers failed. command was: ${CMD}"
+ make all install
+ AssertZeroExitCode "Building Google Flatbuffers failed"
+ mkdir -p $FLATBUFFERS_DIR/bin
+ cp -f flatc $FLATBUFFERS_DIR/bin
+ AssertZeroExitCode "Failed to copy the Flatbuffers Compiler"
+ popd
+}
+
+if [ ! -d flatbuffers ]; then
+ echo "++ Downloading FlatBuffers v2.0.6"
- BOOST_PKG=boost_1_64_0.tar.gz
+ FLATBUFFERS_PKG=v2.0.6.tar.gz
- # There is a problem with downloading boost from the external. Issue can be found here:https://github.com/boostorg/boost/issues/299.
- # Using a mirror link to download boost.
- curl -LOk https://dl.bintray.com/boostorg/release/1.64.0/source/boost_1_64_0.tar.gz
- # curl -LOk https://sourceforge.net/projects/boost/files/boost/1.64.0/boost_1_64_0.tar.gz # had switched to this mirror as we were not able to download boost from boostorg.
- AssertZeroExitCode "Downloading Boost failed"
+ curl -LOk https://github.com/google/flatbuffers/archive/${FLATBUFFERS_PKG}
+ AssertZeroExitCode "Downloading FlatBuffers failed"
+ mkdir -p flatbuffers
+ tar xzf $FLATBUFFERS_PKG -C flatbuffers --strip-components 1
+ AssertZeroExitCode "Unpacking FlatBuffers failed"
- tar xzf $BOOST_PKG
- AssertZeroExitCode "Unpacking Boost failed"
+ BuildFlatbuffers
- rm -rf $BOOST_PKG
+ rm -rf $FLATBUFFERS_PKG
fi
if [ ! -d armnn ]; then
@@ -44,8 +73,22 @@ fi
# This is required for the Android build system to build clframework (see below)
pushd clframework
scons os=android build=embed_only neon=0 opencl=1 embed_kernels=1 validation_tests=0 \
+ arch=arm64-v8.2-a build_dir=android-arm64v8.2-a benchmark_tests=0 -j16 \
+ build/android-arm64v8.2-a/src/core/arm_compute_version.embed build/android-arm64v8.2-a/src/core/CL/cl_kernels
+AssertZeroExitCode "Precompiling clframework failed for v8.2-a."
+
+scons os=android build=embed_only neon=0 opencl=1 embed_kernels=1 validation_tests=0 \
arch=arm64-v8a build_dir=android-arm64v8a benchmark_tests=0 -j16 \
build/android-arm64v8a/src/core/arm_compute_version.embed build/android-arm64v8a/src/core/CL/cl_kernels
-AssertZeroExitCode "Precompiling clframework failed"
+AssertZeroExitCode "Precompiling clframework failed for v8a."
popd
+if [ ! -d armnn/generated ]; then
+ mkdir -p armnn/generated
+fi
+
+if [ ! -f armnn/generated/ArmnnSchema_generated.h ]; then
+ echo "+++ Generating new ArmnnSchema_generated.h"
+ $FLATBUFFERS_DIR/bin/flatc -o armnn/generated --cpp armnn/src/armnnSerializer/ArmnnSchema.fbs
+ AssertZeroExitCode "Generating ArmnnSchema_generated.h failed."
+fi
diff --git a/test/1.0/Convolution2D.cpp b/test/1.0/Convolution2D.cpp
index 9a5d239..2af0915 100644
--- a/test/1.0/Convolution2D.cpp
+++ b/test/1.0/Convolution2D.cpp
@@ -1,19 +1,14 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
-#include "../DriverTestHelpers.hpp"
#include "../Convolution2D.hpp"
-#include "../../1.0/HalPolicy.hpp"
-#include <boost/test/unit_test.hpp>
#include <log/log.h>
#include <OperationsUtils.h>
-BOOST_AUTO_TEST_SUITE(Convolution2DTests)
-
using namespace android::hardware;
using namespace driverTestHelpers;
using namespace armnn_driver;
@@ -29,14 +24,17 @@ void SetModelFp16Flag(V1_0::Model&, bool)
} // namespace driverTestHelpers
-BOOST_AUTO_TEST_CASE(ConvValidPadding_Hal_1_0)
+DOCTEST_TEST_SUITE("Convolution2DTests_1.0")
+{
+
+DOCTEST_TEST_CASE("ConvValidPadding_Hal_1_0")
{
PaddingTestImpl<hal_1_0::HalPolicy>(android::nn::kPaddingValid);
}
-BOOST_AUTO_TEST_CASE(ConvSamePadding_Hal_1_0)
+DOCTEST_TEST_CASE("ConvSamePadding_Hal_1_0")
{
PaddingTestImpl<hal_1_0::HalPolicy>(android::nn::kPaddingSame);
}
-BOOST_AUTO_TEST_SUITE_END()
+}
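The test sources from this point on drop Boost.Test in favour of doctest: `BOOST_AUTO_TEST_SUITE`/`BOOST_AUTO_TEST_CASE` become a braced `DOCTEST_TEST_SUITE` containing `DOCTEST_TEST_CASE` blocks, and `BOOST_TEST` becomes `DOCTEST_CHECK`. A minimal, self-contained illustration of that pattern is shown below (not driver code; doctest's bundled main is assumed).

```cpp
// Standalone doctest example mirroring the suite/case/check layout used in
// the converted driver tests.
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include <doctest/doctest.h>

static int AddOne(int value) { return value + 1; }

DOCTEST_TEST_SUITE("ExampleSuite")
{

DOCTEST_TEST_CASE("AddOneIncrements")
{
    DOCTEST_CHECK(AddOne(1) == 2);
}

}
```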
diff --git a/test/1.0/FullyConnectedReshape.cpp b/test/1.0/FullyConnectedReshape.cpp
index 72c90ca..e481f2d 100644
--- a/test/1.0/FullyConnectedReshape.cpp
+++ b/test/1.0/FullyConnectedReshape.cpp
@@ -1,42 +1,39 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "../DriverTestHelpers.hpp"
-#include "../../1.0/FullyConnected.hpp"
-#include <boost/test/unit_test.hpp>
-
-BOOST_AUTO_TEST_SUITE(FullyConnectedReshapeTests)
-
-BOOST_AUTO_TEST_CASE(TestFlattenFullyConnectedInput)
+DOCTEST_TEST_SUITE("FullyConnectedReshapeTests")
+{
+DOCTEST_TEST_CASE("TestFlattenFullyConnectedInput")
{
using armnn::TensorShape;
// Pass through 2d input
- BOOST_TEST(FlattenFullyConnectedInput(TensorShape({2,2048}), TensorShape({512, 2048})) ==
- TensorShape({2, 2048}));
+ DOCTEST_CHECK(FlattenFullyConnectedInput(TensorShape({2,2048}),
+ TensorShape({512, 2048})) == TensorShape({2, 2048}));
// Trivial flattening of batched channels
- BOOST_TEST(FlattenFullyConnectedInput(TensorShape({97,1,1,2048}), TensorShape({512, 2048})) ==
- TensorShape({97, 2048}));
+ DOCTEST_CHECK(FlattenFullyConnectedInput(TensorShape({97,1,1,2048}),
+ TensorShape({512, 2048})) == TensorShape({97, 2048}));
// Flatten single batch of rows
- BOOST_TEST(FlattenFullyConnectedInput(TensorShape({1,97,1,2048}), TensorShape({512, 2048})) ==
- TensorShape({97, 2048}));
+ DOCTEST_CHECK(FlattenFullyConnectedInput(TensorShape({1,97,1,2048}),
+ TensorShape({512, 2048})) == TensorShape({97, 2048}));
// Flatten single batch of columns
- BOOST_TEST(FlattenFullyConnectedInput(TensorShape({1,1,97,2048}), TensorShape({512, 2048})) ==
- TensorShape({97, 2048}));
+ DOCTEST_CHECK(FlattenFullyConnectedInput(TensorShape({1,1,97,2048}),
+ TensorShape({512, 2048})) == TensorShape({97, 2048}));
// Move batches into input dimension
- BOOST_TEST(FlattenFullyConnectedInput(TensorShape({50,1,1,10}), TensorShape({512, 20})) ==
- TensorShape({25, 20}));
+ DOCTEST_CHECK(FlattenFullyConnectedInput(TensorShape({50,1,1,10}),
+ TensorShape({512, 20})) == TensorShape({25, 20}));
// Flatten single batch of 3D data (e.g. convolution output)
- BOOST_TEST(FlattenFullyConnectedInput(TensorShape({1,16,16,10}), TensorShape({512, 2560})) ==
- TensorShape({1, 2560}));
+ DOCTEST_CHECK(FlattenFullyConnectedInput(TensorShape({1,16,16,10}),
+ TensorShape({512, 2560})) == TensorShape({1, 2560}));
}
-BOOST_AUTO_TEST_SUITE_END()
+}
diff --git a/test/1.0/Lstm.cpp b/test/1.0/Lstm.cpp
index 5f0a209..6b3e704 100644
--- a/test/1.0/Lstm.cpp
+++ b/test/1.0/Lstm.cpp
@@ -1,34 +1,60 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "../Lstm.hpp"
-#include <boost/test/data/test_case.hpp>
-
-BOOST_AUTO_TEST_SUITE(LstmTests)
-
using namespace armnn_driver;
-BOOST_DATA_TEST_CASE(LstmNoCifgNoPeepholeNoProjectionTest, COMPUTE_DEVICES)
+DOCTEST_TEST_SUITE("LstmTests_1.0_CpuRef")
{
- LstmNoCifgNoPeepholeNoProjection<hal_1_0::HalPolicy>(sample);
-}
-BOOST_DATA_TEST_CASE(LstmCifgPeepholeNoProjectionTest, COMPUTE_DEVICES)
-{
- LstmCifgPeepholeNoProjection<hal_1_0::HalPolicy>(sample);
-}
+ DOCTEST_TEST_CASE("LstmNoCifgNoPeepholeNoProjectionTest_1.0_armnn::Compute::CpuRef")
+ {
+ LstmNoCifgNoPeepholeNoProjection<hal_1_0::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionTest_1.0_CpuRef")
+ {
+ LstmCifgPeepholeNoProjection<hal_1_0::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("LstmNoCifgPeepholeProjectionTest_1.0_CpuRef")
+ {
+ LstmNoCifgPeepholeProjection<hal_1_0::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionBatch2Test_1.0_CpuRef")
+ {
+ LstmCifgPeepholeNoProjectionBatch2<hal_1_0::HalPolicy>(armnn::Compute::CpuRef);
+ }
-BOOST_DATA_TEST_CASE(LstmNoCifgPeepholeProjectionTest, COMPUTE_DEVICES)
-{
- LstmNoCifgPeepholeProjection<hal_1_0::HalPolicy>(sample);
}
-BOOST_DATA_TEST_CASE(LstmCifgPeepholeNoProjectionBatch2Test, COMPUTE_DEVICES)
+#if defined(ARMCOMPUTECL_ENABLED)
+DOCTEST_TEST_SUITE("LstmTests_1.0_GpuAcc")
{
- LstmCifgPeepholeNoProjectionBatch2<hal_1_0::HalPolicy>(sample);
-}
-BOOST_AUTO_TEST_SUITE_END()
+ DOCTEST_TEST_CASE("LstmNoCifgNoPeepholeNoProjectionTest_1.0_GpuAcc")
+ {
+ LstmNoCifgNoPeepholeNoProjection<hal_1_0::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionTest_1.0_GpuAcc")
+ {
+ LstmCifgPeepholeNoProjection<hal_1_0::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmNoCifgPeepholeProjectionTest_1.0_GpuAcc")
+ {
+ LstmNoCifgPeepholeProjection<hal_1_0::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionBatch2Test_1.0_GpuAcc")
+ {
+ LstmCifgPeepholeNoProjectionBatch2<hal_1_0::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+}
+#endif
diff --git a/test/1.1/Convolution2D.cpp b/test/1.1/Convolution2D.cpp
index 32d5018..4601f76 100644
--- a/test/1.1/Convolution2D.cpp
+++ b/test/1.1/Convolution2D.cpp
@@ -1,19 +1,14 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
-#include "../DriverTestHelpers.hpp"
#include "../Convolution2D.hpp"
-#include "../../1.1/HalPolicy.hpp"
-#include <boost/test/unit_test.hpp>
#include <log/log.h>
#include <OperationsUtils.h>
-BOOST_AUTO_TEST_SUITE(Convolution2DTests)
-
using namespace android::hardware;
using namespace driverTestHelpers;
using namespace armnn_driver;
@@ -29,24 +24,28 @@ void SetModelFp16Flag(V1_1::Model& model, bool fp16Enabled)
} // namespace driverTestHelpers
-BOOST_AUTO_TEST_CASE(ConvValidPadding_Hal_1_1)
+
+DOCTEST_TEST_SUITE("Convolution2DTests_1.1")
+{
+
+DOCTEST_TEST_CASE("ConvValidPadding_Hal_1_1")
{
PaddingTestImpl<hal_1_1::HalPolicy>(android::nn::kPaddingValid);
}
-BOOST_AUTO_TEST_CASE(ConvSamePadding_Hal_1_1)
+DOCTEST_TEST_CASE("ConvSamePadding_Hal_1_1")
{
PaddingTestImpl<hal_1_1::HalPolicy>(android::nn::kPaddingSame);
}
-BOOST_AUTO_TEST_CASE(ConvValidPaddingFp16Flag_Hal_1_1)
+DOCTEST_TEST_CASE("ConvValidPaddingFp16Flag_Hal_1_1")
{
PaddingTestImpl<hal_1_1::HalPolicy>(android::nn::kPaddingValid, true);
}
-BOOST_AUTO_TEST_CASE(ConvSamePaddingFp16Flag_Hal_1_1)
+DOCTEST_TEST_CASE("ConvSamePaddingFp16Flag_Hal_1_1")
{
PaddingTestImpl<hal_1_1::HalPolicy>(android::nn::kPaddingSame, true);
}
-BOOST_AUTO_TEST_SUITE_END()
+}
diff --git a/test/1.1/Lstm.cpp b/test/1.1/Lstm.cpp
index 703597e..cbdf6b1 100644
--- a/test/1.1/Lstm.cpp
+++ b/test/1.1/Lstm.cpp
@@ -1,34 +1,60 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "../Lstm.hpp"
-#include <boost/test/data/test_case.hpp>
-
-BOOST_AUTO_TEST_SUITE(LstmTests)
-
using namespace armnn_driver;
-BOOST_DATA_TEST_CASE(LstmNoCifgNoPeepholeNoProjectionTest, COMPUTE_DEVICES)
+DOCTEST_TEST_SUITE("LstmTests_1.1_CpuRef")
{
- LstmNoCifgNoPeepholeNoProjection<hal_1_1::HalPolicy>(sample);
-}
-BOOST_DATA_TEST_CASE(LstmCifgPeepholeNoProjectionTest, COMPUTE_DEVICES)
-{
- LstmCifgPeepholeNoProjection<hal_1_1::HalPolicy>(sample);
-}
+ DOCTEST_TEST_CASE("LstmNoCifgNoPeepholeNoProjectionTest_1.1_armnn::Compute::CpuRef")
+ {
+ LstmNoCifgNoPeepholeNoProjection<hal_1_1::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionTest_1.1_CpuRef")
+ {
+ LstmCifgPeepholeNoProjection<hal_1_1::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("LstmNoCifgPeepholeProjectionTest_1.1_CpuRef")
+ {
+ LstmNoCifgPeepholeProjection<hal_1_1::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionBatch2Test_1.1_CpuRef")
+ {
+ LstmCifgPeepholeNoProjectionBatch2<hal_1_1::HalPolicy>(armnn::Compute::CpuRef);
+ }
-BOOST_DATA_TEST_CASE(LstmNoCifgPeepholeProjectionTest, COMPUTE_DEVICES)
-{
- LstmNoCifgPeepholeProjection<hal_1_1::HalPolicy>(sample);
}
-BOOST_DATA_TEST_CASE(LstmCifgPeepholeNoProjectionBatch2Test, COMPUTE_DEVICES)
+#if defined(ARMCOMPUTECL_ENABLED)
+DOCTEST_TEST_SUITE("LstmTests_1.1_GpuAcc")
{
- LstmCifgPeepholeNoProjectionBatch2<hal_1_1::HalPolicy>(sample);
-}
-BOOST_AUTO_TEST_SUITE_END()
+ DOCTEST_TEST_CASE("LstmNoCifgNoPeepholeNoProjectionTest_1.1_GpuAcc")
+ {
+ LstmNoCifgNoPeepholeNoProjection<hal_1_1::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionTest_1.1_GpuAcc")
+ {
+ LstmCifgPeepholeNoProjection<hal_1_1::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmNoCifgPeepholeProjectionTest_1.1_GpuAcc")
+ {
+ LstmNoCifgPeepholeProjection<hal_1_1::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionBatch2Test_1.1_GpuAcc")
+ {
+ LstmCifgPeepholeNoProjectionBatch2<hal_1_1::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+}
+#endif
diff --git a/test/1.1/Mean.cpp b/test/1.1/Mean.cpp
index 8c52d23..70bdc3d 100644
--- a/test/1.1/Mean.cpp
+++ b/test/1.1/Mean.cpp
@@ -1,34 +1,25 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017, 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "../DriverTestHelpers.hpp"
#include "../TestTensor.hpp"
-#include "../1.1/HalPolicy.hpp"
-
-#include <boost/test/data/test_case.hpp>
+#include <1.1/HalPolicy.hpp>
#include <array>
-BOOST_AUTO_TEST_SUITE(MeanTests)
-
using namespace android::hardware;
using namespace driverTestHelpers;
using namespace armnn_driver;
using HalPolicy = hal_1_1::HalPolicy;
+using RequestArgument = V1_0::RequestArgument;
namespace
{
-#ifndef ARMCOMPUTECL_ENABLED
- static const std::array<armnn::Compute, 1> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef }};
-#else
- static const std::array<armnn::Compute, 2> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef, armnn::Compute::GpuAcc }};
-#endif
-
void MeanTestImpl(const TestTensor& input,
const hidl_vec<uint32_t>& axisDimensions,
const int32_t* axisValues,
@@ -61,22 +52,22 @@ void MeanTestImpl(const TestTensor& input,
android::sp<V1_0::IPreparedModel> preparedModel = PrepareModel(model, *driver);
// The request's memory pools will follow the same order as the inputs
- DataLocation inLoc = {};
- inLoc.poolIndex = 0;
- inLoc.offset = 0;
- inLoc.length = input.GetNumElements() * sizeof(float);
- RequestArgument inArg = {};
- inArg.location = inLoc;
- inArg.dimensions = input.GetDimensions();
+ V1_0::DataLocation inLoc = {};
+ inLoc.poolIndex = 0;
+ inLoc.offset = 0;
+ inLoc.length = input.GetNumElements() * sizeof(float);
+ RequestArgument inArg = {};
+ inArg.location = inLoc;
+ inArg.dimensions = input.GetDimensions();
// An additional memory pool is needed for the output
- DataLocation outLoc = {};
- outLoc.poolIndex = 1;
- outLoc.offset = 0;
- outLoc.length = expectedOutput.GetNumElements() * sizeof(float);
- RequestArgument outArg = {};
- outArg.location = outLoc;
- outArg.dimensions = expectedOutput.GetDimensions();
+ V1_0::DataLocation outLoc = {};
+ outLoc.poolIndex = 1;
+ outLoc.offset = 0;
+ outLoc.length = expectedOutput.GetNumElements() * sizeof(float);
+ RequestArgument outArg = {};
+ outArg.location = outLoc;
+ outArg.dimensions = expectedOutput.GetDimensions();
// Make the request based on the arguments
V1_0::Request request = {};
@@ -93,64 +84,177 @@ void MeanTestImpl(const TestTensor& input,
if (preparedModel.get() != nullptr)
{
V1_0::ErrorStatus execStatus = Execute(preparedModel, request);
- BOOST_TEST(execStatus == V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK((int)execStatus == (int)V1_0::ErrorStatus::NONE);
}
const float* expectedOutputData = expectedOutput.GetData();
for (unsigned int i = 0; i < expectedOutput.GetNumElements(); i++)
{
- BOOST_TEST(outputData[i] == expectedOutputData[i]);
+ DOCTEST_CHECK(outputData[i] == expectedOutputData[i]);
}
}
} // anonymous namespace
-BOOST_DATA_TEST_CASE(MeanNoKeepDimsTest, COMPUTE_DEVICES)
+DOCTEST_TEST_SUITE("MeanTests_CpuRef")
{
- TestTensor input{ armnn::TensorShape{ 4, 3, 2 }, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
- 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
- 20.0f, 21.0f, 22.0f, 23.0f, 24.0f } };
- hidl_vec<uint32_t> axisDimensions = { 2 };
- int32_t axisValues[] = { 0, 1 };
- int32_t keepDims = 0;
- TestTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0f, 13.0f } };
-
- MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, false, sample);
-}
-BOOST_DATA_TEST_CASE(MeanKeepDimsTest, COMPUTE_DEVICES)
-{
- TestTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f } };
- hidl_vec<uint32_t> axisDimensions = { 1 };
- int32_t axisValues[] = { 2 };
- int32_t keepDims = 1;
- TestTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0f, 2.0f } };
+ DOCTEST_TEST_CASE("MeanNoKeepDimsTest_CpuRef")
+ {
+ TestTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
+ 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+ 20.0f, 21.0f, 22.0f, 23.0f, 24.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ TestTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0f, 13.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, false, armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("MeanKeepDimsTest_CpuRef")
+ {
+ TestTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ TestTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0f, 2.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, false, armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("MeanFp16EnabledNoKeepDimsTest_CpuRef")
+ {
+ TestTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
+ 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+ 20.0f, 21.0f, 22.0f, 23.0f, 24.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ TestTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0f, 13.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("MeanFp16EnabledKeepDimsTest_CpuRef")
+ {
+ TestTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ TestTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0f, 2.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::CpuRef);
+ }
- MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, false, sample);
}
-BOOST_DATA_TEST_CASE(MeanFp16NoKeepDimsTest, COMPUTE_DEVICES)
+#ifdef ARMCOMPUTECL_ENABLED
+DOCTEST_TEST_SUITE("MeanTests_CpuAcc")
{
- TestTensor input{ armnn::TensorShape{ 4, 3, 2 }, { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
- 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
- 20.0f, 21.0f, 22.0f, 23.0f, 24.0f } };
- hidl_vec<uint32_t> axisDimensions = { 2 };
- int32_t axisValues[] = { 0, 1 };
- int32_t keepDims = 0;
- TestTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0f, 13.0f } };
-
- MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, sample);
+ DOCTEST_TEST_CASE("MeanNoKeepDimsTest_CpuAcc")
+ {
+ TestTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
+ 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+ 20.0f, 21.0f, 22.0f, 23.0f, 24.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ TestTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0f, 13.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, false, armnn::Compute::CpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("MeanKeepDimsTest_CpuAcc")
+ {
+ TestTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ TestTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0f, 2.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, false, armnn::Compute::CpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("MeanFp16EnabledNoKeepDimsTest_CpuAcc")
+ {
+ TestTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
+ 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+ 20.0f, 21.0f, 22.0f, 23.0f, 24.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ TestTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0f, 13.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::CpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("MeanFp16EnabledKeepDimsTest_CpuAcc")
+ {
+ TestTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ TestTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0f, 2.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::CpuAcc);
+ }
}
-BOOST_DATA_TEST_CASE(MeanFp16KeepDimsTest, COMPUTE_DEVICES)
+DOCTEST_TEST_SUITE("MeanTests_GpuAcc")
{
- TestTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f } };
- hidl_vec<uint32_t> axisDimensions = { 1 };
- int32_t axisValues[] = { 2 };
- int32_t keepDims = 1;
- TestTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0f, 2.0f } };
+ DOCTEST_TEST_CASE("MeanNoKeepDimsTest_GpuAcc")
+ {
+ TestTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
+ 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+ 20.0f, 21.0f, 22.0f, 23.0f, 24.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ TestTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0f, 13.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, false, armnn::Compute::GpuAcc);
+ }
- MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, sample);
-}
+ DOCTEST_TEST_CASE("MeanKeepDimsTest_GpuAcc")
+ {
+ TestTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ TestTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0f, 2.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, false, armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("MeanFp16EnabledNoKeepDimsTest_GpuAcc")
+ {
+ TestTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
+ 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f,
+ 20.0f, 21.0f, 22.0f, 23.0f, 24.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ TestTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0f, 13.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::GpuAcc);
+ }
-BOOST_AUTO_TEST_SUITE_END()
+ DOCTEST_TEST_CASE("MeanFp16EnabledKeepDimsTest_GpuAcc")
+ {
+ TestTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0f, 1.0f, 2.0f, 2.0f, 3.0f, 3.0f } };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ TestTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0f, 2.0f } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::GpuAcc);
+ }
+}
+#endif
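The reworked Mean tests above (and the Transpose tests that follow) all build their requests the same way: the input argument points at pool 0, the output argument at pool 1, and both now use the fully qualified `V1_0::DataLocation`/`V1_0::RequestArgument` types since the unqualified aliases were removed. A hedged standalone sketch of that two-pool layout follows; the element counts are placeholders rather than values from any specific test.

```cpp
// Sketch only, not driver code: the two-pool request layout used by these tests.
#include <HalInterfaces.h>

namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
using ::android::hardware::hidl_vec;

V1_0::Request MakeTwoPoolRequest()
{
    V1_0::DataLocation inLoc = {};
    inLoc.poolIndex = 0;                  // input data lives in the first pool
    inLoc.offset    = 0;
    inLoc.length    = 6 * sizeof(float);  // hypothetical 6-element float input

    V1_0::RequestArgument inArg = {};
    inArg.location = inLoc;

    V1_0::DataLocation outLoc = {};
    outLoc.poolIndex = 1;                 // the output gets its own pool
    outLoc.offset    = 0;
    outLoc.length    = 2 * sizeof(float); // hypothetical 2-element float output

    V1_0::RequestArgument outArg = {};
    outArg.location = outLoc;

    V1_0::Request request = {};
    request.inputs  = hidl_vec<V1_0::RequestArgument>{inArg};
    request.outputs = hidl_vec<V1_0::RequestArgument>{outArg};
    // request.pools is filled afterwards with one hidl_memory region per pool.
    return request;
}
```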
diff --git a/test/1.1/Transpose.cpp b/test/1.1/Transpose.cpp
index 9d24000..5499e0d 100644
--- a/test/1.1/Transpose.cpp
+++ b/test/1.1/Transpose.cpp
@@ -1,39 +1,28 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
-#include "OperationsUtils.h"
#include "../DriverTestHelpers.hpp"
#include "../TestTensor.hpp"
-
-#include "../1.1/HalPolicy.hpp"
-
-#include <boost/test/unit_test.hpp>
-#include <boost/test/data/test_case.hpp>
+#include <1.1/HalPolicy.hpp>
#include <log/log.h>
+#include <OperationsUtils.h>
#include <array>
#include <cmath>
-BOOST_AUTO_TEST_SUITE(TransposeTests)
-
using namespace android::hardware;
using namespace driverTestHelpers;
using namespace armnn_driver;
using HalPolicy = hal_1_1::HalPolicy;
+using RequestArgument = V1_0::RequestArgument;
namespace
{
-#ifndef ARMCOMPUTECL_ENABLED
- static const std::array<armnn::Compute, 1> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef }};
-#else
- static const std::array<armnn::Compute, 2> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef, armnn::Compute::GpuAcc }};
-#endif
-
void TransposeTestImpl(const TestTensor & inputs, int32_t perm[],
const TestTensor & expectedOutputTensor, armnn::Compute computeDevice)
{
@@ -58,22 +47,22 @@ void TransposeTestImpl(const TestTensor & inputs, int32_t perm[],
// the request's memory pools will follow the same order as
// the inputs
- DataLocation inloc = {};
- inloc.poolIndex = 0;
- inloc.offset = 0;
- inloc.length = inputs.GetNumElements() * sizeof(float);
- RequestArgument input = {};
- input.location = inloc;
- input.dimensions = inputs.GetDimensions();
+ V1_0::DataLocation inloc = {};
+ inloc.poolIndex = 0;
+ inloc.offset = 0;
+ inloc.length = inputs.GetNumElements() * sizeof(float);
+ RequestArgument input = {};
+ input.location = inloc;
+ input.dimensions = inputs.GetDimensions();
// and an additional memory pool is needed for the output
- DataLocation outloc = {};
- outloc.poolIndex = 1;
- outloc.offset = 0;
- outloc.length = expectedOutputTensor.GetNumElements() * sizeof(float);
- RequestArgument output = {};
- output.location = outloc;
- output.dimensions = expectedOutputTensor.GetDimensions();
+ V1_0::DataLocation outloc = {};
+ outloc.poolIndex = 1;
+ outloc.offset = 0;
+ outloc.length = expectedOutputTensor.GetNumElements() * sizeof(float);
+ RequestArgument output = {};
+ output.location = outloc;
+ output.dimensions = expectedOutputTensor.GetDimensions();
// make the request based on the arguments
V1_0::Request request = {};
@@ -97,38 +86,100 @@ void TransposeTestImpl(const TestTensor & inputs, int32_t perm[],
const float * expectedOutput = expectedOutputTensor.GetData();
for (unsigned int i = 0; i < expectedOutputTensor.GetNumElements(); ++i)
{
- BOOST_TEST(outdata[i] == expectedOutput[i]);
+ DOCTEST_CHECK(outdata[i] == expectedOutput[i]);
}
}
} // namespace
-BOOST_DATA_TEST_CASE(Transpose , COMPUTE_DEVICES)
+DOCTEST_TEST_SUITE("TransposeTests_CpuRef")
{
- int32_t perm[] = {2, 3, 1, 0};
- TestTensor input{armnn::TensorShape{1, 2, 2, 2},{1, 2, 3, 4, 5, 6, 7, 8}};
- TestTensor expected{armnn::TensorShape{2, 2, 2, 1},{1, 5, 2, 6, 3, 7, 4, 8}};
+ DOCTEST_TEST_CASE("Transpose_CpuRef")
+ {
+ int32_t perm[] = {2, 3, 1, 0};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 2},{1, 2, 3, 4, 5, 6, 7, 8}};
+ TestTensor expected{armnn::TensorShape{2, 2, 2, 1},{1, 5, 2, 6, 3, 7, 4, 8}};
+
+ TransposeTestImpl(input, perm, expected, armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("TransposeNHWCToArmNN_CpuRef")
+ {
+ int32_t perm[] = {0, 3, 1, 2};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 3},{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}};
+ TestTensor expected{armnn::TensorShape{1, 3, 2, 2},{1, 11, 21, 31, 2, 12, 22, 32, 3, 13, 23, 33}};
- TransposeTestImpl(input, perm, expected, sample);
+ TransposeTestImpl(input, perm, expected, armnn::Compute::CpuRef);
+ }
+ DOCTEST_TEST_CASE("TransposeArmNNToNHWC_CpuRef")
+ {
+ int32_t perm[] = {0, 2, 3, 1};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 2},{1, 2, 3, 4, 5, 6, 7, 8}};
+ TestTensor expected{armnn::TensorShape{1, 2, 2, 2},{1, 5, 2, 6, 3, 7, 4, 8}};
+
+ TransposeTestImpl(input, perm, expected, armnn::Compute::CpuRef);
+ }
}
-BOOST_DATA_TEST_CASE(TransposeNHWCToArmNN , COMPUTE_DEVICES)
+#ifdef ARMCOMPUTECL_ENABLED
+DOCTEST_TEST_SUITE("TransposeTests_CpuAcc")
{
- int32_t perm[] = {0, 3, 1, 2};
- TestTensor input{armnn::TensorShape{1, 2, 2, 3},{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}};
- TestTensor expected{armnn::TensorShape{1, 3, 2, 2},{1, 11, 21, 31, 2, 12, 22, 32, 3, 13, 23, 33}};
+ DOCTEST_TEST_CASE("Transpose_CpuAcc")
+ {
+ int32_t perm[] = {2, 3, 1, 0};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 2},{1, 2, 3, 4, 5, 6, 7, 8}};
+ TestTensor expected{armnn::TensorShape{2, 2, 2, 1},{1, 5, 2, 6, 3, 7, 4, 8}};
+
+ TransposeTestImpl(input, perm, expected, armnn::Compute::CpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("TransposeNHWCToArmNN_CpuAcc")
+ {
+ int32_t perm[] = {0, 3, 1, 2};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 3},{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}};
+ TestTensor expected{armnn::TensorShape{1, 3, 2, 2},{1, 11, 21, 31, 2, 12, 22, 32, 3, 13, 23, 33}};
+
+ TransposeTestImpl(input, perm, expected, armnn::Compute::CpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("TransposeArmNNToNHWC_CpuAcc")
+ {
+ int32_t perm[] = {0, 2, 3, 1};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 2},{1, 2, 3, 4, 5, 6, 7, 8}};
+ TestTensor expected{armnn::TensorShape{1, 2, 2, 2},{1, 5, 2, 6, 3, 7, 4, 8}};
- TransposeTestImpl(input, perm, expected, sample);
+ TransposeTestImpl(input, perm, expected, armnn::Compute::CpuAcc);
+ }
}
-BOOST_DATA_TEST_CASE(TransposeArmNNToNHWC , COMPUTE_DEVICES)
+DOCTEST_TEST_SUITE("TransposeTests_GpuAcc")
{
- int32_t perm[] = {0, 2, 3, 1};
- TestTensor input{armnn::TensorShape{1, 2, 2, 2},{1, 2, 3, 4, 5, 6, 7, 8}};
- TestTensor expected{armnn::TensorShape{1, 2, 2, 2},{1, 5, 2, 6, 3, 7, 4, 8}};
+ DOCTEST_TEST_CASE("Transpose_GpuAcc")
+ {
+ int32_t perm[] = {2, 3, 1, 0};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 2},{1, 2, 3, 4, 5, 6, 7, 8}};
+ TestTensor expected{armnn::TensorShape{2, 2, 2, 1},{1, 5, 2, 6, 3, 7, 4, 8}};
- TransposeTestImpl(input, perm, expected, sample);
-}
+ TransposeTestImpl(input, perm, expected, armnn::Compute::GpuAcc);
+ }
-BOOST_AUTO_TEST_SUITE_END()
+ DOCTEST_TEST_CASE("TransposeNHWCToArmNN_GpuAcc")
+ {
+ int32_t perm[] = {0, 3, 1, 2};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 3},{1, 2, 3, 11, 12, 13, 21, 22, 23, 31, 32, 33}};
+ TestTensor expected{armnn::TensorShape{1, 3, 2, 2},{1, 11, 21, 31, 2, 12, 22, 32, 3, 13, 23, 33}};
+
+ TransposeTestImpl(input, perm, expected, armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("TransposeArmNNToNHWC_GpuAcc")
+ {
+ int32_t perm[] = {0, 2, 3, 1};
+ TestTensor input{armnn::TensorShape{1, 2, 2, 2},{1, 2, 3, 4, 5, 6, 7, 8}};
+ TestTensor expected{armnn::TensorShape{1, 2, 2, 2},{1, 5, 2, 6, 3, 7, 4, 8}};
+
+ TransposeTestImpl(input, perm, expected, armnn::Compute::GpuAcc);
+ }
+}
+#endif
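The pattern above repeats throughout this change: each Boost data-driven case that iterated over COMPUTE_DEVICES is unrolled into one doctest case per backend, and the accelerated suites are guarded at compile time. A minimal sketch of that shape, assuming doctest is available as <doctest/doctest.h> and that DriverTestHelpers.hpp provides armnn::Compute as in the tests above (all names below are hypothetical, not taken from this patch):

#include <doctest/doctest.h>
#include "DriverTestHelpers.hpp"   // assumed to provide armnn::Compute, as in the tests above

static void RunExampleTest(armnn::Compute device)
{
    // Shared body: build the model, execute it on 'device' and check the outputs,
    // as the *TestImpl helpers in this patch do.
    (void)device;
}

DOCTEST_TEST_SUITE("ExampleTests_CpuRef")
{
    DOCTEST_TEST_CASE("Example_CpuRef") { RunExampleTest(armnn::Compute::CpuRef); }
}

#ifdef ARMCOMPUTECL_ENABLED
DOCTEST_TEST_SUITE("ExampleTests_GpuAcc")
{
    DOCTEST_TEST_CASE("Example_GpuAcc") { RunExampleTest(armnn::Compute::GpuAcc); }
}
#endif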
diff --git a/test/1.2/Capabilities.cpp b/test/1.2/Capabilities.cpp
index f25723d..41d5ee5 100644
--- a/test/1.2/Capabilities.cpp
+++ b/test/1.2/Capabilities.cpp
@@ -1,15 +1,12 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
-#include "../../1.2/ArmnnDriverImpl.hpp"
-
+#include "../DriverTestHelpers.hpp"
#include "Utils.h"
-#include <armnn/utility/Assert.hpp>
-
-#include <boost/test/unit_test.hpp>
+#include <1.2/ArmnnDriverImpl.hpp>
#include <sys/system_properties.h>
@@ -61,16 +58,15 @@ struct CapabilitiesFixture
void CheckOperandType(const V1_2::Capabilities& capabilities, V1_2::OperandType type, float execTime, float powerUsage)
{
using namespace armnn_driver::hal_1_2;
- PerformanceInfo perfInfo = android::nn::lookup(capabilities.operandPerformance, type);
- ARMNN_ASSERT(perfInfo.execTime == execTime);
- ARMNN_ASSERT(perfInfo.powerUsage == powerUsage);
+ V1_0::PerformanceInfo perfInfo = android::nn::lookup(capabilities.operandPerformance, type);
+ DOCTEST_CHECK(perfInfo.execTime == execTime);
+ DOCTEST_CHECK(perfInfo.powerUsage == powerUsage);
}
-BOOST_FIXTURE_TEST_SUITE(CapabilitiesTests, CapabilitiesFixture)
-
-BOOST_AUTO_TEST_CASE(PerformanceCapabilitiesWithRuntime)
+DOCTEST_TEST_SUITE("CapabilitiesTests")
+{
+DOCTEST_TEST_CASE_FIXTURE(CapabilitiesFixture, "PerformanceCapabilitiesWithRuntime")
{
- using namespace armnn_driver::hal_1_2;
using namespace android::nn;
auto getCapabilitiesFn = [&](V1_0::ErrorStatus error, const V1_2::Capabilities& capabilities)
@@ -94,7 +90,8 @@ BOOST_AUTO_TEST_CASE(PerformanceCapabilitiesWithRuntime)
CheckOperandType(capabilities, V1_2::OperandType::OEM, FLT_MAX, FLT_MAX);
CheckOperandType(capabilities, V1_2::OperandType::TENSOR_OEM_BYTE, FLT_MAX, FLT_MAX);
- ARMNN_ASSERT(error == V1_0::ErrorStatus::NONE);
+ bool result = (error == V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK(result);
};
__system_property_set("Armnn.operandTypeTensorFloat32Performance.execTime", "2.0f");
@@ -121,12 +118,11 @@ BOOST_AUTO_TEST_CASE(PerformanceCapabilitiesWithRuntime)
armnn::IRuntime::CreationOptions options;
armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
- ArmnnDriverImpl::getCapabilities_1_2(runtime, getCapabilitiesFn);
+ armnn_driver::hal_1_2::ArmnnDriverImpl::getCapabilities_1_2(runtime, getCapabilitiesFn);
}
-BOOST_AUTO_TEST_CASE(PerformanceCapabilitiesUndefined)
+DOCTEST_TEST_CASE_FIXTURE(CapabilitiesFixture, "PerformanceCapabilitiesUndefined")
{
- using namespace armnn_driver::hal_1_2;
using namespace android::nn;
float defaultValue = .1f;
@@ -155,13 +151,14 @@ BOOST_AUTO_TEST_CASE(PerformanceCapabilitiesUndefined)
CheckOperandType(capabilities, V1_2::OperandType::OEM, FLT_MAX, FLT_MAX);
CheckOperandType(capabilities, V1_2::OperandType::TENSOR_OEM_BYTE, FLT_MAX, FLT_MAX);
- ARMNN_ASSERT(error == V1_0::ErrorStatus::NONE);
+ bool result = (error == V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK(result);
};
armnn::IRuntime::CreationOptions options;
armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
- ArmnnDriverImpl::getCapabilities_1_2(runtime, getCapabilitiesFn);
+ armnn_driver::hal_1_2::ArmnnDriverImpl::getCapabilities_1_2(runtime, getCapabilitiesFn);
}
-BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
+}
\ No newline at end of file
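Capabilities.cpp also swaps BOOST_FIXTURE_TEST_SUITE for doctest's per-case fixtures: DOCTEST_TEST_CASE_FIXTURE(Fixture, "name") derives the test body from the fixture type and constructs a fresh instance for every case, so whatever setup and teardown CapabilitiesFixture performs still wraps each test. A minimal sketch with a hypothetical fixture, assuming only doctest:

#include <doctest/doctest.h>

struct ExampleFixture
{
    ExampleFixture()  { /* per-case setup, e.g. seeding system properties */ }
    ~ExampleFixture() { /* per-case teardown */ }
};

DOCTEST_TEST_SUITE("ExampleFixtureTests")
{
DOCTEST_TEST_CASE_FIXTURE(ExampleFixture, "RunsWithFreshFixture")
{
    // The body executes as a member of a type derived from ExampleFixture,
    // so fixture members are directly accessible here.
    DOCTEST_CHECK(true);
}
}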
diff --git a/test/1.2/Dilation.cpp b/test/1.2/Dilation.cpp
index 1a7ba4b..c9182a7 100644
--- a/test/1.2/Dilation.cpp
+++ b/test/1.2/Dilation.cpp
@@ -1,17 +1,16 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "../Dilation.hpp"
-#include "../../1.2/HalPolicy.hpp"
+#include <1.2/HalPolicy.hpp>
-#include <boost/test/data/test_case.hpp>
-
-BOOST_AUTO_TEST_SUITE(DilationTests)
+DOCTEST_TEST_SUITE("DilationTests")
+{
-BOOST_AUTO_TEST_CASE(ConvolutionExplicitPaddingNoDilation)
+DOCTEST_TEST_CASE("ConvolutionExplicitPaddingNoDilation")
{
DilationTestOptions options;
options.m_IsDepthwiseConvolution = false;
@@ -21,7 +20,7 @@ BOOST_AUTO_TEST_CASE(ConvolutionExplicitPaddingNoDilation)
DilationTestImpl<hal_1_2::HalPolicy>(options);
}
-BOOST_AUTO_TEST_CASE(ConvolutionExplicitPaddingDilation)
+DOCTEST_TEST_CASE("ConvolutionExplicitPaddingDilation")
{
DilationTestOptions options;
options.m_IsDepthwiseConvolution = false;
@@ -31,7 +30,7 @@ BOOST_AUTO_TEST_CASE(ConvolutionExplicitPaddingDilation)
DilationTestImpl<hal_1_2::HalPolicy>(options);
}
-BOOST_AUTO_TEST_CASE(ConvolutionImplicitPaddingNoDilation)
+DOCTEST_TEST_CASE("ConvolutionImplicitPaddingNoDilation")
{
DilationTestOptions options;
options.m_IsDepthwiseConvolution = false;
@@ -41,7 +40,7 @@ BOOST_AUTO_TEST_CASE(ConvolutionImplicitPaddingNoDilation)
DilationTestImpl<hal_1_2::HalPolicy>(options);
}
-BOOST_AUTO_TEST_CASE(ConvolutionImplicitPaddingDilation)
+DOCTEST_TEST_CASE("ConvolutionImplicitPaddingDilation")
{
DilationTestOptions options;
options.m_IsDepthwiseConvolution = false;
@@ -51,7 +50,7 @@ BOOST_AUTO_TEST_CASE(ConvolutionImplicitPaddingDilation)
DilationTestImpl<hal_1_2::HalPolicy>(options);
}
-BOOST_AUTO_TEST_CASE(DepthwiseConvolutionExplicitPaddingNoDilation)
+DOCTEST_TEST_CASE("DepthwiseConvolutionExplicitPaddingNoDilation")
{
DilationTestOptions options;
options.m_IsDepthwiseConvolution = true;
@@ -61,7 +60,7 @@ BOOST_AUTO_TEST_CASE(DepthwiseConvolutionExplicitPaddingNoDilation)
DilationTestImpl<hal_1_2::HalPolicy>(options);
}
-BOOST_AUTO_TEST_CASE(DepthwiseConvolutionExplicitPaddingDilation)
+DOCTEST_TEST_CASE("DepthwiseConvolutionExplicitPaddingDilation")
{
DilationTestOptions options;
options.m_IsDepthwiseConvolution = true;
@@ -71,7 +70,7 @@ BOOST_AUTO_TEST_CASE(DepthwiseConvolutionExplicitPaddingDilation)
DilationTestImpl<hal_1_2::HalPolicy>(options);
}
-BOOST_AUTO_TEST_CASE(DepthwiseConvolutionImplicitPaddingNoDilation)
+DOCTEST_TEST_CASE("DepthwiseConvolutionImplicitPaddingNoDilation")
{
DilationTestOptions options;
options.m_IsDepthwiseConvolution = true;
@@ -81,7 +80,7 @@ BOOST_AUTO_TEST_CASE(DepthwiseConvolutionImplicitPaddingNoDilation)
DilationTestImpl<hal_1_2::HalPolicy>(options);
}
-BOOST_AUTO_TEST_CASE(DepthwiseConvolutionImplicitPaddingDilation)
+DOCTEST_TEST_CASE("DepthwiseConvolutionImplicitPaddingDilation")
{
DilationTestOptions options;
options.m_IsDepthwiseConvolution = true;
@@ -91,4 +90,4 @@ BOOST_AUTO_TEST_CASE(DepthwiseConvolutionImplicitPaddingDilation)
DilationTestImpl<hal_1_2::HalPolicy>(options);
}
-BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
+}
\ No newline at end of file
diff --git a/test/1.2/Lstm.cpp b/test/1.2/Lstm.cpp
index 03f7fe4..7a2b394 100644
--- a/test/1.2/Lstm.cpp
+++ b/test/1.2/Lstm.cpp
@@ -1,51 +1,72 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "../Lstm.hpp"
-#include <boost/test/data/test_case.hpp>
-
-BOOST_AUTO_TEST_SUITE(LstmTests)
-
using namespace armnn_driver;
-BOOST_DATA_TEST_CASE(LstmNoCifgNoPeepholeNoProjectionTest, COMPUTE_DEVICES)
+#if defined(ARMNNREF_ENABLED)
+DOCTEST_TEST_SUITE("LstmTests_1.2_CpuRef")
{
- LstmNoCifgNoPeepholeNoProjection<hal_1_2::HalPolicy>(sample);
-}
-BOOST_DATA_TEST_CASE(LstmCifgPeepholeNoProjectionTest, COMPUTE_DEVICES)
-{
- LstmCifgPeepholeNoProjection<hal_1_2::HalPolicy>(sample);
-}
+ DOCTEST_TEST_CASE("LstmNoCifgNoPeepholeNoProjectionTest_1.2_armnn::Compute::CpuRef")
+ {
+ LstmNoCifgNoPeepholeNoProjection<hal_1_2::HalPolicy>(armnn::Compute::CpuRef);
+ }
-BOOST_DATA_TEST_CASE(LstmNoCifgPeepholeProjectionTest, COMPUTE_DEVICES)
-{
- LstmNoCifgPeepholeProjection<hal_1_2::HalPolicy>(sample);
-}
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionTest_1.2_CpuRef")
+ {
+ LstmCifgPeepholeNoProjection<hal_1_2::HalPolicy>(armnn::Compute::CpuRef);
+ }
-BOOST_DATA_TEST_CASE(LstmCifgPeepholeNoProjectionBatch2Test, COMPUTE_DEVICES)
-{
- LstmCifgPeepholeNoProjectionBatch2<hal_1_2::HalPolicy>(sample);
-}
+ DOCTEST_TEST_CASE("LstmNoCifgPeepholeProjectionTest_1.2_CpuRef")
+ {
+ LstmNoCifgPeepholeProjection<hal_1_2::HalPolicy>(armnn::Compute::CpuRef);
+ }
-BOOST_DATA_TEST_CASE(LstmNoCifgPeepholeProjectionNoClippingLayerNormTest, COMPUTE_DEVICES)
-{
- LstmNoCifgPeepholeProjectionNoClippingLayerNorm<hal_1_2::HalPolicy>(sample);
-}
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionBatch2Test_1.2_CpuRef")
+ {
+ LstmCifgPeepholeNoProjectionBatch2<hal_1_2::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("QuantizedLstmTest_1.2_CpuRef")
+ {
+ QuantizedLstm<hal_1_2::HalPolicy>(armnn::Compute::CpuRef);
+ }
-BOOST_DATA_TEST_CASE(LstmCifgPeepholeProjectionNoClippingLayerNormTest, COMPUTE_DEVICES)
-{
- LstmCifgPeepholeProjectionNoClippingLayerNorm<hal_1_2::HalPolicy>(sample);
}
+#endif
#if defined(ARMCOMPUTECL_ENABLED)
-BOOST_DATA_TEST_CASE(QuantizedLstmTest, COMPUTE_DEVICES)
+DOCTEST_TEST_SUITE("LstmTests_1.2_GpuAcc")
{
- QuantizedLstm<hal_1_2::HalPolicy>(sample);
+
+ DOCTEST_TEST_CASE("LstmNoCifgNoPeepholeNoProjectionTest_1.2_GpuAcc")
+ {
+ LstmNoCifgNoPeepholeNoProjection<hal_1_2::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionTest_1.2_GpuAcc")
+ {
+ LstmCifgPeepholeNoProjection<hal_1_2::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmNoCifgPeepholeProjectionTest_1.2_GpuAcc")
+ {
+ LstmNoCifgPeepholeProjection<hal_1_2::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("LstmCifgPeepholeNoProjectionBatch2Test_1.2_GpuAcc")
+ {
+ LstmCifgPeepholeNoProjectionBatch2<hal_1_2::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("QuantizedLstmTest_1.2_GpuAcc")
+ {
+ QuantizedLstm<hal_1_2::HalPolicy>(armnn::Compute::GpuAcc);
+ }
+
}
#endif
-
-BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/1.2/Mean.cpp b/test/1.2/Mean.cpp
new file mode 100644
index 0000000..a2a8b7a
--- /dev/null
+++ b/test/1.2/Mean.cpp
@@ -0,0 +1,204 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "../DriverTestHelpers.hpp"
+#include "../TestHalfTensor.hpp"
+
+#include <1.2/HalPolicy.hpp>
+
+#include <array>
+
+using Half = half_float::half;
+
+using namespace android::hardware;
+using namespace driverTestHelpers;
+using namespace armnn_driver;
+
+using HalPolicy = hal_1_2::HalPolicy;
+using RequestArgument = V1_0::RequestArgument;
+
+namespace
+{
+
+void MeanTestImpl(const TestHalfTensor& input,
+ const hidl_vec<uint32_t>& axisDimensions,
+ const int32_t* axisValues,
+ int32_t keepDims,
+ const TestHalfTensor& expectedOutput,
+ bool fp16Enabled,
+ armnn::Compute computeDevice)
+{
+ auto driver = std::make_unique<ArmnnDriver>(DriverOptions(computeDevice, fp16Enabled));
+
+ HalPolicy::Model model = {};
+
+ AddInputOperand<HalPolicy>(model, input.GetDimensions(), V1_2::OperandType::TENSOR_FLOAT16);
+
+ AddTensorOperand<HalPolicy>(model,
+ axisDimensions,
+ const_cast<int32_t*>(axisValues),
+ HalPolicy::OperandType::TENSOR_INT32);
+
+ AddIntOperand<HalPolicy>(model, keepDims);
+
+ AddOutputOperand<HalPolicy>(model, expectedOutput.GetDimensions(), V1_2::OperandType::TENSOR_FLOAT16);
+
+ model.operations.resize(1);
+ model.operations[0].type = HalPolicy::OperationType::MEAN;
+ model.operations[0].inputs = hidl_vec<uint32_t>{ 0, 1, 2 };
+ model.operations[0].outputs = hidl_vec<uint32_t>{ 3 };
+ model.relaxComputationFloat32toFloat16 = fp16Enabled;
+
+ //android::sp<V1_0::IPreparedModel> preparedModel = PrepareModel(model, *driver);
+ android::sp<V1_2::IPreparedModel> preparedModel = PrepareModel_1_2(model, *driver);
+
+ // The request's memory pools will follow the same order as the inputs
+ V1_0::DataLocation inLoc = {};
+ inLoc.poolIndex = 0;
+ inLoc.offset = 0;
+ inLoc.length = input.GetNumElements() * sizeof(Half);
+ RequestArgument inArg = {};
+ inArg.location = inLoc;
+ inArg.dimensions = input.GetDimensions();
+
+ // An additional memory pool is needed for the output
+ V1_0::DataLocation outLoc = {};
+ outLoc.poolIndex = 1;
+ outLoc.offset = 0;
+ outLoc.length = expectedOutput.GetNumElements() * sizeof(Half);
+ RequestArgument outArg = {};
+ outArg.location = outLoc;
+ outArg.dimensions = expectedOutput.GetDimensions();
+
+ // Make the request based on the arguments
+ V1_0::Request request = {};
+ request.inputs = hidl_vec<RequestArgument>{ inArg };
+ request.outputs = hidl_vec<RequestArgument>{ outArg };
+
+ // Set the input data
+ AddPoolAndSetData(input.GetNumElements(), request, input.GetData());
+
+ // Add memory for the output
+ android::sp<IMemory> outMemory = AddPoolAndGetData<Half>(expectedOutput.GetNumElements(), request);
+ const Half* outputData = static_cast<const Half*>(static_cast<void*>(outMemory->getPointer()));
+
+ if (preparedModel.get() != nullptr)
+ {
+ V1_0::ErrorStatus execStatus = Execute(preparedModel, request);
+ DOCTEST_CHECK((int)execStatus == (int)V1_0::ErrorStatus::NONE);
+ }
+
+ const Half* expectedOutputData = expectedOutput.GetData();
+ for (unsigned int i = 0; i < expectedOutput.GetNumElements(); i++)
+ {
+ DOCTEST_CHECK(outputData[i] == expectedOutputData[i]);
+ }
+}
+
+} // anonymous namespace
+
+DOCTEST_TEST_SUITE("MeanTests_1.2_CpuRef")
+{
+
+DOCTEST_TEST_CASE("MeanFp16NoKeepDimsTest_CpuRef")
+{
+ using namespace half_float::literal;
+
+ TestHalfTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ { 1.0_h, 2.0_h, 3.0_h, 4.0_h, 5.0_h, 6.0_h, 7.0_h, 8.0_h, 9.0_h, 10.0_h,
+ 11.0_h, 12.0_h, 13.0_h, 14.0_h, 15.0_h, 16.0_h, 17.0_h, 18.0_h, 19.0_h,
+ 20.0_h, 21.0_h, 22.0_h, 23.0_h, 24.0_h } };
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ TestHalfTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0_h, 13.0_h } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("MeanFp16KeepDimsTest_CpuRef")
+{
+ using namespace half_float::literal;
+
+ TestHalfTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0_h, 1.0_h, 2.0_h, 2.0_h, 3.0_h, 3.0_h } };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ TestHalfTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0_h, 2.0_h } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::CpuRef);
+}
+
+}
+
+#ifdef ARMCOMPUTECL_ENABLED
+DOCTEST_TEST_SUITE("MeanTests_1.2_CpuAcc")
+{
+ DOCTEST_TEST_CASE("MeanFp16NoKeepDimsTest_CpuAcc")
+ {
+ using namespace half_float::literal;
+
+ std::vector<Half> in = { 1.0_h, 2.0_h, 3.0_h, 4.0_h, 5.0_h, 6.0_h, 7.0_h, 8.0_h, 9.0_h, 10.0_h,
+ 11.0_h, 12.0_h, 13.0_h, 14.0_h, 15.0_h, 16.0_h, 17.0_h, 18.0_h, 19.0_h,
+ 20.0_h, 21.0_h, 22.0_h, 23.0_h, 24.0_h };
+ TestHalfTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ in};
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ std::vector<Half> out = { 12.0_h, 13.0_h };
+ TestHalfTensor expectedOutput{ armnn::TensorShape{ 2 }, out };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::CpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("MeanFp16KeepDimsTest_CpuAcc")
+ {
+ using namespace half_float::literal;
+
+ std::vector<Half> in = { 1.0_h, 1.0_h, 2.0_h, 2.0_h, 3.0_h, 3.0_h };
+ TestHalfTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, in };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ std::vector<Half> out = { 2.0_h, 2.0_h };
+ TestHalfTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, out };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::CpuAcc);
+ }
+}
+
+DOCTEST_TEST_SUITE("MeanTests_1.2_GpuAcc")
+{
+ DOCTEST_TEST_CASE("MeanFp16NoKeepDimsTest_GpuAcc")
+ {
+ using namespace half_float::literal;
+
+ TestHalfTensor input{ armnn::TensorShape{ 4, 3, 2 },
+ { 1.0_h, 2.0_h, 3.0_h, 4.0_h, 5.0_h, 6.0_h, 7.0_h, 8.0_h, 9.0_h, 10.0_h,
+ 11.0_h, 12.0_h, 13.0_h, 14.0_h, 15.0_h, 16.0_h, 17.0_h, 18.0_h, 19.0_h,
+ 20.0_h, 21.0_h, 22.0_h, 23.0_h, 24.0_h } };
+ hidl_vec<uint32_t> axisDimensions = { 2 };
+ int32_t axisValues[] = { 0, 1 };
+ int32_t keepDims = 0;
+ TestHalfTensor expectedOutput{ armnn::TensorShape{ 2 }, { 12.0_h, 13.0_h } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::GpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("MeanFp16KeepDimsTest_GpuAcc")
+ {
+ using namespace half_float::literal;
+
+ TestHalfTensor input{ armnn::TensorShape{ 1, 1, 3, 2 }, { 1.0_h, 1.0_h, 2.0_h, 2.0_h, 3.0_h, 3.0_h } };
+ hidl_vec<uint32_t> axisDimensions = { 1 };
+ int32_t axisValues[] = { 2 };
+ int32_t keepDims = 1;
+ TestHalfTensor expectedOutput{ armnn::TensorShape{ 1, 1, 1, 2 }, { 2.0_h, 2.0_h } };
+
+ MeanTestImpl(input, axisDimensions, axisValues, keepDims, expectedOutput, true, armnn::Compute::GpuAcc);
+ }
+}
+#endif
diff --git a/test/1.2/UnidirectionalSequenceLstm.cpp b/test/1.2/UnidirectionalSequenceLstm.cpp
new file mode 100644
index 0000000..fd35aa4
--- /dev/null
+++ b/test/1.2/UnidirectionalSequenceLstm.cpp
@@ -0,0 +1,40 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "../UnidirectionalSequenceLstm.hpp"
+
+using namespace armnn_driver;
+
+DOCTEST_TEST_SUITE("UnidirectionalSequenceLstmTests_1.2_CpuRef")
+{
+
+ DOCTEST_TEST_CASE("UnidirectionalSequenceLstmLayerFloat32Test_1.2_CpuRef")
+ {
+ UnidirectionalSequenceLstmLayerFloat32TestImpl<hal_1_2::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("UnidirectionalSequenceLstmLayerFloat32TimeMajorTest_1.2_CpuRef")
+ {
+ UnidirectionalSequenceLstmLayerFloat32TimeMajorTestImpl<hal_1_2::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionTest_1.2_CpuRef")
+ {
+ UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionTestImpl<hal_1_2::HalPolicy>
+ (armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTest_1.2_CpuRef")
+ {
+ UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl<hal_1_2::HalPolicy>
+ (armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("UnidirectionalSequenceLstmWithCifgWithPeepholeNoProjectionTest_1.2_CpuRef")
+ {
+ UnidirectionalSequenceLstmWithCifgWithPeepholeNoProjectionTestImpl<hal_1_2::HalPolicy>(armnn::Compute::CpuRef);
+ }
+
+}
\ No newline at end of file
diff --git a/test/1.3/QLstm.cpp b/test/1.3/QLstm.cpp
index 2dbd8b3..0846619 100644
--- a/test/1.3/QLstm.cpp
+++ b/test/1.3/QLstm.cpp
@@ -1,23 +1,14 @@
//
-// Copyright © 2020 Arm Ltd. All rights reserved.
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "../DriverTestHelpers.hpp"
-#include "../TestTensor.hpp"
-#include "../1.3/HalPolicy.hpp"
-
-#include <armnn/utility/IgnoreUnused.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include <boost/test/data/test_case.hpp>
-#include <boost/math/special_functions/relative_difference.hpp>
+#include <1.3/HalPolicy.hpp>
#include <array>
-BOOST_AUTO_TEST_SUITE(QLSTMTests)
-
using ArmnnDriver = armnn_driver::ArmnnDriver;
using DriverOptions = armnn_driver::DriverOptions;
@@ -26,13 +17,15 @@ using namespace android::hardware;
using HalPolicy = hal_1_3::HalPolicy;
+static const float TOLERANCE = 1.0f;
+
namespace
{
template<typename T>
RequestArgument CreateRequestArgument(const std::vector<T>& value, unsigned int poolIndex)
{
- DataLocation inputInloc = {};
+ V1_0::DataLocation inputInloc = {};
inputInloc.poolIndex = poolIndex;
inputInloc.offset = 0;
inputInloc.length = value.size() * sizeof(T);
@@ -42,26 +35,6 @@ RequestArgument CreateRequestArgument(const std::vector<T>& value, unsigned int
return inputRequestArgument;
}
-// Returns true if the relative difference between two float values is less than the tolerance value given.
-// This is used because the floating point comparison tolerance (set on each BOOST_AUTO_TEST_CASE) does not work!
-bool TolerantCompareEqual(float a, float b, float tolerance = 1.0f)
-{
- float rd;
- if (a == 0.0f)
- {
- rd = fabs(b);
- }
- else if (b == 0.0f)
- {
- rd = fabs(a);
- }
- else
- {
- rd = boost::math::relative_difference(a, b);
- }
- return rd < tolerance;
-}
-
// Helper function to create an OperandLifeTime::NO_VALUE for testing.
// To be used on optional input operands that have no values - these are valid and should be tested.
HalPolicy::OperandLifeTime CreateNoValueLifeTime(const hidl_vec<uint32_t>& dimensions)
@@ -85,12 +58,6 @@ void ExecuteModel(const armnn_driver::hal_1_3::HalPolicy::Model& model,
}
}
-#ifndef ARMCOMPUTECL_ENABLED
-static const std::array<armnn::Compute, 1> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef }};
-#else
-static const std::array<armnn::Compute, 2> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef, armnn::Compute::CpuAcc }};
-#endif
-
// Add our own tests here since we skip the qlstm tests which Google supplies (because of non-const weights)
void QLstmTestImpl(const hidl_vec<uint32_t>& inputDimensions,
const std::vector<int8_t>& inputValue,
@@ -527,8 +494,9 @@ void QLstmTestImpl(const hidl_vec<uint32_t>& inputDimensions,
// check the results
for (size_t i = 0; i < outputStateOutValue.size(); ++i)
{
- BOOST_TEST(TolerantCompareEqual(outputStateOutValue[i], outputStateOutData[i]),
- "outputStateOut[" << i << "]: " << outputStateOutValue[i] << " != " << outputStateOutData[i]);
+ DOCTEST_CHECK_MESSAGE(outputStateOutValue[i] == doctest::Approx( outputStateOutData[i] ).epsilon(TOLERANCE),
+ "outputStateOut[" << i << "]: " << outputStateOutValue[i] << " != "
+ << outputStateOutData[i]);
}
// CELL STATE OUTPUT Does not match currently: IVGCVSW-4860 Verify remaining VTS tests (2) for QLSTM
@@ -541,8 +509,8 @@ void QLstmTestImpl(const hidl_vec<uint32_t>& inputDimensions,
for (size_t i = 0; i < outputValue.size(); ++i)
{
- BOOST_TEST(TolerantCompareEqual(outputValue[i], outputData[i]),
- "output[" << i << "]: " << outputValue[i] << " != " << outputData[i]);
+ DOCTEST_CHECK_MESSAGE(outputValue[i] == doctest::Approx( outputData[i] ).epsilon(TOLERANCE),
+ "output[" << i << "]: " << outputValue[i] << " != " << outputData[i]);
}
}
@@ -1028,19 +996,38 @@ void DynamicOutputQLstmWithNoProjection(armnn::Compute compute)
} // anonymous namespace
// Support is not added yet
-//BOOST_DATA_TEST_CASE(QLSTMWithProjectionTest, COMPUTE_DEVICES)
+//TEST_CASE(QLSTMWithProjectionTest, COMPUTE_DEVICES)
//{
// QLstmWithProjection(sample);
//}
-BOOST_DATA_TEST_CASE(QLSTMWithNoProjectionTest, COMPUTE_DEVICES)
+DOCTEST_TEST_SUITE("QLSTMTests_CpuRef")
{
- QLstmWithNoProjection(sample);
-}
-BOOST_DATA_TEST_CASE(DynamicOutputQLSTMWithNoProjectionTest, COMPUTE_DEVICES)
-{
- DynamicOutputQLstmWithNoProjection(sample);
+ DOCTEST_TEST_CASE("QLSTMWithNoProjectionTest_CpuRef")
+ {
+ QLstmWithNoProjection(armnn::Compute::CpuRef);
+ }
+
+ DOCTEST_TEST_CASE("DynamicOutputQLstmWithNoProjection_CpuRef")
+ {
+ DynamicOutputQLstmWithNoProjection(armnn::Compute::CpuRef);
+ }
+
}
+#ifdef ARMCOMPUTECL_ENABLED
+DOCTEST_TEST_SUITE("QLSTMTests_CpuAcc")
+{
-BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
+ DOCTEST_TEST_CASE("QLSTMWithNoProjectionTest_CpuAcc")
+ {
+ QLstmWithNoProjection(armnn::Compute::CpuAcc);
+ }
+
+ DOCTEST_TEST_CASE("DynamicOutputQLstmWithNoProjection_CpuAcc")
+ {
+ DynamicOutputQLstmWithNoProjection(armnn::Compute::CpuAcc);
+ }
+
+}
+#endif
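In QLstm.cpp the hand-rolled TolerantCompareEqual helper (built on boost::math::relative_difference) disappears along with Boost; the checks now rely on doctest::Approx, whose epsilon() expresses a tolerance relative to the operands' magnitude, so TOLERANCE = 1.0f is roughly as permissive as the old rd < 1.0f comparison. A standalone sketch of the replacement check, assuming only doctest (values below are illustrative):

#include <doctest/doctest.h>

static const float TOLERANCE = 1.0f;   // mirrors the constant added to QLstm.cpp above

DOCTEST_TEST_CASE("ApproxComparisonSketch")
{
    float expected = 0.052f;
    float actual   = 0.049f;
    // doctest::Approx scales its tolerance by the magnitude of the compared values,
    // which is what the removed TolerantCompareEqual approximated by hand.
    DOCTEST_CHECK_MESSAGE(expected == doctest::Approx(actual).epsilon(TOLERANCE),
                          "value " << expected << " != " << actual);
}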
diff --git a/test/1.3/QosTests.cpp b/test/1.3/QosTests.cpp
index d51152b..cd8ac33 100644
--- a/test/1.3/QosTests.cpp
+++ b/test/1.3/QosTests.cpp
@@ -4,18 +4,11 @@
//
#include "../DriverTestHelpers.hpp"
-#include "../TestTensor.hpp"
-#include "../1.3/HalPolicy.hpp"
-
-#include <armnn/utility/IgnoreUnused.hpp>
-
-#include <boost/test/unit_test.hpp>
-#include <boost/test/data/test_case.hpp>
-
-
-BOOST_AUTO_TEST_SUITE(QosTests)
+#include <1.3/HalPolicy.hpp>
+DOCTEST_TEST_SUITE("QosTests")
+{
using ArmnnDriver = armnn_driver::ArmnnDriver;
using DriverOptions = armnn_driver::DriverOptions;
@@ -40,13 +33,7 @@ void ExecuteModel(const armnn_driver::hal_1_3::HalPolicy::Model& model,
}
}
-#ifndef ARMCOMPUTECL_ENABLED
-static const std::array<armnn::Compute, 1> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef }};
-#else
-static const std::array<armnn::Compute, 2> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef, armnn::Compute::CpuAcc }};
-#endif
-
-BOOST_AUTO_TEST_CASE(ConcurrentExecuteWithQosPriority)
+DOCTEST_TEST_CASE("ConcurrentExecuteWithQosPriority")
{
ALOGI("ConcurrentExecuteWithQOSPriority: entry");
@@ -102,24 +89,24 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecuteWithQosPriority)
preparedModelsSize++;
}
- BOOST_TEST(maxRequests == preparedModelsSize);
+ DOCTEST_CHECK(maxRequests == preparedModelsSize);
// construct the request data
- DataLocation inloc = {};
- inloc.poolIndex = 0;
- inloc.offset = 0;
- inloc.length = 3 * sizeof(float);
- RequestArgument input = {};
- input.location = inloc;
- input.dimensions = hidl_vec<uint32_t>{};
-
- DataLocation outloc = {};
- outloc.poolIndex = 1;
- outloc.offset = 0;
- outloc.length = 1 * sizeof(float);
- RequestArgument output = {};
- output.location = outloc;
- output.dimensions = hidl_vec<uint32_t>{};
+ V1_0::DataLocation inloc = {};
+ inloc.poolIndex = 0;
+ inloc.offset = 0;
+ inloc.length = 3 * sizeof(float);
+ RequestArgument input = {};
+ input.location = inloc;
+ input.dimensions = hidl_vec<uint32_t>{};
+
+ V1_0::DataLocation outloc = {};
+ outloc.poolIndex = 1;
+ outloc.offset = 0;
+ outloc.length = 1 * sizeof(float);
+ RequestArgument output = {};
+ output.location = outloc;
+ output.dimensions = hidl_vec<uint32_t>{};
// build the requests
V1_0::Request requests[maxRequests];
@@ -162,7 +149,7 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecuteWithQosPriority)
ALOGI("ConcurrentExecuteWithQOSPriority: waiting for callbacks");
for (size_t i = 0; i < maxRequests; ++i)
{
- ARMNN_ASSERT(cb[i]);
+ DOCTEST_CHECK(cb[i]);
cb[i]->wait();
}
@@ -172,15 +159,15 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecuteWithQosPriority)
{
if (i < 15)
{
- BOOST_TEST(outdata[i][0] == 152);
+ DOCTEST_CHECK(outdata[i][0] == 152);
}
else if (i < 30)
{
- BOOST_TEST(outdata[i][0] == 141);
+ DOCTEST_CHECK(outdata[i][0] == 141);
}
else
{
- BOOST_TEST(outdata[i][0] == 159);
+ DOCTEST_CHECK(outdata[i][0] == 159);
}
}
@@ -189,4 +176,4 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecuteWithQosPriority)
} // anonymous namespace
-BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
+}
\ No newline at end of file
diff --git a/test/Concat.cpp b/test/Concat.cpp
index 0bc5424..fc4a56c 100644
--- a/test/Concat.cpp
+++ b/test/Concat.cpp
@@ -1,36 +1,24 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
+
#include "DriverTestHelpers.hpp"
#include "TestTensor.hpp"
-#include "../1.0/HalPolicy.hpp"
-
-#include <boost/test/unit_test.hpp>
-#include <boost/test/data/test_case.hpp>
-
#include <array>
#include <log/log.h>
-
-BOOST_AUTO_TEST_SUITE(ConcatTests)
-
using namespace android::hardware;
using namespace driverTestHelpers;
using namespace armnn_driver;
using HalPolicy = hal_1_0::HalPolicy;
+using RequestArgument = V1_0::RequestArgument;
namespace
{
-#ifndef ARMCOMPUTECL_ENABLED
- static const std::array<armnn::Compute, 1> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef }};
-#else
- static const std::array<armnn::Compute, 2> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef, armnn::Compute::GpuAcc }};
-#endif
-
void
ConcatTestImpl(const std::vector<const TestTensor*> & inputs,
int32_t concatAxis,
@@ -60,19 +48,19 @@ ConcatTestImpl(const std::vector<const TestTensor*> & inputs,
model.operations[0].outputs = hidl_vec<uint32_t>{static_cast<uint32_t>(inputs.size()+1)};
// make the prepared model
- V1_0::ErrorStatus prepareStatus=V1_0::ErrorStatus::NONE;
+ V1_0::ErrorStatus prepareStatus = V1_0::ErrorStatus::NONE;
android::sp<V1_0::IPreparedModel> preparedModel = PrepareModelWithStatus(model,
*driver,
prepareStatus,
expectedPrepareStatus);
- BOOST_TEST(prepareStatus == expectedPrepareStatus);
+ DOCTEST_CHECK((int)prepareStatus == (int)expectedPrepareStatus);
if (prepareStatus != V1_0::ErrorStatus::NONE)
{
// prepare failed, we cannot continue
return;
}
- BOOST_TEST(preparedModel.get() != nullptr);
+ DOCTEST_CHECK(preparedModel.get() != nullptr);
if (preparedModel.get() == nullptr)
{
// don't spoil other tests if prepare failed
@@ -89,7 +77,7 @@ ConcatTestImpl(const std::vector<const TestTensor*> & inputs,
// the inputs
for (uint32_t i = 0; i<inputs.size(); ++i)
{
- DataLocation inloc = {};
+ V1_0::DataLocation inloc = {};
inloc.poolIndex = i;
inloc.offset = 0;
inloc.length = inputs[i]->GetNumElements() * sizeof(float);
@@ -101,7 +89,7 @@ ConcatTestImpl(const std::vector<const TestTensor*> & inputs,
// and an additional memory pool is needed for the output
{
- DataLocation outloc = {};
+ V1_0::DataLocation outloc = {};
outloc.poolIndex = inputs.size();
outloc.offset = 0;
outloc.length = expectedOutputTensor.GetNumElements() * sizeof(float);
@@ -129,9 +117,9 @@ ConcatTestImpl(const std::vector<const TestTensor*> & inputs,
float* outdata = static_cast<float*>(static_cast<void*>(outMemory->getPointer()));
// run the execution
- ARMNN_ASSERT(preparedModel.get() != nullptr);
+ DOCTEST_CHECK(preparedModel.get() != nullptr);
auto execStatus = Execute(preparedModel, request, expectedExecStatus);
- BOOST_TEST(execStatus == expectedExecStatus);
+ DOCTEST_CHECK((int)execStatus == (int)expectedExecStatus);
if (execStatus == V1_0::ErrorStatus::NONE)
{
@@ -139,359 +127,607 @@ ConcatTestImpl(const std::vector<const TestTensor*> & inputs,
const float * expectedOutput = expectedOutputTensor.GetData();
for (unsigned int i=0; i<expectedOutputTensor.GetNumElements();++i)
{
- BOOST_TEST(outdata[i] == expectedOutput[i]);
+ DOCTEST_CHECK(outdata[i] == expectedOutput[i]);
}
}
}
-} // namespace <anonymous>
-
-
-BOOST_DATA_TEST_CASE(SimpleConcatAxis0, COMPUTE_DEVICES)
+/// Test cases...
+void SimpleConcatAxis0(armnn::Compute computeDevice)
{
int32_t axis = 0;
- TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1,1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{3,1,1,1},{0,1,2}};
-
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ TestTensor expected{armnn::TensorShape{3, 1, 1, 1}, {0, 1, 2}};
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(ConcatAxis0_NoInterleave, COMPUTE_DEVICES)
+void ConcatAxis0NoInterleave(armnn::Compute computeDevice)
{
int32_t axis = 0;
- TestTensor aIn{armnn::TensorShape{2,1,2,1},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{3,1,2,1},{4, 5,
- 6, 7,
- 8, 9}};
- TestTensor cIn{armnn::TensorShape{1,1,2,1},{10, 11}};
-
- TestTensor expected{armnn::TensorShape{6,1,2,1},{0, 1,
- 2, 3,
- 4, 5,
- 6, 7,
- 8, 9,
- 10, 11}};
-
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ TestTensor aIn{armnn::TensorShape{2, 1, 2, 1}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{3, 1, 2, 1}, {4, 5,
+ 6, 7,
+ 8, 9}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 2, 1}, {10, 11}};
+
+ TestTensor expected{armnn::TensorShape{6, 1, 2, 1}, {0, 1,
+ 2, 3,
+ 4, 5,
+ 6, 7,
+ 8, 9,
+ 10, 11}};
+
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxis1, COMPUTE_DEVICES)
+void SimpleConcatAxis1(armnn::Compute computeDevice)
{
int32_t axis = 1;
- TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1,1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{1,3,1,1},{0,1,2}};
+ TestTensor expected{armnn::TensorShape{1, 3, 1, 1}, {0, 1, 2}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(ConcatAxis1_NoInterleave, COMPUTE_DEVICES)
+void ConcatAxis1NoInterleave(armnn::Compute computeDevice)
{
int32_t axis = 1;
- TestTensor aIn{armnn::TensorShape{1,2,2,1},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{1,3,2,1},{4, 5,
- 6, 7,
- 8, 9}};
- TestTensor cIn{armnn::TensorShape{1,1,2,1},{10, 11}};
-
- TestTensor expected{armnn::TensorShape{1,6,2,1},{0, 1,
- 2, 3,
- 4, 5,
- 6, 7,
- 8, 9,
- 10, 11}};
-
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ TestTensor aIn{armnn::TensorShape{1, 2, 2, 1}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{1, 3, 2, 1}, {4, 5,
+ 6, 7,
+ 8, 9}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 2, 1}, {10, 11}};
+
+ TestTensor expected{armnn::TensorShape{1, 6, 2, 1}, {0, 1,
+ 2, 3,
+ 4, 5,
+ 6, 7,
+ 8, 9,
+ 10, 11}};
+
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxis1_DoInterleave, COMPUTE_DEVICES)
+void SimpleConcatAxis1DoInterleave(armnn::Compute computeDevice)
{
int32_t axis = 1;
- TestTensor aIn{armnn::TensorShape{2,2,1,1},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{2,3,1,1},{4, 5, 6,
- 7, 8, 9}};
- TestTensor cIn{armnn::TensorShape{2,1,1,1},{10,
- 11}};
-
- TestTensor expected{armnn::TensorShape{2,6,1,1},{0, 1, 4, 5, 6, 10,
- 2, 3, 7, 8, 9, 11}};
+ TestTensor aIn{armnn::TensorShape{2, 2, 1, 1}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{2, 3, 1, 1}, {4, 5, 6,
+ 7, 8, 9}};
+ TestTensor cIn{armnn::TensorShape{2, 1, 1, 1}, {10,
+ 11}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ TestTensor expected{armnn::TensorShape{2, 6, 1, 1}, {0, 1, 4, 5, 6, 10,
+ 2, 3, 7, 8, 9, 11}};
+
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxis2, COMPUTE_DEVICES)
+void SimpleConcatAxis2(armnn::Compute computeDevice)
{
int32_t axis = 2;
- TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1,1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{1,1,3,1},{0,1,2}};
+ TestTensor expected{armnn::TensorShape{1, 1, 3, 1}, {0, 1, 2}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(ConcatAxis2_NoInterleave, COMPUTE_DEVICES)
+void ConcatAxis2NoInterleave(armnn::Compute computeDevice)
{
int32_t axis = 2;
- TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{1,1,3,2},{4, 5,
- 6, 7,
- 8, 9}};
- TestTensor cIn{armnn::TensorShape{1,1,1,2},{10, 11}};
-
- TestTensor expected{armnn::TensorShape{1,1,6,2},{0, 1,
- 2, 3,
- 4, 5,
- 6, 7,
- 8, 9,
- 10, 11}};
-
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ TestTensor aIn{armnn::TensorShape{1, 1, 2, 2}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 3, 2}, {4, 5,
+ 6, 7,
+ 8, 9}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 1, 2}, {10, 11}};
+
+ TestTensor expected{armnn::TensorShape{1, 1, 6, 2}, {0, 1,
+ 2, 3,
+ 4, 5,
+ 6, 7,
+ 8, 9,
+ 10, 11}};
+
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxis2_DoInterleave, COMPUTE_DEVICES)
+void SimpleConcatAxis2DoInterleave(armnn::Compute computeDevice)
{
int32_t axis = 2;
- TestTensor aIn{armnn::TensorShape{1,2,2,1},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{1,2,3,1},{4, 5, 6,
- 7, 8, 9}};
- TestTensor cIn{armnn::TensorShape{1,2,1,1},{10,
- 11}};
-
- TestTensor expected{armnn::TensorShape{1,2,6,1},{0, 1, 4, 5, 6, 10,
- 2, 3, 7, 8, 9, 11}};
+ TestTensor aIn{armnn::TensorShape{1, 2, 2, 1}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{1, 2, 3, 1}, {4, 5, 6,
+ 7, 8, 9}};
+ TestTensor cIn{armnn::TensorShape{1, 2, 1, 1}, {10,
+ 11}};
+
+ TestTensor expected{armnn::TensorShape{1, 2, 6, 1}, {0, 1, 4, 5, 6, 10,
+ 2, 3, 7, 8, 9, 11}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxis3, COMPUTE_DEVICES)
+void SimpleConcatAxis3(armnn::Compute computeDevice)
{
int32_t axis = 3;
- TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1,1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{1,1,1,3},{0,1,2}};
+ TestTensor expected{armnn::TensorShape{1, 1, 1, 3}, {0, 1, 2}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxis3_DoInterleave, COMPUTE_DEVICES)
+void SimpleConcatAxis3DoInterleave(armnn::Compute computeDevice)
{
int32_t axis = 3;
- TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{1,1,2,3},{4, 5, 6,
- 7, 8, 9}};
- TestTensor cIn{armnn::TensorShape{1,1,2,1},{10,
- 11}};
-
- TestTensor expected{armnn::TensorShape{1,1,2,6},{0, 1, 4, 5, 6, 10,
- 2, 3, 7, 8, 9, 11}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 2, 2}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 2, 3}, {4, 5, 6,
+ 7, 8, 9}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 2, 1}, {10,
+ 11}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ TestTensor expected{armnn::TensorShape{1, 1, 2, 6}, {0, 1, 4, 5, 6, 10,
+ 2, 3, 7, 8, 9, 11}};
+
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(AxisTooBig, COMPUTE_DEVICES)
+void AxisTooBig(armnn::Compute computeDevice)
{
int32_t axis = 4;
- TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1,1},{0}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
// The axis must be within the range of [-rank(values), rank(values))
// see: https://www.tensorflow.org/api_docs/python/tf/concat
- TestTensor uncheckedOutput{armnn::TensorShape{1,1,1,1},{0}};
+ TestTensor uncheckedOutput{armnn::TensorShape{1, 1, 1, 1}, {0}};
V1_0::ErrorStatus expectedParserStatus = V1_0::ErrorStatus::GENERAL_FAILURE;
- ConcatTestImpl({&aIn, &bIn}, axis, uncheckedOutput, sample, expectedParserStatus);
+ ConcatTestImpl({&aIn, &bIn}, axis, uncheckedOutput, computeDevice, expectedParserStatus);
}
-BOOST_DATA_TEST_CASE(AxisTooSmall, COMPUTE_DEVICES)
+void AxisTooSmall(armnn::Compute computeDevice)
{
int32_t axis = -5;
- TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1,1},{0}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
// The axis must be within the range of [-rank(values), rank(values))
// see: https://www.tensorflow.org/api_docs/python/tf/concat
- TestTensor uncheckedOutput{armnn::TensorShape{1,1,1,1},{0}};
+ TestTensor uncheckedOutput{armnn::TensorShape{1, 1, 1, 1}, {0}};
V1_0::ErrorStatus expectedParserStatus = V1_0::ErrorStatus::GENERAL_FAILURE;
- ConcatTestImpl({&aIn, &bIn}, axis, uncheckedOutput, sample, expectedParserStatus);
+ ConcatTestImpl({&aIn, &bIn}, axis, uncheckedOutput, computeDevice, expectedParserStatus);
}
-BOOST_DATA_TEST_CASE(TooFewInputs, COMPUTE_DEVICES)
+void TooFewInputs(armnn::Compute computeDevice)
{
int32_t axis = 0;
- TestTensor aIn{armnn::TensorShape{1,1,1,1},{0}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1, 1}, {0}};
// We need at least two tensors to concatenate
V1_0::ErrorStatus expectedParserStatus = V1_0::ErrorStatus::GENERAL_FAILURE;
- ConcatTestImpl({&aIn}, axis, aIn, sample, expectedParserStatus);
+ ConcatTestImpl({&aIn}, axis, aIn, computeDevice, expectedParserStatus);
}
-BOOST_DATA_TEST_CASE(MismatchedInputDimensions, COMPUTE_DEVICES)
+void MismatchedInputDimensions(armnn::Compute computeDevice)
{
int32_t axis = 3;
- TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{1,1,2,3},{4, 5, 6,
- 7, 8, 9}};
- TestTensor mismatched{armnn::TensorShape{1,1,1,1},{10}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 2, 2}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 2, 3}, {4, 5, 6,
+ 7, 8, 9}};
+ TestTensor mismatched{armnn::TensorShape{1, 1, 1, 1}, {10}};
- TestTensor expected{armnn::TensorShape{1,1,2,6},{0, 1, 4, 5, 6, 10,
- 2, 3, 7, 8, 9, 11}};
+ TestTensor expected{armnn::TensorShape{1, 1, 2, 6}, {0, 1, 4, 5, 6, 10,
+ 2, 3, 7, 8, 9, 11}};
// The input dimensions must be compatible
V1_0::ErrorStatus expectedParserStatus = V1_0::ErrorStatus::GENERAL_FAILURE;
- ConcatTestImpl({&aIn, &bIn, &mismatched}, axis, expected, sample, expectedParserStatus);
+ ConcatTestImpl({&aIn, &bIn, &mismatched}, axis, expected, computeDevice, expectedParserStatus);
}
-BOOST_DATA_TEST_CASE(MismatchedInputRanks, COMPUTE_DEVICES)
+void MismatchedInputRanks(armnn::Compute computeDevice)
{
int32_t axis = 2;
- TestTensor aIn{armnn::TensorShape{1,1,2},{0,1}};
- TestTensor bIn{armnn::TensorShape{1,1},{4}};
- TestTensor expected{armnn::TensorShape{1,1,3},{0,1,4}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 2}, {0, 1}};
+ TestTensor bIn{armnn::TensorShape{1, 1}, {4}};
+ TestTensor expected{armnn::TensorShape{1, 1, 3}, {0, 1, 4}};
// The input dimensions must be compatible
V1_0::ErrorStatus expectedParserStatus = V1_0::ErrorStatus::GENERAL_FAILURE;
- ConcatTestImpl({&aIn, &bIn}, axis, expected, sample, expectedParserStatus);
+ ConcatTestImpl({&aIn, &bIn}, axis, expected, computeDevice, expectedParserStatus);
}
-BOOST_DATA_TEST_CASE(MismatchedOutputDimensions, COMPUTE_DEVICES)
+void MismatchedOutputDimensions(armnn::Compute computeDevice)
{
int32_t axis = 3;
- TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{1,1,2,3},{4, 5, 6,
- 7, 8, 9}};
- TestTensor cIn{armnn::TensorShape{1,1,2,1},{10,
- 11}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 2, 2}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 2, 3}, {4, 5, 6,
+ 7, 8, 9}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 2, 1}, {10,
+ 11}};
- TestTensor mismatched{armnn::TensorShape{1,1,6,2},{0, 1, 4, 5, 6, 10,
- 2, 3, 7, 8, 9, 11}};
+ TestTensor mismatched{armnn::TensorShape{1, 1, 6, 2}, {0, 1, 4, 5, 6, 10,
+ 2, 3, 7, 8, 9, 11}};
// The input and output dimensions must be compatible
V1_0::ErrorStatus expectedParserStatus = V1_0::ErrorStatus::GENERAL_FAILURE;
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, mismatched, sample, expectedParserStatus);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, mismatched, computeDevice, expectedParserStatus);
}
-BOOST_DATA_TEST_CASE(MismatchedOutputRank, COMPUTE_DEVICES)
+void MismatchedOutputRank(armnn::Compute computeDevice)
{
int32_t axis = 3;
- TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{1,1,2,3},{4, 5, 6,
- 7, 8, 9}};
- TestTensor cIn{armnn::TensorShape{1,1,2,1},{10,
- 11}};
-
- TestTensor mismatched{armnn::TensorShape{6,2},{0, 1, 4, 5, 6, 10,
- 2, 3, 7, 8, 9, 11}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 2, 2}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 2, 3}, {4, 5, 6,
+ 7, 8, 9}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 2, 1}, {10,
+ 11}};
+
+ TestTensor mismatched{armnn::TensorShape{6, 2}, {0, 1, 4, 5, 6, 10,
+ 2, 3, 7, 8, 9, 11}};
// The input and output ranks must match
V1_0::ErrorStatus expectedParserStatus = V1_0::ErrorStatus::GENERAL_FAILURE;
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, mismatched, sample, expectedParserStatus);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, mismatched, computeDevice, expectedParserStatus);
}
-BOOST_DATA_TEST_CASE(ValidNegativeAxis, COMPUTE_DEVICES)
+void ValidNegativeAxis(armnn::Compute computeDevice)
{
// this is the same as 3
// see: https://www.tensorflow.org/api_docs/python/tf/concat
int32_t axis = -1;
- TestTensor aIn{armnn::TensorShape{1,1,2,2},{0, 1,
- 2, 3}};
- TestTensor bIn{armnn::TensorShape{1,1,2,3},{4, 5, 6,
- 7, 8, 9}};
- TestTensor cIn{armnn::TensorShape{1,1,2,1},{10,
- 11}};
-
- TestTensor expected{armnn::TensorShape{1,1,2,6},{0, 1, 4, 5, 6, 10,
- 2, 3, 7, 8, 9, 11}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 2, 2}, {0, 1,
+ 2, 3}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 2, 3}, {4, 5, 6,
+ 7, 8, 9}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 2, 1}, {10,
+ 11}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ TestTensor expected{armnn::TensorShape{1, 1, 2, 6}, {0, 1, 4, 5, 6, 10,
+ 2, 3, 7, 8, 9, 11}};
+
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxisZero3D, COMPUTE_DEVICES)
+void SimpleConcatAxisZero3D(armnn::Compute computeDevice)
{
int32_t axis = 0;
- TestTensor aIn{armnn::TensorShape{1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{3,1,1},{0,1,2}};
+ TestTensor expected{armnn::TensorShape{3, 1, 1}, {0, 1, 2}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxisOne3D, COMPUTE_DEVICES)
+void SimpleConcatAxisOne3D(armnn::Compute computeDevice)
{
int32_t axis = 1;
- TestTensor aIn{armnn::TensorShape{1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{1,3,1},{0,1,2}};
+ TestTensor expected{armnn::TensorShape{1, 3, 1}, {0, 1, 2}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxisTwo3D, COMPUTE_DEVICES)
+void SimpleConcatAxisTwo3D(armnn::Compute computeDevice)
{
int32_t axis = 2;
- TestTensor aIn{armnn::TensorShape{1,1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{1,1,3},{0,1,2}};
+ TestTensor expected{armnn::TensorShape{1, 1, 3}, {0, 1, 2}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxisZero2D, COMPUTE_DEVICES)
+void SimpleConcatAxisZero2D(armnn::Compute computeDevice)
{
int32_t axis = 0;
- TestTensor aIn{armnn::TensorShape{1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{3,1},{0,1,2}};
+ TestTensor expected{armnn::TensorShape{3, 1}, {0, 1, 2}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxisOne2D, COMPUTE_DEVICES)
+void SimpleConcatAxisOne2D(armnn::Compute computeDevice)
{
int32_t axis = 1;
- TestTensor aIn{armnn::TensorShape{1,1},{0}};
- TestTensor bIn{armnn::TensorShape{1,1},{1}};
- TestTensor cIn{armnn::TensorShape{1,1},{2}};
+ TestTensor aIn{armnn::TensorShape{1, 1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1, 1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1, 1}, {2}};
- TestTensor expected{armnn::TensorShape{1,3},{0,1,2}};
+ TestTensor expected{armnn::TensorShape{1, 3}, {0, 1, 2}};
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
}
-BOOST_DATA_TEST_CASE(SimpleConcatAxisZero1D, COMPUTE_DEVICES)
+void SimpleConcatAxisZero1D(armnn::Compute computeDevice)
{
int32_t axis = 0;
- TestTensor aIn{armnn::TensorShape{1},{0}};
- TestTensor bIn{armnn::TensorShape{1},{1}};
- TestTensor cIn{armnn::TensorShape{1},{2}};
+ TestTensor aIn{armnn::TensorShape{1}, {0}};
+ TestTensor bIn{armnn::TensorShape{1}, {1}};
+ TestTensor cIn{armnn::TensorShape{1}, {2}};
+
+ TestTensor expected{armnn::TensorShape{3}, {0, 1, 2}};
+ ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, computeDevice);
+}
+
+} // namespace <anonymous>
+
+DOCTEST_TEST_SUITE("ConcatTests_CpuRef")
+{
+
+DOCTEST_TEST_CASE("SimpleConcatAxis0")
+{
+ SimpleConcatAxis0(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("ConcatAxis0NoInterleave")
+{
+ ConcatAxis0NoInterleave(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis1")
+{
+ SimpleConcatAxis1(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("ConcatAxis1NoInterleave")
+{
+ ConcatAxis1NoInterleave(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis1DoInterleave")
+{
+ SimpleConcatAxis1DoInterleave(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis2")
+{
+ SimpleConcatAxis2(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("ConcatAxis2NoInterleave")
+{
+ ConcatAxis2NoInterleave(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis2DoInterleave")
+{
+ SimpleConcatAxis2DoInterleave(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis3")
+{
+ SimpleConcatAxis3(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis3DoInterleave")
+{
+ SimpleConcatAxis3DoInterleave(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("AxisTooBig")
+{
+ AxisTooBig(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("AxisTooSmall")
+{
+ AxisTooSmall(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("TooFewInputs")
+{
+ TooFewInputs(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("MismatchedInputDimensions")
+{
+ MismatchedInputDimensions(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("MismatchedInputRanks")
+{
+ MismatchedInputRanks(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("MismatchedOutputDimensions")
+{
+ MismatchedOutputDimensions(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("MismatchedOutputRank")
+{
+ MismatchedOutputRank(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("ValidNegativeAxis")
+{
+ ValidNegativeAxis(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisZero3D")
+{
+ SimpleConcatAxisZero3D(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisOne3D")
+{
+ SimpleConcatAxisOne3D(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisTwo3D")
+{
+ SimpleConcatAxisTwo3D(armnn::Compute::CpuRef);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisZero2D")
+{
+ SimpleConcatAxisZero2D(armnn::Compute::CpuRef);
+}
- TestTensor expected{armnn::TensorShape{3},{0,1,2}};
+DOCTEST_TEST_CASE("SimpleConcatAxisOne2D")
+{
+ SimpleConcatAxisOne2D(armnn::Compute::CpuRef);
+}
- ConcatTestImpl({&aIn, &bIn, &cIn}, axis, expected, sample);
+DOCTEST_TEST_CASE("SimpleConcatAxisZero1D")
+{
+ SimpleConcatAxisZero1D(armnn::Compute::CpuRef);
+}
+
+}
+
+#ifdef ARMCOMPUTECL_ENABLED
+DOCTEST_TEST_SUITE("ConcatTests_GpuAcc")
+{
+
+DOCTEST_TEST_CASE("SimpleConcatAxis0")
+{
+ SimpleConcatAxis0(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("ConcatAxis0NoInterleave")
+{
+ ConcatAxis0NoInterleave(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis1")
+{
+ SimpleConcatAxis1(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("ConcatAxis1NoInterleave")
+{
+ ConcatAxis1NoInterleave(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis1DoInterleave")
+{
+ SimpleConcatAxis1DoInterleave(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis2")
+{
+ SimpleConcatAxis2(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("ConcatAxis2NoInterleave")
+{
+ ConcatAxis2NoInterleave(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis2DoInterleave")
+{
+ SimpleConcatAxis2DoInterleave(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis3")
+{
+ SimpleConcatAxis3(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxis3DoInterleave")
+{
+ SimpleConcatAxis3DoInterleave(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("AxisTooBig")
+{
+ AxisTooBig(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("AxisTooSmall")
+{
+ AxisTooSmall(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("TooFewInputs")
+{
+ TooFewInputs(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("MismatchedInputDimensions")
+{
+ MismatchedInputDimensions(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("MismatchedInputRanks")
+{
+ MismatchedInputRanks(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("MismatchedOutputDimensions")
+{
+ MismatchedOutputDimensions(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("MismatchedOutputRank")
+{
+ MismatchedOutputRank(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("ValidNegativeAxis")
+{
+ ValidNegativeAxis(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisZero3D")
+{
+ SimpleConcatAxisZero3D(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisOne3D")
+{
+ SimpleConcatAxisOne3D(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisTwo3D")
+{
+ SimpleConcatAxisTwo3D(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisZero2D")
+{
+ SimpleConcatAxisZero2D(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisOne2D")
+{
+ SimpleConcatAxisOne2D(armnn::Compute::GpuAcc);
+}
+
+DOCTEST_TEST_CASE("SimpleConcatAxisZero1D")
+{
+ SimpleConcatAxisZero1D(armnn::Compute::GpuAcc);
}
-BOOST_AUTO_TEST_SUITE_END()
+}// End of GpuAcc Test Suite
+#endif
\ No newline at end of file
diff --git a/test/Concurrent.cpp b/test/Concurrent.cpp
index aeea17f..71119cd 100644
--- a/test/Concurrent.cpp
+++ b/test/Concurrent.cpp
@@ -1,20 +1,18 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
-#include "DriverTestHelpers.hpp"
-
-#include "../1.0/HalPolicy.hpp"
-#include <boost/test/unit_test.hpp>
+#include "DriverTestHelpers.hpp"
#include <log/log.h>
-BOOST_AUTO_TEST_SUITE(ConcurrentDriverTests)
-
+DOCTEST_TEST_SUITE("ConcurrentDriverTests")
+{
using ArmnnDriver = armnn_driver::ArmnnDriver;
using DriverOptions = armnn_driver::DriverOptions;
using HalPolicy = armnn_driver::hal_1_0::HalPolicy;
+using RequestArgument = V1_0::RequestArgument;
using namespace android::nn;
using namespace android::hardware;
@@ -25,7 +23,7 @@ using namespace armnn_driver;
// The main point of this test is to check that multiple requests can be
// executed without waiting for the callback from previous execution.
// The operations performed are not significant.
-BOOST_AUTO_TEST_CASE(ConcurrentExecute)
+DOCTEST_TEST_CASE("ConcurrentExecute")
{
ALOGI("ConcurrentExecute: entry");
@@ -63,36 +61,37 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecute)
}
}
- BOOST_TEST(maxRequests == preparedModelsSize);
+ DOCTEST_CHECK(maxRequests == preparedModelsSize);
// construct the request data
- DataLocation inloc = {};
- inloc.poolIndex = 0;
- inloc.offset = 0;
- inloc.length = 3 * sizeof(float);
- RequestArgument input = {};
- input.location = inloc;
- input.dimensions = hidl_vec<uint32_t>{};
-
- DataLocation outloc = {};
- outloc.poolIndex = 1;
- outloc.offset = 0;
- outloc.length = 1 * sizeof(float);
- RequestArgument output = {};
- output.location = outloc;
- output.dimensions = hidl_vec<uint32_t>{};
+ V1_0::DataLocation inloc = {};
+ inloc.poolIndex = 0;
+ inloc.offset = 0;
+ inloc.length = 3 * sizeof(float);
+ RequestArgument input = {};
+ input.location = inloc;
+ input.dimensions = hidl_vec<uint32_t>{};
+
+ V1_0::DataLocation outloc = {};
+ outloc.poolIndex = 1;
+ outloc.offset = 0;
+ outloc.length = 1 * sizeof(float);
+ RequestArgument output = {};
+ output.location = outloc;
+ output.dimensions = hidl_vec<uint32_t>{};
// build the requests
V1_0::Request requests[maxRequests];
+ android::sp<IMemory> inMemory[maxRequests];
android::sp<IMemory> outMemory[maxRequests];
+ float indata[] = {2, 32, 16};
float* outdata[maxRequests];
for (size_t i = 0; i < maxRequests; ++i)
{
requests[i].inputs = hidl_vec<RequestArgument>{input};
requests[i].outputs = hidl_vec<RequestArgument>{output};
// set the input data (matching source test)
- float indata[] = {2, 32, 16};
- AddPoolAndSetData<float>(3, requests[i], indata);
+ inMemory[i] = AddPoolAndSetData<float>(3, requests[i], indata);
// add memory for the output
outMemory[i] = AddPoolAndGetData<float>(1, requests[i]);
outdata[i] = static_cast<float*>(static_cast<void*>(outMemory[i]->getPointer()));
@@ -110,7 +109,7 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecute)
ALOGI("ConcurrentExecute: waiting for callbacks");
for (size_t i = 0; i < maxRequests; ++i)
{
- ARMNN_ASSERT(cb[i]);
+ DOCTEST_CHECK(cb[i]);
cb[i]->wait();
}
@@ -118,9 +117,9 @@ BOOST_AUTO_TEST_CASE(ConcurrentExecute)
ALOGI("ConcurrentExecute: validating results");
for (size_t i = 0; i < maxRequests; ++i)
{
- BOOST_TEST(outdata[i][0] == 152);
+ DOCTEST_CHECK(outdata[i][0] == 152);
}
ALOGI("ConcurrentExecute: exit");
}
-BOOST_AUTO_TEST_SUITE_END()
+}
diff --git a/test/Convolution2D.hpp b/test/Convolution2D.hpp
index c471fbd..cc26f68 100644
--- a/test/Convolution2D.hpp
+++ b/test/Convolution2D.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -7,27 +7,26 @@
#include "DriverTestHelpers.hpp"
-#include <boost/test/unit_test.hpp>
#include <log/log.h>
#include <OperationsUtils.h>
-BOOST_AUTO_TEST_SUITE(Convolution2DTests)
-
using namespace android::hardware;
using namespace driverTestHelpers;
using namespace armnn_driver;
+using RequestArgument = V1_0::RequestArgument;
+
namespace driverTestHelpers
{
#define ARMNN_ANDROID_FP16_TEST(result, fp16Expectation, fp32Expectation, fp16Enabled) \
if (fp16Enabled) \
{ \
- BOOST_TEST((result == fp16Expectation || result == fp32Expectation), result << \
+ DOCTEST_CHECK_MESSAGE((result == fp16Expectation || result == fp32Expectation), result << \
" does not match either " << fp16Expectation << "[fp16] or " << fp32Expectation << "[fp32]"); \
} else \
{ \
- BOOST_TEST(result == fp32Expectation); \
+ DOCTEST_CHECK(result == fp32Expectation); \
}
void SetModelFp16Flag(V1_0::Model& model, bool fp16Enabled);
@@ -53,47 +52,47 @@ void PaddingTestImpl(android::nn::PaddingScheme paddingScheme, bool fp16Enabled
// add operands
float weightValue[] = {1.f, -1.f, 0.f, 1.f};
- float biasValue[] = {0.f};
+ float biasValue[] = {0.f};
- AddInputOperand<HalPolicy>(model, hidl_vec<uint32_t>{1, 2, 3, 1});
- AddTensorOperand<HalPolicy>(model, hidl_vec<uint32_t>{1, 2, 2, 1}, weightValue);
- AddTensorOperand<HalPolicy>(model, hidl_vec<uint32_t>{1}, biasValue);
- AddIntOperand<HalPolicy>(model, (int32_t)paddingScheme); // padding
+ AddInputOperand<HalPolicy>(model, hidl_vec < uint32_t > {1, 2, 3, 1});
+ AddTensorOperand<HalPolicy>(model, hidl_vec < uint32_t > {1, 2, 2, 1}, weightValue);
+ AddTensorOperand<HalPolicy>(model, hidl_vec < uint32_t > {1}, biasValue);
+ AddIntOperand<HalPolicy>(model, (int32_t) paddingScheme); // padding
AddIntOperand<HalPolicy>(model, 2); // stride x
AddIntOperand<HalPolicy>(model, 2); // stride y
AddIntOperand<HalPolicy>(model, 0); // no activation
- AddOutputOperand<HalPolicy>(model, hidl_vec<uint32_t>{1, 1, outSize, 1});
+ AddOutputOperand<HalPolicy>(model, hidl_vec < uint32_t > {1, 1, outSize, 1});
// make the convolution operation
model.operations.resize(1);
model.operations[0].type = HalOperationType::CONV_2D;
- model.operations[0].inputs = hidl_vec<uint32_t>{0, 1, 2, 3, 4, 5, 6};
- model.operations[0].outputs = hidl_vec<uint32_t>{7};
+ model.operations[0].inputs = hidl_vec < uint32_t > {0, 1, 2, 3, 4, 5, 6};
+ model.operations[0].outputs = hidl_vec < uint32_t > {7};
// make the prepared model
SetModelFp16Flag(model, fp16Enabled);
android::sp<V1_0::IPreparedModel> preparedModel = PrepareModel(model, *driver);
// construct the request
- DataLocation inloc = {};
- inloc.poolIndex = 0;
- inloc.offset = 0;
- inloc.length = 6 * sizeof(float);
+ V1_0::DataLocation inloc = {};
+ inloc.poolIndex = 0;
+ inloc.offset = 0;
+ inloc.length = 6 * sizeof(float);
RequestArgument input = {};
- input.location = inloc;
- input.dimensions = hidl_vec<uint32_t>{};
+ input.location = inloc;
+ input.dimensions = hidl_vec < uint32_t > {};
- DataLocation outloc = {};
- outloc.poolIndex = 1;
- outloc.offset = 0;
- outloc.length = outSize * sizeof(float);
+ V1_0::DataLocation outloc = {};
+ outloc.poolIndex = 1;
+ outloc.offset = 0;
+ outloc.length = outSize * sizeof(float);
RequestArgument output = {};
- output.location = outloc;
- output.dimensions = hidl_vec<uint32_t>{};
+ output.location = outloc;
+ output.dimensions = hidl_vec < uint32_t > {};
V1_0::Request request = {};
- request.inputs = hidl_vec<RequestArgument>{input};
- request.outputs = hidl_vec<RequestArgument>{output};
+ request.inputs = hidl_vec < RequestArgument > {input};
+ request.outputs = hidl_vec < RequestArgument > {output};
// set the input data (matching source test)
float indata[] = {1024.25f, 1.f, 0.f, 3.f, -1, -1024.25f};
@@ -112,19 +111,17 @@ void PaddingTestImpl(android::nn::PaddingScheme paddingScheme, bool fp16Enabled
// check the result
switch (paddingScheme)
{
- case android::nn::kPaddingValid:
- ARMNN_ANDROID_FP16_TEST(outdata[0], 1022.f, 1022.25f, fp16Enabled)
- break;
- case android::nn::kPaddingSame:
- ARMNN_ANDROID_FP16_TEST(outdata[0], 1022.f, 1022.25f, fp16Enabled)
- BOOST_TEST(outdata[1] == 0.f);
- break;
- default:
- BOOST_TEST(false);
- break;
+ case android::nn::kPaddingValid:
+ ARMNN_ANDROID_FP16_TEST(outdata[0], 1022.f, 1022.25f, fp16Enabled)
+ break;
+ case android::nn::kPaddingSame:
+ ARMNN_ANDROID_FP16_TEST(outdata[0], 1022.f, 1022.25f, fp16Enabled)
+ DOCTEST_CHECK(outdata[1] == 0.f);
+ break;
+ default:
+ DOCTEST_CHECK(false);
+ break;
}
}
} // namespace driverTestHelpers
-
-BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/Dilation.hpp b/test/Dilation.hpp
index d0189c9..dbd2493 100644
--- a/test/Dilation.hpp
+++ b/test/Dilation.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -7,17 +7,12 @@
#include "DriverTestHelpers.hpp"
-#include <armnn/LayerVisitorBase.hpp>
+#include <armnn/StrategyBase.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
-#include <boost/test/unit_test.hpp>
-
#include <numeric>
-BOOST_AUTO_TEST_SUITE(DilationTests)
-
using namespace armnn;
-using namespace boost;
using namespace driverTestHelpers;
struct DilationTestOptions
@@ -35,7 +30,7 @@ struct DilationTestOptions
bool m_HasDilation;
};
-class DilationTestVisitor : public LayerVisitorBase<VisitorThrowingPolicy>
+class DilationTestVisitor : public StrategyBase<ThrowingStrategy>
{
public:
DilationTestVisitor() :
@@ -47,32 +42,32 @@ public:
m_ExpectedDilationY{expectedDilationY}
{}
- void VisitConvolution2dLayer(const IConnectableLayer *layer,
- const Convolution2dDescriptor& descriptor,
- const ConstTensor& weights,
- const Optional<ConstTensor>& biases,
- const char *name = nullptr) override
- {
- IgnoreUnused(layer);
- IgnoreUnused(weights);
- IgnoreUnused(biases);
- IgnoreUnused(name);
-
- CheckDilationParams(descriptor);
- }
-
- void VisitDepthwiseConvolution2dLayer(const IConnectableLayer *layer,
- const DepthwiseConvolution2dDescriptor& descriptor,
- const ConstTensor& weights,
- const Optional<ConstTensor>& biases,
- const char *name = nullptr) override
+ void ExecuteStrategy(const armnn::IConnectableLayer* layer,
+ const armnn::BaseDescriptor& descriptor,
+ const std::vector<armnn::ConstTensor>& constants,
+ const char* name,
+ const armnn::LayerBindingId id = 0) override
{
- IgnoreUnused(layer);
- IgnoreUnused(weights);
- IgnoreUnused(biases);
- IgnoreUnused(name);
-
- CheckDilationParams(descriptor);
+ armnn::IgnoreUnused(layer, constants, id, name);
+ switch (layer->GetType())
+ {
+ case armnn::LayerType::Constant:
+ break;
+ case armnn::LayerType::Convolution2d:
+ {
+ CheckDilationParams(static_cast<const armnn::Convolution2dDescriptor&>(descriptor));
+ break;
+ }
+ case armnn::LayerType::DepthwiseConvolution2d:
+ {
+ CheckDilationParams(static_cast<const armnn::DepthwiseConvolution2dDescriptor&>(descriptor));
+ break;
+ }
+ default:
+ {
+ m_DefaultStrategy.Apply(GetLayerTypeAsCString(layer->GetType()));
+ }
+ }
}
private:
@@ -82,8 +77,8 @@ private:
template<typename ConvolutionDescriptor>
void CheckDilationParams(const ConvolutionDescriptor& descriptor)
{
- BOOST_CHECK_EQUAL(descriptor.m_DilationX, m_ExpectedDilationX);
- BOOST_CHECK_EQUAL(descriptor.m_DilationY, m_ExpectedDilationY);
+ CHECK_EQ(descriptor.m_DilationX, m_ExpectedDilationX);
+ CHECK_EQ(descriptor.m_DilationY, m_ExpectedDilationY);
}
};
@@ -169,11 +164,9 @@ void DilationTestImpl(const DilationTestOptions& options)
data.m_OutputSlotForOperand = std::vector<IOutputSlot*>(model.operands.size(), nullptr);
bool ok = HalPolicy::ConvertOperation(model.operations[0], model, data);
- BOOST_CHECK(ok);
+ DOCTEST_CHECK(ok);
// check if dilation params are as expected
DilationTestVisitor visitor = options.m_HasDilation ? DilationTestVisitor(2, 2) : DilationTestVisitor();
- data.m_Network->Accept(visitor);
+ data.m_Network->ExecuteStrategy(visitor);
}
-
-BOOST_AUTO_TEST_SUITE_END()
diff --git a/test/DriverTestHelpers.cpp b/test/DriverTestHelpers.cpp
index 8e8d7be..1f9fc1e 100644
--- a/test/DriverTestHelpers.cpp
+++ b/test/DriverTestHelpers.cpp
@@ -1,10 +1,10 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
+
#include "DriverTestHelpers.hpp"
#include <log/log.h>
-#include <boost/test/unit_test.hpp>
namespace android
{
@@ -139,10 +139,10 @@ android::sp<V1_0::IPreparedModel> PrepareModelWithStatus(const V1_0::Model& mode
driver.prepareModel(model, cb);
prepareStatus = cb->GetErrorStatus();
- BOOST_TEST(prepareStatus == expectedStatus);
+ DOCTEST_CHECK((int)prepareStatus == (int)expectedStatus);
if (expectedStatus == V1_0::ErrorStatus::NONE)
{
- BOOST_TEST((cb->GetPreparedModel() != nullptr));
+ DOCTEST_CHECK((cb->GetPreparedModel() != nullptr));
}
return cb->GetPreparedModel();
}
@@ -158,10 +158,10 @@ android::sp<V1_0::IPreparedModel> PrepareModelWithStatus(const V1_1::Model& mode
driver.prepareModel_1_1(model, V1_1::ExecutionPreference::LOW_POWER, cb);
prepareStatus = cb->GetErrorStatus();
- BOOST_TEST(prepareStatus == expectedStatus);
+ DOCTEST_CHECK((int)prepareStatus == (int)expectedStatus);
if (expectedStatus == V1_0::ErrorStatus::NONE)
{
- BOOST_TEST((cb->GetPreparedModel() != nullptr));
+ DOCTEST_CHECK((cb->GetPreparedModel() != nullptr));
}
return cb->GetPreparedModel();
}
@@ -184,10 +184,10 @@ android::sp<V1_2::IPreparedModel> PrepareModelWithStatus_1_2(const armnn_driver:
driver.prepareModel_1_2(model, V1_1::ExecutionPreference::LOW_POWER, emptyHandle1, emptyHandle2, emptyToken, cb);
prepareStatus = cb->GetErrorStatus();
- BOOST_TEST(prepareStatus == expectedStatus);
+ DOCTEST_CHECK((int)prepareStatus == (int)expectedStatus);
if (expectedStatus == V1_0::ErrorStatus::NONE)
{
- BOOST_TEST((cb->GetPreparedModel_1_2() != nullptr));
+ DOCTEST_CHECK((cb->GetPreparedModel_1_2() != nullptr));
}
return cb->GetPreparedModel_1_2();
}
@@ -219,7 +219,7 @@ android::sp<V1_3::IPreparedModel> PrepareModelWithStatus_1_3(const armnn_driver:
prepareStatus = cb->Get_1_3_ErrorStatus();
if (prepareStatus == V1_3::ErrorStatus::NONE)
{
- BOOST_TEST((cb->GetPreparedModel_1_3() != nullptr));
+ DOCTEST_CHECK((cb->GetPreparedModel_1_3() != nullptr));
}
return cb->GetPreparedModel_1_3();
}
@@ -230,10 +230,10 @@ V1_0::ErrorStatus Execute(android::sp<V1_0::IPreparedModel> preparedModel,
const V1_0::Request& request,
V1_0::ErrorStatus expectedStatus)
{
- BOOST_TEST(preparedModel.get() != nullptr);
+ DOCTEST_CHECK(preparedModel.get() != nullptr);
android::sp<ExecutionCallback> cb(new ExecutionCallback());
V1_0::ErrorStatus execStatus = preparedModel->execute(request, cb);
- BOOST_TEST(execStatus == expectedStatus);
+ DOCTEST_CHECK((int)execStatus == (int)expectedStatus);
ALOGI("Execute: waiting for callback to be invoked");
cb->wait();
return execStatus;
@@ -242,9 +242,10 @@ V1_0::ErrorStatus Execute(android::sp<V1_0::IPreparedModel> preparedModel,
android::sp<ExecutionCallback> ExecuteNoWait(android::sp<V1_0::IPreparedModel> preparedModel,
const V1_0::Request& request)
{
- BOOST_TEST(preparedModel.get() != nullptr);
+ DOCTEST_CHECK(preparedModel.get() != nullptr);
android::sp<ExecutionCallback> cb(new ExecutionCallback());
- BOOST_TEST(preparedModel->execute(request, cb) == V1_0::ErrorStatus::NONE);
+ V1_0::ErrorStatus execStatus = preparedModel->execute(request, cb);
+ DOCTEST_CHECK((int)execStatus == (int)V1_0::ErrorStatus::NONE);
ALOGI("ExecuteNoWait: returning callback object");
return cb;
}
diff --git a/test/DriverTestHelpers.hpp b/test/DriverTestHelpers.hpp
index 05b7892..98be090 100644
--- a/test/DriverTestHelpers.hpp
+++ b/test/DriverTestHelpers.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
@@ -10,12 +10,21 @@
#include "../ArmnnDriver.hpp"
#include <iosfwd>
-#include <boost/test/unit_test.hpp>
-
#include <android/hidl/allocator/1.0/IAllocator.h>
+// Some of the short name macros from 'third-party/doctest/doctest.h' clash with macros in
+// 'system/core/base/include/android-base/logging.h' so we use the full DOCTEST macro names
+#ifndef DOCTEST_CONFIG_NO_SHORT_MACRO_NAMES
+#define DOCTEST_CONFIG_NO_SHORT_MACRO_NAMES
+#endif // DOCTEST_CONFIG_NO_SHORT_MACRO_NAMES
+
+#include <doctest/doctest.h>
+
+using RequestArgument = V1_0::RequestArgument;
using ::android::hidl::allocator::V1_0::IAllocator;
+using ::android::hidl::memory::V1_0::IMemory;
+
namespace android
{
namespace hardware
@@ -164,7 +173,7 @@ android::sp<IMemory> AddPoolAndGetData(uint32_t size, V1_0::Request& request)
android::sp<IAllocator> allocator = IAllocator::getService("ashmem");
allocator->allocate(sizeof(T) * size, [&](bool success, const hidl_memory& mem) {
- BOOST_TEST(success);
+ DOCTEST_CHECK(success);
pool = mem;
});
@@ -177,13 +186,15 @@ android::sp<IMemory> AddPoolAndGetData(uint32_t size, V1_0::Request& request)
}
template<typename T>
-void AddPoolAndSetData(uint32_t size, V1_0::Request& request, const T* data)
+android::sp<IMemory> AddPoolAndSetData(uint32_t size, V1_0::Request& request, const T* data)
{
android::sp<IMemory> memory = AddPoolAndGetData<T>(size, request);
T* dst = static_cast<T*>(static_cast<void*>(memory->getPointer()));
memcpy(dst, data, size * sizeof(T));
+
+ return memory;
}
template<typename HalPolicy,
@@ -202,7 +213,7 @@ void AddBoolOperand(HalModel& model, bool value, uint32_t numberOfConsumers = 1)
using HalOperandType = typename HalPolicy::OperandType;
using HalOperandLifeTime = typename HalPolicy::OperandLifeTime;
- DataLocation location = {};
+ V1_0::DataLocation location = {};
location.offset = model.operandValues.size();
location.length = sizeof(uint8_t);
@@ -420,7 +431,7 @@ void AddTensorOperand(HalModel& model,
totalElements *= dim;
}
- DataLocation location = {};
+ V1_0::DataLocation location = {};
location.length = totalElements * sizeof(T);
if(operandLifeTime == HalOperandLifeTime::CONSTANT_COPY)
@@ -477,7 +488,7 @@ void AddIntOperand(HalModel& model, int32_t value, uint32_t numberOfConsumers =
using HalOperandType = typename HalPolicy::OperandType;
using HalOperandLifeTime = typename HalPolicy::OperandLifeTime;
- DataLocation location = {};
+ V1_0::DataLocation location = {};
location.offset = model.operandValues.size();
location.length = sizeof(int32_t);
@@ -503,7 +514,7 @@ void AddFloatOperand(HalModel& model,
using HalOperandType = typename HalPolicy::OperandType;
using HalOperandLifeTime = typename HalPolicy::OperandLifeTime;
- DataLocation location = {};
+ V1_0::DataLocation location = {};
location.offset = model.operandValues.size();
location.length = sizeof(float);
diff --git a/test/FullyConnected.cpp b/test/FullyConnected.cpp
index 2f9cd4b..4717357 100644
--- a/test/FullyConnected.cpp
+++ b/test/FullyConnected.cpp
@@ -1,17 +1,14 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
-#include "DriverTestHelpers.hpp"
-
-#include "../1.0/HalPolicy.hpp"
-#include <boost/test/unit_test.hpp>
+#include "DriverTestHelpers.hpp"
#include <log/log.h>
-BOOST_AUTO_TEST_SUITE(FullyConnectedTests)
-
+DOCTEST_TEST_SUITE("FullyConnectedTests")
+{
using namespace android::hardware;
using namespace driverTestHelpers;
using namespace armnn_driver;
@@ -19,7 +16,7 @@ using namespace armnn_driver;
using HalPolicy = hal_1_0::HalPolicy;
// Add our own test here since we fail the fc tests which Google supplies (because of non-const weights)
-BOOST_AUTO_TEST_CASE(FullyConnected)
+DOCTEST_TEST_CASE("FullyConnected")
{
// this should ideally replicate fully_connected_float.model.cpp
// but that uses slightly weird dimensions which I don't think we need to support for now
@@ -48,7 +45,7 @@ BOOST_AUTO_TEST_CASE(FullyConnected)
android::sp<V1_0::IPreparedModel> preparedModel = PrepareModel(model, *driver);
// construct the request
- DataLocation inloc = {};
+ V1_0::DataLocation inloc = {};
inloc.poolIndex = 0;
inloc.offset = 0;
inloc.length = 3 * sizeof(float);
@@ -56,7 +53,7 @@ BOOST_AUTO_TEST_CASE(FullyConnected)
input.location = inloc;
input.dimensions = hidl_vec<uint32_t>{};
- DataLocation outloc = {};
+ V1_0::DataLocation outloc = {};
outloc.poolIndex = 1;
outloc.offset = 0;
outloc.length = 1 * sizeof(float);
@@ -83,10 +80,10 @@ BOOST_AUTO_TEST_CASE(FullyConnected)
}
// check the result
- BOOST_TEST(outdata[0] == 152);
+ DOCTEST_CHECK(outdata[0] == 152);
}
-BOOST_AUTO_TEST_CASE(TestFullyConnected4dInput)
+DOCTEST_TEST_CASE("TestFullyConnected4dInput")
{
auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
@@ -130,21 +127,21 @@ BOOST_AUTO_TEST_CASE(TestFullyConnected4dInput)
android::sp<V1_0::IPreparedModel> preparedModel = PrepareModel(model, *driver);
// construct the request
- DataLocation inloc = {};
- inloc.poolIndex = 0;
- inloc.offset = 0;
- inloc.length = 8 * sizeof(float);
- RequestArgument input = {};
- input.location = inloc;
- input.dimensions = hidl_vec<uint32_t>{};
-
- DataLocation outloc = {};
- outloc.poolIndex = 1;
- outloc.offset = 0;
- outloc.length = 8 * sizeof(float);
- RequestArgument output = {};
- output.location = outloc;
- output.dimensions = hidl_vec<uint32_t>{};
+ V1_0::DataLocation inloc = {};
+ inloc.poolIndex = 0;
+ inloc.offset = 0;
+ inloc.length = 8 * sizeof(float);
+ RequestArgument input = {};
+ input.location = inloc;
+ input.dimensions = hidl_vec<uint32_t>{};
+
+ V1_0::DataLocation outloc = {};
+ outloc.poolIndex = 1;
+ outloc.offset = 0;
+ outloc.length = 8 * sizeof(float);
+ RequestArgument output = {};
+ output.location = outloc;
+ output.dimensions = hidl_vec<uint32_t>{};
V1_0::Request request = {};
request.inputs = hidl_vec<RequestArgument>{input};
@@ -165,17 +162,17 @@ BOOST_AUTO_TEST_CASE(TestFullyConnected4dInput)
}
// check the result
- BOOST_TEST(outdata[0] == 1);
- BOOST_TEST(outdata[1] == 2);
- BOOST_TEST(outdata[2] == 3);
- BOOST_TEST(outdata[3] == 4);
- BOOST_TEST(outdata[4] == 5);
- BOOST_TEST(outdata[5] == 6);
- BOOST_TEST(outdata[6] == 7);
- BOOST_TEST(outdata[7] == 8);
+ DOCTEST_CHECK(outdata[0] == 1);
+ DOCTEST_CHECK(outdata[1] == 2);
+ DOCTEST_CHECK(outdata[2] == 3);
+ DOCTEST_CHECK(outdata[3] == 4);
+ DOCTEST_CHECK(outdata[4] == 5);
+ DOCTEST_CHECK(outdata[5] == 6);
+ DOCTEST_CHECK(outdata[6] == 7);
+ DOCTEST_CHECK(outdata[7] == 8);
}
-BOOST_AUTO_TEST_CASE(TestFullyConnected4dInputReshape)
+DOCTEST_TEST_CASE("TestFullyConnected4dInputReshape")
{
auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
@@ -219,21 +216,21 @@ BOOST_AUTO_TEST_CASE(TestFullyConnected4dInputReshape)
android::sp<V1_0::IPreparedModel> preparedModel = PrepareModel(model, *driver);
// construct the request
- DataLocation inloc = {};
- inloc.poolIndex = 0;
- inloc.offset = 0;
- inloc.length = 8 * sizeof(float);
- RequestArgument input = {};
- input.location = inloc;
- input.dimensions = hidl_vec<uint32_t>{};
-
- DataLocation outloc = {};
- outloc.poolIndex = 1;
- outloc.offset = 0;
- outloc.length = 8 * sizeof(float);
- RequestArgument output = {};
- output.location = outloc;
- output.dimensions = hidl_vec<uint32_t>{};
+ V1_0::DataLocation inloc = {};
+ inloc.poolIndex = 0;
+ inloc.offset = 0;
+ inloc.length = 8 * sizeof(float);
+ RequestArgument input = {};
+ input.location = inloc;
+ input.dimensions = hidl_vec<uint32_t>{};
+
+ V1_0::DataLocation outloc = {};
+ outloc.poolIndex = 1;
+ outloc.offset = 0;
+ outloc.length = 8 * sizeof(float);
+ RequestArgument output = {};
+ output.location = outloc;
+ output.dimensions = hidl_vec<uint32_t>{};
V1_0::Request request = {};
request.inputs = hidl_vec<RequestArgument>{input};
@@ -254,14 +251,126 @@ BOOST_AUTO_TEST_CASE(TestFullyConnected4dInputReshape)
}
// check the result
- BOOST_TEST(outdata[0] == 1);
- BOOST_TEST(outdata[1] == 2);
- BOOST_TEST(outdata[2] == 3);
- BOOST_TEST(outdata[3] == 4);
- BOOST_TEST(outdata[4] == 5);
- BOOST_TEST(outdata[5] == 6);
- BOOST_TEST(outdata[6] == 7);
- BOOST_TEST(outdata[7] == 8);
+ DOCTEST_CHECK(outdata[0] == 1);
+ DOCTEST_CHECK(outdata[1] == 2);
+ DOCTEST_CHECK(outdata[2] == 3);
+ DOCTEST_CHECK(outdata[3] == 4);
+ DOCTEST_CHECK(outdata[4] == 5);
+ DOCTEST_CHECK(outdata[5] == 6);
+ DOCTEST_CHECK(outdata[6] == 7);
+ DOCTEST_CHECK(outdata[7] == 8);
}
-BOOST_AUTO_TEST_SUITE_END()
+DOCTEST_TEST_CASE("TestFullyConnectedWeightsAsInput")
+{
+ auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
+
+ V1_0::ErrorStatus error;
+ std::vector<bool> sup;
+
+ ArmnnDriver::getSupportedOperations_cb cb = [&](V1_0::ErrorStatus status, const std::vector<bool>& supported)
+ {
+ error = status;
+ sup = supported;
+ };
+
+ HalPolicy::Model model = {};
+
+ // operands
+ int32_t actValue = 0;
+ float weightValue[] = {1, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 0, 0, 0, 0, 0, 0,
+ 0, 0, 1, 0, 0, 0, 0, 0,
+ 0, 0, 0, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 1, 0, 0,
+ 0, 0, 0, 0, 0, 0, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 1}; //identity
+ float biasValue[] = {0, 0, 0, 0, 0, 0, 0, 0};
+
+ // fully connected operation
+ AddInputOperand<HalPolicy>(model, hidl_vec<uint32_t>{1, 1, 1, 8});
+ AddInputOperand<HalPolicy>(model, hidl_vec<uint32_t>{8, 8});
+ AddInputOperand<HalPolicy>(model, hidl_vec<uint32_t>{8});
+ AddIntOperand<HalPolicy>(model, actValue);
+ AddOutputOperand<HalPolicy>(model, hidl_vec<uint32_t>{1, 8});
+
+ model.operations.resize(1);
+
+ model.operations[0].type = HalPolicy::OperationType::FULLY_CONNECTED;
+ model.operations[0].inputs = hidl_vec<uint32_t>{0,1,2,3};
+ model.operations[0].outputs = hidl_vec<uint32_t>{4};
+
+ // make the prepared model
+ android::sp<V1_0::IPreparedModel> preparedModel = PrepareModel(model, *driver);
+
+ // construct the request for input
+ V1_0::DataLocation inloc = {};
+ inloc.poolIndex = 0;
+ inloc.offset = 0;
+ inloc.length = 8 * sizeof(float);
+ RequestArgument input = {};
+ input.location = inloc;
+ input.dimensions = hidl_vec<uint32_t>{1, 1, 1, 8};
+
+ // construct the request for weights as input
+ V1_0::DataLocation wloc = {};
+ wloc.poolIndex = 1;
+ wloc.offset = 0;
+ wloc.length = 64 * sizeof(float);
+ RequestArgument weights = {};
+ weights.location = wloc;
+ weights.dimensions = hidl_vec<uint32_t>{8, 8};
+
+ // construct the request for bias as input
+ V1_0::DataLocation bloc = {};
+ bloc.poolIndex = 2;
+ bloc.offset = 0;
+ bloc.length = 8 * sizeof(float);
+ RequestArgument bias = {};
+ bias.location = bloc;
+ bias.dimensions = hidl_vec<uint32_t>{8};
+
+ V1_0::DataLocation outloc = {};
+ outloc.poolIndex = 3;
+ outloc.offset = 0;
+ outloc.length = 8 * sizeof(float);
+ RequestArgument output = {};
+ output.location = outloc;
+ output.dimensions = hidl_vec<uint32_t>{1, 8};
+
+ V1_0::Request request = {};
+ request.inputs = hidl_vec<RequestArgument>{input, weights, bias};
+ request.outputs = hidl_vec<RequestArgument>{output};
+
+ // set the input data
+ float indata[] = {1,2,3,4,5,6,7,8};
+ AddPoolAndSetData(8, request, indata);
+
+ // set the weights data
+ AddPoolAndSetData(64, request, weightValue);
+ // set the bias data
+ AddPoolAndSetData(8, request, biasValue);
+
+ // add memory for the output
+ android::sp<IMemory> outMemory = AddPoolAndGetData<float>(8, request);
+ float* outdata = static_cast<float*>(static_cast<void*>(outMemory->getPointer()));
+
+ // run the execution
+ if (preparedModel != nullptr)
+ {
+ Execute(preparedModel, request);
+ }
+
+ // check the result
+ DOCTEST_CHECK(outdata[0] == 1);
+ DOCTEST_CHECK(outdata[1] == 2);
+ DOCTEST_CHECK(outdata[2] == 3);
+ DOCTEST_CHECK(outdata[3] == 4);
+ DOCTEST_CHECK(outdata[4] == 5);
+ DOCTEST_CHECK(outdata[5] == 6);
+ DOCTEST_CHECK(outdata[6] == 7);
+ DOCTEST_CHECK(outdata[7] == 8);
+}
+
+}
diff --git a/test/GenericLayerTests.cpp b/test/GenericLayerTests.cpp
index 188c7b1..bd86a88 100644
--- a/test/GenericLayerTests.cpp
+++ b/test/GenericLayerTests.cpp
@@ -1,16 +1,14 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
-#include "DriverTestHelpers.hpp"
-
-#include "../1.0/HalPolicy.hpp"
-#include <boost/test/unit_test.hpp>
+#include "DriverTestHelpers.hpp"
#include <log/log.h>
-BOOST_AUTO_TEST_SUITE(GenericLayerTests)
+DOCTEST_TEST_SUITE("GenericLayerTests")
+{
using namespace android::hardware;
using namespace driverTestHelpers;
@@ -18,7 +16,7 @@ using namespace armnn_driver;
using HalPolicy = hal_1_0::HalPolicy;
-BOOST_AUTO_TEST_CASE(GetSupportedOperations)
+DOCTEST_TEST_CASE("GetSupportedOperations")
{
auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
@@ -52,9 +50,9 @@ BOOST_AUTO_TEST_CASE(GetSupportedOperations)
model0.operations[0].outputs = hidl_vec<uint32_t>{4};
driver->getSupportedOperations(model0, cb);
- BOOST_TEST((int)errorStatus == (int)V1_0::ErrorStatus::NONE);
- BOOST_TEST(supported.size() == (size_t)1);
- BOOST_TEST(supported[0] == true);
+ DOCTEST_CHECK((int)errorStatus == (int)V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK(supported.size() == (size_t)1);
+ DOCTEST_CHECK(supported[0] == true);
V1_0::Model model1 = {};
@@ -81,8 +79,8 @@ BOOST_AUTO_TEST_CASE(GetSupportedOperations)
driver->getSupportedOperations(model1, cb);
- BOOST_TEST((int)errorStatus == (int)V1_0::ErrorStatus::INVALID_ARGUMENT);
- BOOST_TEST(supported.empty());
+ DOCTEST_CHECK((int)errorStatus == (int)V1_0::ErrorStatus::INVALID_ARGUMENT);
+ DOCTEST_CHECK(supported.empty());
// Test Broadcast on add/mul operators
HalPolicy::Model model2 = {};
@@ -114,10 +112,10 @@ BOOST_AUTO_TEST_CASE(GetSupportedOperations)
model2.operations[1].outputs = hidl_vec<uint32_t>{4};
driver->getSupportedOperations(model2, cb);
- BOOST_TEST((int)errorStatus == (int)V1_0::ErrorStatus::NONE);
- BOOST_TEST(supported.size() == (size_t)2);
- BOOST_TEST(supported[0] == true);
- BOOST_TEST(supported[1] == true);
+ DOCTEST_CHECK((int)errorStatus == (int)V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK(supported.size() == (size_t)2);
+ DOCTEST_CHECK(supported[0] == true);
+ DOCTEST_CHECK(supported[1] == true);
V1_0::Model model3 = {};
@@ -143,9 +141,9 @@ BOOST_AUTO_TEST_CASE(GetSupportedOperations)
model3.operations[0].outputs = hidl_vec<uint32_t>{3, 4};
driver->getSupportedOperations(model3, cb);
- BOOST_TEST((int)errorStatus == (int)V1_0::ErrorStatus::NONE);
- BOOST_TEST(supported.size() == (size_t)1);
- BOOST_TEST(supported[0] == false);
+ DOCTEST_CHECK((int)errorStatus == (int)V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK(supported.size() == (size_t)1);
+ DOCTEST_CHECK(supported[0] == false);
HalPolicy::Model model4 = {};
@@ -158,14 +156,14 @@ BOOST_AUTO_TEST_CASE(GetSupportedOperations)
model4.operations[0].outputs = hidl_vec<uint32_t>{0};
driver->getSupportedOperations(model4, cb);
- BOOST_TEST((int)errorStatus == (int)V1_0::ErrorStatus::INVALID_ARGUMENT);
- BOOST_TEST(supported.empty());
+ DOCTEST_CHECK((int)errorStatus == (int)V1_0::ErrorStatus::INVALID_ARGUMENT);
+ DOCTEST_CHECK(supported.empty());
}
// The purpose of this test is to ensure that when encountering an unsupported operation
// it is skipped and getSupportedOperations() continues (rather than failing and stopping).
// As per IVGCVSW-710.
-BOOST_AUTO_TEST_CASE(UnsupportedLayerContinueOnFailure)
+DOCTEST_TEST_CASE("UnsupportedLayerContinueOnFailure")
{
auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
@@ -240,16 +238,16 @@ BOOST_AUTO_TEST_CASE(UnsupportedLayerContinueOnFailure)
// We are testing that the unsupported layers return false and the test continues rather than failing and stopping
driver->getSupportedOperations(model, cb);
- BOOST_TEST((int)errorStatus == (int)V1_0::ErrorStatus::NONE);
- BOOST_TEST(supported.size() == (size_t)3);
- BOOST_TEST(supported[0] == false);
- BOOST_TEST(supported[1] == true);
- BOOST_TEST(supported[2] == false);
+ DOCTEST_CHECK((int)errorStatus == (int)V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK(supported.size() == (size_t)3);
+ DOCTEST_CHECK(supported[0] == false);
+ DOCTEST_CHECK(supported[1] == true);
+ DOCTEST_CHECK(supported[2] == false);
}
// The purpose of this test is to ensure that when encountering an failure
// during mem pool mapping we properly report an error to the framework via a callback
-BOOST_AUTO_TEST_CASE(ModelToINetworkConverterMemPoolFail)
+DOCTEST_TEST_CASE("ModelToINetworkConverterMemPoolFail")
{
auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
@@ -268,8 +266,8 @@ BOOST_AUTO_TEST_CASE(ModelToINetworkConverterMemPoolFail)
// Memory pool mapping should fail, we should report an error
driver->getSupportedOperations(model, cb);
- BOOST_TEST((int)errorStatus != (int)V1_0::ErrorStatus::NONE);
- BOOST_TEST(supported.empty());
+ DOCTEST_CHECK((int)errorStatus != (int)V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK(supported.empty());
}
-BOOST_AUTO_TEST_SUITE_END()
+}
diff --git a/test/Lstm.hpp b/test/Lstm.hpp
index 21056c3..93f2f32 100644
--- a/test/Lstm.hpp
+++ b/test/Lstm.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -9,12 +9,15 @@
#include <armnn/utility/IgnoreUnused.hpp>
-#include <boost/math/special_functions/relative_difference.hpp>
-
#include <array>
using ArmnnDriver = armnn_driver::ArmnnDriver;
using DriverOptions = armnn_driver::DriverOptions;
+using RequestArgument = V1_0::RequestArgument;
+
+#ifdef ARMNN_ANDROID_S
+#include <nnapi/Types.h>
+#endif
using namespace driverTestHelpers;
using namespace android::hardware;
@@ -25,7 +28,7 @@ namespace
template<typename T>
RequestArgument CreateRequestArgument(const std::vector<T>& value, unsigned int poolIndex)
{
- DataLocation inputInloc = {};
+ V1_0::DataLocation inputInloc = {};
inputInloc.poolIndex = poolIndex;
inputInloc.offset = 0;
inputInloc.length = value.size() * sizeof(T);
@@ -35,26 +38,6 @@ RequestArgument CreateRequestArgument(const std::vector<T>& value, unsigned int
return inputRequestArgument;
}
-// Returns true if the relative difference between two float values is less than the tolerance value given.
-// This is used because the floating point comparison tolerance (set on each BOOST_AUTO_TEST_CASE) does not work!
-bool TolerantCompareEqual(float a, float b, float tolerance = 0.00001f)
-{
- float rd;
- if (a == 0.0f)
- {
- rd = fabs(b);
- }
- else if (b == 0.0f)
- {
- rd = fabs(a);
- }
- else
- {
- rd = boost::math::relative_difference(a, b);
- }
- return rd < tolerance;
-}
-
// Helper function to create an OperandLifeTime::NO_VALUE for testing.
// To be used on optional input operands that have no values - these are valid and should be tested.
V1_0::OperandLifeTime CreateNoValueLifeTime(const hidl_vec<uint32_t>& dimensions)
@@ -95,12 +78,6 @@ void ExecuteModel<armnn_driver::hal_1_2::HalPolicy::Model>(const armnn_driver::h
} // anonymous namespace
-#ifndef ARMCOMPUTECL_ENABLED
-static const std::array<armnn::Compute, 1> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef }};
-#else
-static const std::array<armnn::Compute, 2> COMPUTE_DEVICES = {{ armnn::Compute::CpuRef, armnn::Compute::GpuAcc }};
-#endif
-
// Add our own tests here since we fail the lstm tests which Google supplies (because of non-const weights)
template <typename HalPolicy>
void LstmTestImpl(const hidl_vec<uint32_t>& inputDimensions,
@@ -389,18 +366,20 @@ void LstmTestImpl(const hidl_vec<uint32_t>& inputDimensions,
// check the results
for (size_t i = 0; i < outputStateOutValue.size(); ++i)
{
- BOOST_TEST(TolerantCompareEqual(outputStateOutValue[i], outputStateOutData[i]),
- "outputStateOut[" << i << "]: " << outputStateOutValue[i] << " != " << outputStateOutData[i]);
+ DOCTEST_CHECK_MESSAGE(outputStateOutValue[i] == doctest::Approx( outputStateOutData[i] ),
+ "outputStateOut[" << i << "]: " << outputStateOutValue[i] << " != "
+ << outputStateOutData[i]);
}
for (size_t i = 0; i < cellStateOutValue.size(); ++i)
{
- BOOST_TEST(TolerantCompareEqual(cellStateOutValue[i], cellStateOutData[i]),
- "cellStateOut[" << i << "]: " << cellStateOutValue[i] << " != " << cellStateOutData[i]);
+ DOCTEST_CHECK_MESSAGE(cellStateOutValue[i] == doctest::Approx( cellStateOutData[i] ),
+ "cellStateOutValue[" << i << "]: " << cellStateOutValue[i] << " != "
+ << cellStateOutData[i]);
}
for (size_t i = 0; i < outputValue.size(); ++i)
{
- BOOST_TEST(TolerantCompareEqual(outputValue[i], outputData[i]),
- "output[" << i << "]: " << outputValue[i] << " != " << outputData[i]);
+ DOCTEST_CHECK_MESSAGE(outputValue[i] == doctest::Approx( outputData[i] ),
+ "outputValue[" << i << "]: " << outputValue[i] << " != " << outputData[i]);
}
}
@@ -664,13 +643,14 @@ void QuantizedLstmTestImpl(const hidl_vec<uint32_t>& inputDimensions,
// check the results
for (size_t i = 0; i < cellStateOutValue.size(); ++i)
{
- BOOST_TEST(TolerantCompareEqual(cellStateOutValue[i], cellStateOutData[i], 1.0f),
- "cellStateOut[" << i << "]: " << cellStateOutValue[i] << " != " << cellStateOutData[i]);
+ DOCTEST_CHECK_MESSAGE(cellStateOutValue[i] == doctest::Approx( cellStateOutData[i] ),
+ "cellStateOutValue[" << i << "]: " << cellStateOutValue[i] << " != "
+ << cellStateOutData[i]);
}
for (size_t i = 0; i < outputValue.size(); ++i)
{
- BOOST_TEST(TolerantCompareEqual(outputValue[i], outputData[i], 1.0f),
- "output[" << i << "]: " << outputValue[i] << " != " << outputData[i]);
+ DOCTEST_CHECK_MESSAGE(outputValue[i] == doctest::Approx( outputData[i] ),
+ "outputValue[" << i << "]: " << outputValue[i] << " != " << outputData[i]);
}
}
diff --git a/test/SystemProperties.cpp b/test/SystemProperties.cpp
index e1a2632..ef95296 100644
--- a/test/SystemProperties.cpp
+++ b/test/SystemProperties.cpp
@@ -1,57 +1,58 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
+
#include "DriverTestHelpers.hpp"
-#include <boost/test/unit_test.hpp>
#include <log/log.h>
-#include "../SystemPropertiesUtils.hpp"
+#include <SystemPropertiesUtils.hpp>
-BOOST_AUTO_TEST_SUITE(SystemProperiesTests)
+DOCTEST_TEST_SUITE("SystemProperiesTests")
+{
-BOOST_AUTO_TEST_CASE(SystemProperties)
+DOCTEST_TEST_CASE("SystemProperties")
{
// Test default value
{
auto p = __system_property_find("thisDoesNotExist");
- BOOST_TEST((p == nullptr));
+ DOCTEST_CHECK((p == nullptr));
int defaultValue = ParseSystemProperty("thisDoesNotExist", -4);
- BOOST_TEST((defaultValue == -4));
+ DOCTEST_CHECK((defaultValue == -4));
}
// Test default value from bad data type
{
__system_property_set("thisIsNotFloat", "notfloat");
float defaultValue = ParseSystemProperty("thisIsNotFloat", 0.1f);
- BOOST_TEST((defaultValue == 0.1f));
+ DOCTEST_CHECK((defaultValue == 0.1f));
}
// Test fetching bool values
{
__system_property_set("myTestBool", "1");
bool b = ParseSystemProperty("myTestBool", false);
- BOOST_TEST((b == true));
+ DOCTEST_CHECK((b == true));
}
{
__system_property_set("myTestBool", "0");
bool b = ParseSystemProperty("myTestBool", true);
- BOOST_TEST((b == false));
+ DOCTEST_CHECK((b == false));
}
// Test fetching int
{
__system_property_set("myTestInt", "567");
int i = ParseSystemProperty("myTestInt", 890);
- BOOST_TEST((i==567));
+ DOCTEST_CHECK((i==567));
}
// Test fetching float
{
__system_property_set("myTestFloat", "1.2f");
float f = ParseSystemProperty("myTestFloat", 3.4f);
- BOOST_TEST((f==1.2f));
+ DOCTEST_CHECK((f==1.2f));
}
}
-BOOST_AUTO_TEST_SUITE_END()
+}
diff --git a/test/TestHalfTensor.cpp b/test/TestHalfTensor.cpp
new file mode 100644
index 0000000..12cdc42
--- /dev/null
+++ b/test/TestHalfTensor.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "TestHalfTensor.hpp"
+
+namespace driverTestHelpers
+{
+
+hidl_vec<uint32_t> TestHalfTensor::GetDimensions() const
+{
+ hidl_vec<uint32_t> dimensions;
+ dimensions.resize(m_Shape.GetNumDimensions());
+ for (uint32_t i=0; i<m_Shape.GetNumDimensions(); ++i)
+ {
+ dimensions[i] = m_Shape[i];
+ }
+ return dimensions;
+}
+
+unsigned int TestHalfTensor::GetNumElements() const
+{
+ return m_Shape.GetNumElements();
+}
+
+const Half * TestHalfTensor::GetData() const
+{
+ DOCTEST_CHECK(m_Data.empty() == false);
+ return &m_Data[0];
+}
+
+} // namespace driverTestHelpers
diff --git a/test/TestHalfTensor.hpp b/test/TestHalfTensor.hpp
new file mode 100644
index 0000000..2b7870f
--- /dev/null
+++ b/test/TestHalfTensor.hpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <ArmnnDriver.hpp>
+#include "DriverTestHelpers.hpp"
+
+#include <half/half.hpp>
+
+using Half = half_float::half;
+
+namespace driverTestHelpers
+{
+
+class TestHalfTensor
+{
+public:
+ TestHalfTensor(const armnn::TensorShape & shape,
+ const std::vector<Half> & data)
+ : m_Shape{shape}
+ , m_Data{data}
+ {
+ DOCTEST_CHECK(m_Shape.GetNumElements() == m_Data.size());
+ }
+
+ hidl_vec<uint32_t> GetDimensions() const;
+ unsigned int GetNumElements() const;
+ const Half * GetData() const;
+
+private:
+ armnn::TensorShape m_Shape;
+ std::vector<Half> m_Data;
+};
+
+} // driverTestHelpers
diff --git a/test/TestTensor.cpp b/test/TestTensor.cpp
index e6cb446..39bcd5a 100644
--- a/test/TestTensor.cpp
+++ b/test/TestTensor.cpp
@@ -1,7 +1,8 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
+
#include "TestTensor.hpp"
namespace driverTestHelpers
@@ -25,7 +26,7 @@ unsigned int TestTensor::GetNumElements() const
const float * TestTensor::GetData() const
{
- ARMNN_ASSERT(m_Data.empty() == false);
+ DOCTEST_CHECK(m_Data.empty() == false);
return &m_Data[0];
}
diff --git a/test/TestTensor.hpp b/test/TestTensor.hpp
index 1cd1950..b0613eb 100644
--- a/test/TestTensor.hpp
+++ b/test/TestTensor.hpp
@@ -1,12 +1,12 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
-#pragma once
-#include "../ArmnnDriver.hpp"
+#pragma once
-#include <armnn/utility/Assert.hpp>
+#include <ArmnnDriver.hpp>
+#include "DriverTestHelpers.hpp"
namespace driverTestHelpers
{
@@ -19,7 +19,7 @@ public:
: m_Shape{shape}
, m_Data{data}
{
- ARMNN_ASSERT(m_Shape.GetNumElements() == m_Data.size());
+ DOCTEST_CHECK(m_Shape.GetNumElements() == m_Data.size());
}
hidl_vec<uint32_t> GetDimensions() const;
diff --git a/test/Tests.cpp b/test/Tests.cpp
index 3b629a7..4628414 100644
--- a/test/Tests.cpp
+++ b/test/Tests.cpp
@@ -1,31 +1,35 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
+
#define LOG_TAG "ArmnnDriverTests"
-#define BOOST_TEST_MODULE armnn_driver_tests
-#include <boost/test/unit_test.hpp>
#include <log/log.h>
-#include "DriverTestHelpers.hpp"
+#ifndef DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
+#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
+#endif
-BOOST_AUTO_TEST_SUITE(DriverTests)
+#include "DriverTestHelpers.hpp"
using namespace android::hardware;
using namespace driverTestHelpers;
using namespace armnn_driver;
-BOOST_AUTO_TEST_CASE(Init)
+DOCTEST_TEST_SUITE("DriverTests")
+{
+
+DOCTEST_TEST_CASE("Init")
{
// Making the driver object on the stack causes a weird libc error, so make it on the heap instead
auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
- DeviceStatus status = driver->getStatus();
- // Note double-parentheses to avoid compile error from Boost trying to printf the DeviceStatus
- BOOST_TEST((status == DeviceStatus::AVAILABLE));
+ V1_0::DeviceStatus status = driver->getStatus();
+ // Note double-parentheses to avoid compile error from doctest trying to printf the DeviceStatus
+ DOCTEST_CHECK((status == V1_0::DeviceStatus::AVAILABLE));
}
-BOOST_AUTO_TEST_CASE(TestCapabilities)
+DOCTEST_TEST_CASE("TestCapabilities")
{
// Making the driver object on the stack causes a weird libc error, so make it on the heap instead
auto driver = std::make_unique<ArmnnDriver>(DriverOptions(armnn::Compute::CpuRef));
@@ -41,11 +45,11 @@ BOOST_AUTO_TEST_CASE(TestCapabilities)
driver->getCapabilities(cb);
- BOOST_TEST((int)error == (int)V1_0::ErrorStatus::NONE);
- BOOST_TEST(cap.float32Performance.execTime > 0.f);
- BOOST_TEST(cap.float32Performance.powerUsage > 0.f);
- BOOST_TEST(cap.quantized8Performance.execTime > 0.f);
- BOOST_TEST(cap.quantized8Performance.powerUsage > 0.f);
+ DOCTEST_CHECK((int)error == (int)V1_0::ErrorStatus::NONE);
+ DOCTEST_CHECK(cap.float32Performance.execTime > 0.f);
+ DOCTEST_CHECK(cap.float32Performance.powerUsage > 0.f);
+ DOCTEST_CHECK(cap.quantized8Performance.execTime > 0.f);
+ DOCTEST_CHECK(cap.quantized8Performance.powerUsage > 0.f);
}
-BOOST_AUTO_TEST_SUITE_END()
+}
diff --git a/test/UnidirectionalSequenceLstm.hpp b/test/UnidirectionalSequenceLstm.hpp
new file mode 100644
index 0000000..75b7a8d
--- /dev/null
+++ b/test/UnidirectionalSequenceLstm.hpp
@@ -0,0 +1,1419 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "DriverTestHelpers.hpp"
+
+#include <armnn/utility/IgnoreUnused.hpp>
+
+#include <array>
+
+using ArmnnDriver = armnn_driver::ArmnnDriver;
+using DriverOptions = armnn_driver::DriverOptions;
+using RequestArgument = V1_0::RequestArgument;
+
+#ifdef ARMNN_ANDROID_S
+#include <nnapi/Types.h>
+#endif
+
+using namespace driverTestHelpers;
+using namespace android::hardware;
+
+namespace
+{
+
+template<typename T>
+RequestArgument CreateRequestArgument(const std::vector<T>& value, unsigned int poolIndex)
+{
+ V1_0::DataLocation inputInloc = {};
+ inputInloc.poolIndex = poolIndex;
+ inputInloc.offset = 0;
+ inputInloc.length = value.size() * sizeof(T);
+ RequestArgument inputRequestArgument = {};
+ inputRequestArgument.location = inputInloc;
+ inputRequestArgument.dimensions = hidl_vec<uint32_t>{};
+ return inputRequestArgument;
+}
+
+// Helper function to create an OperandLifeTime::NO_VALUE for testing.
+// To be used on optional input operands that have no values - these are valid and should be tested.
+V1_0::OperandLifeTime CreateNoValueLifeTime(const hidl_vec<uint32_t>& dimensions)
+{
+ // Only create a NO_VALUE for optional operands that have no elements
+ if (dimensions.size() == 0 || dimensions[0] == 0)
+ {
+ return V1_0::OperandLifeTime::NO_VALUE;
+ }
+ return V1_0::OperandLifeTime::CONSTANT_COPY;
+}
+
+template<typename HalModel>
+void ExecuteModel(const HalModel& model, armnn_driver::ArmnnDriver& driver, const V1_0::Request& request)
+{
+ android::sp<V1_0::IPreparedModel> preparedModel = PrepareModel(model, driver);
+ if (preparedModel.get() != nullptr)
+ {
+ Execute(preparedModel, request);
+ }
+}
+
+#if defined(ARMNN_ANDROID_NN_V1_2) || defined(ARMNN_ANDROID_NN_V1_3)
+
+template<>
+void ExecuteModel<armnn_driver::hal_1_2::HalPolicy::Model>(const armnn_driver::hal_1_2::HalPolicy::Model& model,
+ armnn_driver::ArmnnDriver& driver,
+ const V1_0::Request& request)
+{
+ android::sp<V1_2::IPreparedModel> preparedModel = PrepareModel_1_2(model, driver);
+ if (preparedModel.get() != nullptr)
+ {
+ Execute(preparedModel, request);
+ }
+}
+
+#endif
+
+} // anonymous namespace
+
+// Add our own tests here since we fail the unidirectional sequence lstm
+// tests which Google supplies (because of non-const weights)
+template <typename HalPolicy>
+void UnidirectionalSequenceLstmTestImpl(const hidl_vec<uint32_t>& inputDimensions,
+ const std::vector<float>& inputValue,
+ const hidl_vec<uint32_t>& inputToInputWeightsDimensions,
+ const std::vector<float>& inputToInputWeightsValue,
+ const hidl_vec<uint32_t>& inputToForgetWeightsDimensions,
+ const std::vector<float>& inputToForgetWeightsValue,
+ const hidl_vec<uint32_t>& inputToCellWeightsDimensions,
+ const std::vector<float>& inputToCellWeightsValue,
+ const hidl_vec<uint32_t>& inputToOutputWeightsDimensions,
+ const std::vector<float>& inputToOutputWeightsValue,
+ const hidl_vec<uint32_t>& recurrentToInputWeightsDimensions,
+ const std::vector<float>& recurrentToInputWeightsValue,
+ const hidl_vec<uint32_t>& recurrentToForgetWeightsDimensions,
+ const std::vector<float>& recurrentToForgetWeightsValue,
+ const hidl_vec<uint32_t>& recurrentToCellWeightsDimensions,
+ const std::vector<float>& recurrentToCellWeightsValue,
+ const hidl_vec<uint32_t>& recurrentToOutputWeightsDimensions,
+ const std::vector<float>& recurrentToOutputWeightsValue,
+ const hidl_vec<uint32_t>& cellToInputWeightsDimensions,
+ const std::vector<float>& cellToInputWeightsValue,
+ const hidl_vec<uint32_t>& cellToForgetWeightsDimensions,
+ const std::vector<float>& cellToForgetWeightsValue,
+ const hidl_vec<uint32_t>& cellToOutputWeightsDimensions,
+ const std::vector<float>& cellToOutputWeightsValue,
+ const hidl_vec<uint32_t>& inputGateBiasDimensions,
+ const std::vector<float>& inputGateBiasValue,
+ const hidl_vec<uint32_t>& forgetGateBiasDimensions,
+ const std::vector<float>& forgetGateBiasValue,
+ const hidl_vec<uint32_t>& cellBiasDimensions,
+ const std::vector<float>& cellBiasValue,
+ const hidl_vec<uint32_t>& outputGateBiasDimensions,
+ const std::vector<float>& outputGateBiasValue,
+ const hidl_vec<uint32_t>& projectionWeightsDimensions,
+ const std::vector<float>& projectionWeightsValue,
+ const hidl_vec<uint32_t>& projectionBiasDimensions,
+ const std::vector<float>& projectionBiasValue,
+ const hidl_vec<uint32_t>& outputStateInDimensions,
+ const std::vector<float>& outputStateInValue,
+ const hidl_vec<uint32_t>& cellStateInDimensions,
+ const std::vector<float>& cellStateInValue,
+ const hidl_vec<uint32_t>& activationFunctionDimensions,
+ const std::vector<int32_t>& activationFunctionValue,
+ const hidl_vec<uint32_t>& cellClippingThresholdDimensions,
+ const std::vector<float>& cellClippingThresholdValue,
+ const hidl_vec<uint32_t>& projectionClippingThresholdDimensions,
+ const std::vector<float>& projectionClippingThresholdValue,
+ const bool& timeMajorValue,
+ const hidl_vec<uint32_t>& inputLayerNormWeightsDimensions,
+ const std::vector<float>& inputLayerNormWeightsValue,
+ const hidl_vec<uint32_t>& forgetLayerNormWeightsDimensions,
+ const std::vector<float>& forgetLayerNormWeightsValue,
+ const hidl_vec<uint32_t>& cellLayerNormWeightsDimensions,
+ const std::vector<float>& cellLayerNormWeightsValue,
+ const hidl_vec<uint32_t>& outputLayerNormWeightsDimensions,
+ const std::vector<float>& outputLayerNormWeightsValue,
+ const hidl_vec<uint32_t>& outputDimensions,
+ const std::vector<float>& outputValue,
+ const hidl_vec<uint32_t>&, // outputStateOutDimensions,
+ const std::vector<float>&, // outputStateOutValue,
+ const hidl_vec<uint32_t>&, // cellStateOutDimensions,
+ const std::vector<float>&, // cellStateOutValue,
+ armnn::Compute compute,
+ float epsilonValue = 0)
+{
+ auto driver = std::make_unique<ArmnnDriver>(DriverOptions(compute));
+ using Model = typename HalPolicy::Model;
+ Model model = {};
+
+ // Inputs:
+ // 00: The input: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, input_size], where
+ // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
+ AddInputOperand<HalPolicy>(model, inputDimensions);
+
+ // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size], where “num_units” corresponds to the number of cell units.
+ AddTensorOperand<HalPolicy>(model,
+ inputToInputWeightsDimensions,
+ inputToInputWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(inputToInputWeightsDimensions));
+ // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ AddTensorOperand<HalPolicy>(model, inputToForgetWeightsDimensions, inputToForgetWeightsValue);
+ // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ AddTensorOperand<HalPolicy>(model, inputToCellWeightsDimensions, inputToCellWeightsValue);
+ // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ AddTensorOperand<HalPolicy>(model, inputToOutputWeightsDimensions, inputToOutputWeightsValue);
+ // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e.,
+ // “num_units”), or the second dimension of the “projection_weights”, if defined.
+ AddTensorOperand<HalPolicy>(model,
+ recurrentToInputWeightsDimensions,
+ recurrentToInputWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(recurrentToInputWeightsDimensions));
+ // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ AddTensorOperand<HalPolicy>(model, recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue);
+ // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ AddTensorOperand<HalPolicy>(model, recurrentToCellWeightsDimensions, recurrentToCellWeightsValue);
+ // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ AddTensorOperand<HalPolicy>(model, recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue);
+ // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ AddTensorOperand<HalPolicy>(model,
+ cellToInputWeightsDimensions,
+ cellToInputWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(cellToInputWeightsDimensions));
+ // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ AddTensorOperand<HalPolicy>(model,
+ cellToForgetWeightsDimensions,
+ cellToForgetWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(cellToForgetWeightsDimensions));
+ // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ AddTensorOperand<HalPolicy>(model,
+ cellToOutputWeightsDimensions,
+ cellToOutputWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(cellToOutputWeightsDimensions));
+ // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ AddTensorOperand<HalPolicy>(model,
+ inputGateBiasDimensions,
+ inputGateBiasValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(inputGateBiasDimensions));
+ // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ AddTensorOperand<HalPolicy>(model, forgetGateBiasDimensions, forgetGateBiasValue);
+ // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ AddTensorOperand<HalPolicy>(model, cellBiasDimensions, cellBiasValue);
+ // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ AddTensorOperand<HalPolicy>(model, outputGateBiasDimensions, outputGateBiasValue);
+ // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [output_size, num_units].
+ AddTensorOperand<HalPolicy>(model,
+ projectionWeightsDimensions,
+ projectionWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(projectionWeightsDimensions));
+ // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size].
+ AddTensorOperand<HalPolicy>(model,
+ projectionBiasDimensions,
+ projectionBiasValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(projectionBiasDimensions));
+
+ // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size].
+ AddInputOperand<HalPolicy>(model, outputStateInDimensions);
+ // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units].
+ AddInputOperand<HalPolicy>(model, cellStateInDimensions);
+
+ // Constant scalar values (the VTS test adds these as tensors of dim {})
+ // 20: The activation function: A value indicating the activation function:
+ // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid.
+ AddTensorOperand<HalPolicy>(model,
+ activationFunctionDimensions,
+ activationFunctionValue,
+ HalPolicy::OperandType::INT32);
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip].
+ // If set to 0.0 then clipping is disabled.
+ AddTensorOperand<HalPolicy>(model,
+ cellClippingThresholdDimensions,
+ cellClippingThresholdValue,
+ HalPolicy::OperandType::FLOAT32);
+ // 22: The clipping threshold: for the output from the projection layer, such that values are bound within
+ // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+ AddTensorOperand<HalPolicy>(model,
+ projectionClippingThresholdDimensions,
+ projectionClippingThresholdValue,
+ HalPolicy::OperandType::FLOAT32);
+
+ // 23: Time-major if true, batch-major if false.
+ AddBoolOperand<HalPolicy>(model, timeMajorValue);
+
+ // Normalization:
+ // 24:The input layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at input gate.
+ AddTensorOperand<HalPolicy>(model,
+ inputLayerNormWeightsDimensions,
+ inputLayerNormWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(inputLayerNormWeightsDimensions));
+ // 25:The forget layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at forget gate.
+ AddTensorOperand<HalPolicy>(model,
+ forgetLayerNormWeightsDimensions,
+ forgetLayerNormWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(forgetLayerNormWeightsDimensions));
+ // 26:The cell layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at cell gate.
+ AddTensorOperand<HalPolicy>(model,
+ cellLayerNormWeightsDimensions,
+ cellLayerNormWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(cellLayerNormWeightsDimensions));
+ // 27:The output layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at output gate.
+ AddTensorOperand<HalPolicy>(model,
+ outputLayerNormWeightsDimensions,
+ outputLayerNormWeightsValue,
+ HalPolicy::OperandType::TENSOR_FLOAT32,
+ CreateNoValueLifeTime(outputLayerNormWeightsDimensions));
+
+ // Outputs:
+ // 00: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16. Shape: if time-major:
+ // [max_time, batch_size, output_size] If batch-major: [batch_size, max_time, output_size]
+ AddOutputOperand<HalPolicy>(model, outputDimensions);
+ // 01: The hidden state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, output_size]. This output is optional and can be omitted. If this output
+ // is present then output #2 must be present as well.
+ //AddOutputOperand<HalPolicy>(model, hiddenStateOutDimensions);
+ // 02: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, num_units]. This output is optional and can be omitted.
+ //AddOutputOperand<HalPolicy>(model, cellStateOutDimensions);
+
+ // make the lstm operation
+ model.operations.resize(1);
+ model.operations[0].type = HalPolicy::OperationType::UNIDIRECTIONAL_SEQUENCE_LSTM;
+
+ model.operations[0].inputs = hidl_vec<uint32_t> {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
+ 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27};
+ model.operations[0].outputs = hidl_vec<uint32_t> {28};
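+ // operand indices follow the order in which the operands were added above:
+ // inputs 0-27 are the operands documented above and 28 is the output operand.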
+
+ // define the input values
+ hidl_vec<RequestArgument> inputArguments;
+ inputArguments.resize(3);
+
+ inputArguments[0] = CreateRequestArgument<float>(inputValue, 0);
+ inputArguments[1] = CreateRequestArgument<float>(outputStateInValue, 1);
+ inputArguments[2] = CreateRequestArgument<float>(cellStateInValue, 2);
+
+ // define the expected output values
+ hidl_vec<RequestArgument> outputArguments;
+ outputArguments.resize(1);
+
+ outputArguments[0] = CreateRequestArgument<float>(outputValue, 3);
+
+ V1_0::Request request = {};
+ request.inputs = inputArguments;
+ request.outputs = outputArguments;
+
+ // set the input data
+ AddPoolAndSetData(inputValue.size(), request, inputValue.data());
+ AddPoolAndSetData(outputStateInValue.size(), request, outputStateInValue.data());
+ AddPoolAndSetData(cellStateInValue.size(), request, cellStateInValue.data());
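+ // pools are added in the same order as the pool indices used by the request
+ // arguments above: 0-2 back the inputs, 3 backs the output added next.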
+
+ // add memory for the outputs
+ android::sp<IMemory> outputMemory = AddPoolAndGetData<float>(outputValue.size(), request);
+ float* outputData = static_cast<float*>(static_cast<void*>(outputMemory->getPointer()));
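+ // outputData points into pool 3; the driver writes the execution results here.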
+
+ // make the prepared model and run the execution
+ ExecuteModel(model, *driver, request);
+
+ // check the results, using the caller-supplied epsilon when it is non-zero,
+ // otherwise doctest's default Approx tolerance
+ if (epsilonValue != 0)
+ {
+ for (size_t i = 0; i < outputValue.size(); ++i)
+ {
+ DOCTEST_CHECK_MESSAGE(outputValue[i] == doctest::Approx(outputData[i]).epsilon(epsilonValue),
+ "outputValue[" << i << "]: " << outputValue[i] << " != " << outputData[i]);
+ }
+ }
+ else
+ {
+ for (size_t i = 0; i < outputValue.size(); ++i)
+ {
+ DOCTEST_CHECK_MESSAGE(outputValue[i] == doctest::Approx(outputData[i]),
+ "outputValue[" << i << "]: " << outputValue[i] << " != " << outputData[i]);
+ }
+ }
+}
+
+template<typename HalPolicy>
+void UnidirectionalSequenceLstmLayerFloat32TestImpl(armnn::Compute compute)
+{
+ uint32_t batchSize = 3;
+ uint32_t timeSize = 2;
+ uint32_t inputSize = 3;
+ uint32_t outputSize = 4;
+ uint32_t numUnits = outputSize;
+
+ // Inputs:
+ // 00: The input: A 3-D tensor of shape: If time-major: [max_time, batch_size, input_size] If batch-major:
+ // [batch_size, max_time, input_size] where “max_time” is the number of timesteps (sequence length),
+ // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
+ hidl_vec<uint32_t> inputDimensions{batchSize, timeSize, inputSize};
+ std::vector<float> inputValue{1., 2., 3., 4., 5., 4.,
+ 3., 2., 1., 2., 3., 4.,
+ 5., 4., 3., 2., 1., 2.};
+
+ // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size], where “num_units” corresponds to the number of cell units.
+ hidl_vec<uint32_t> inputToInputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToInputWeightsValue{-0.49536117f, -0.0556083915f, -0.102400711f,
+ -0.117484632f, 0.3298470976f, -0.1179017122f,
+ 0.214305695f, 0.42135173085f, 0.003878414626f,
+ -0.348303917f, -0.1881275477f, 0.0343011027f};
+ // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToForgetWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToForgetWeightsValue{0.2415594226f, 0.15400093799f, 0.4566498398f,
+ -0.3810434485f, 0.268383264f, -0.009807467424f,
+ -0.3522925403f, -0.24275735512f, -0.28344226125f,
+ 0.13512269116f, -0.4932442977f, -0.10039821991f};
+ // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size].
+ hidl_vec<uint32_t> inputToCellWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToCellWeightsValue{-0.2504855627f, 0.184490025045f, -0.2480507493f,
+ 0.386399507f, -0.259465157985f, -0.16545993089f,
+ -0.4230232555f, 0.341664791103f, -0.18127849691f,
+ -0.2277662414f, -0.55275535589f, 0.34184026718f};
+ // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToOutputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToOutputWeightsValue{0.2303854227f, 0.5218806862f, -0.4865379333f,
+ 0.53969591851f, 0.23393625035f, -0.27140527306f,
+ 0.50009280443f, 0.07511717046f, 0.3998299249f,
+ -0.51717478049f, 0.1889653282f, -0.367323637f};
+ // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e.,
+ // “num_units”), or the second dimension of the “projection_weights”, if defined.
+ hidl_vec<uint32_t> recurrentToInputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToInputWeightsValue{-0.128009796112f, 0.1995525098f, -0.07745539397f, 0.1558421701f,
+ -0.265254765766f, -0.38837709614f, -0.05636804124f, 0.4259087456f,
+ 0.17628988623f, 0.3877420127f, 0.53300309181f, -0.0959980934f,
+ 0.00302857416f, 0.3266998827f, -0.142509296562f, -0.04433270756f};
+ // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToForgetWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToForgetWeightsValue{-0.09499983487f, -0.08814888417f, -0.04834804721f, 0.1516668247f,
+ -0.3967529535f, -0.06463699788f, 0.4952811002f, 0.003274492938f,
+ -0.0968840941f, 0.17928104102f, 0.0031281141592f, -0.3387276584f,
+ -0.3587934076f, 0.06705895066f, 0.22463923692f, 0.1961955726f};
+ // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToCellWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToCellWeightsValue{-0.21938985582f, -0.3023648226f, -0.1170005202f, -0.3509177422f,
+ -0.4286288613f, 0.2726137042f, 0.09216640889f, -0.06551410215f,
+ 0.20453298098f, 0.2393476665f, 0.11846517771f, 0.2630801796f,
+ 0.3954237699f, -0.19407111404f, 0.30412107706f, -0.27342408554f};
+ // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToOutputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToOutputWeightsValue{-0.32921677827f, 0.32624614238f, -0.1388191282f,
+ -0.17879831790f, -0.15185534954f, -0.16918526583f,
+ -0.10087361183f, -0.5436913968f, 0.016758225858f,
+ 0.30454617738f, -0.41493862867f, -0.005565764375f,
+ -0.12584099173f, -0.12319286912f, 0.2407919466f,
+ -0.08879069983f};
+ // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToInputWeightsDimensions{0};
+ std::vector<float> cellToInputWeightsValue;
+ // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToForgetWeightsDimensions{0};
+ std::vector<float> cellToForgetWeightsValue;
+ // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToOutputWeightsDimensions{0};
+ std::vector<float> cellToOutputWeightsValue;
+ // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> inputGateBiasDimensions{numUnits};
+ std::vector<float> inputGateBiasValue(numUnits, 0.0f);
+ // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> forgetGateBiasDimensions{numUnits};
+ std::vector<float> forgetGateBiasValue(numUnits, 1.0f);
+ // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellBiasDimensions{numUnits};
+ std::vector<float> cellBiasValue(numUnits, 0.0f);
+ // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> outputGateBiasDimensions{numUnits};
+ std::vector<float> outputGateBiasValue(numUnits, 0.0f);
+ // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [output_size, num_units].
+ hidl_vec<uint32_t> projectionWeightsDimensions{0};
+ std::vector<float> projectionWeightsValue;
+ // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size].
+ hidl_vec<uint32_t> projectionBiasDimensions{0};
+ std::vector<float> projectionBiasValue;
+
+ // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size].
+ hidl_vec<uint32_t> outputStateInDimensions{batchSize, outputSize};
+ std::vector<float> outputStateInValue(batchSize * outputSize, 0.0f);
+ // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units].
+ hidl_vec<uint32_t> cellStateInDimensions{batchSize, numUnits};
+ std::vector<float> cellStateInValue(batchSize * numUnits, 0.0f);
+
+ // Constant scalar values (the VTS test adds these as tensors of dim {})
+ // 20: The activation function: A value indicating the activation function:
+ // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid.
+ hidl_vec<uint32_t> activationFunctionDimensions{};
+ std::vector<int32_t> activationFunctionValue{4};
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip].
+ // If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> cellClippingThresholdDimensions{};
+ std::vector<float> cellClippingThresholdValue{10.0f};
+ // 22: The clipping threshold: for the output from the projection layer, such that values are bound within
+ // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> projectionClippingThresholdDimensions{};
+ std::vector<float> projectionClippingThresholdValue{0.f};
+
+ // 23: Time-major if true, batch-major if false.
+ bool timeMajorValue = false;
+
+ // Normalization:
+ // 24: The input layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at input gate.
+ hidl_vec<uint32_t> inputLayerNormWeightsDimensions{0};
+ std::vector<float> inputLayerNormWeightsValue;
+ // 25: The forget layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at forget gate.
+ hidl_vec<uint32_t> forgetLayerNormWeightsDimensions{0};
+ std::vector<float> forgetLayerNormWeightsValue;
+ // 26: The cell layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at cell gate.
+ hidl_vec<uint32_t> cellLayerNormWeightsDimensions{0};
+ std::vector<float> cellLayerNormWeightsValue;
+ // 27: The output layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at output gate.
+ hidl_vec<uint32_t> outputLayerNormWeightsDimensions{0};
+ std::vector<float> outputLayerNormWeightsValue;
+
+ // Outputs:
+ // 0: The output: A 3-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16. Shape: if time-major:
+ // [max_time, batch_size, output_size] If batch-major: [batch_size, max_time, output_size]
+ hidl_vec<uint32_t> outputDimensions{batchSize, timeSize, outputSize};
+ std::vector<float> outputValue{-0.07149004f, -0.1621171f, -0.17516759f, -0.0232934225f,
+ -0.16810727f, -0.41412935f, -0.5498753f, -0.00803578f,
+ -0.06687349f, 0.204077631f, -0.4276504f, -0.03123213f,
+ -0.12000261f, -0.0941918f, -0.45639035f, -0.02870186f,
+ -0.03429216f, 0.20824050f, -0.6569892f, -0.004152651f,
+ -0.10493034f, 0.14210969f, -0.58347696f, -0.03297536f};
+
+ // 1: The hidden state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, output_size]. This output is optional and can be omitted. If this output
+ // is present then output #2 must be present as well.
+ hidl_vec<uint32_t> hiddenStateOutDimensions{batchSize, outputSize};
+ std::vector<float> hiddenStateOutValue(batchSize * outputSize, 0.f);
+ // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, num_units]. This output is optional and can be omitted.
+ hidl_vec<uint32_t> cellStateOutDimensions{batchSize, numUnits};
+ std::vector<float> cellStateOutValue(batchSize * numUnits, 0.f);
+
+ UnidirectionalSequenceLstmTestImpl<HalPolicy>(inputDimensions, inputValue,
+ inputToInputWeightsDimensions, inputToInputWeightsValue,
+ inputToForgetWeightsDimensions, inputToForgetWeightsValue,
+ inputToCellWeightsDimensions, inputToCellWeightsValue,
+ inputToOutputWeightsDimensions, inputToOutputWeightsValue,
+ recurrentToInputWeightsDimensions, recurrentToInputWeightsValue,
+ recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue,
+ recurrentToCellWeightsDimensions, recurrentToCellWeightsValue,
+ recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue,
+ cellToInputWeightsDimensions, cellToInputWeightsValue,
+ cellToForgetWeightsDimensions, cellToForgetWeightsValue,
+ cellToOutputWeightsDimensions, cellToOutputWeightsValue,
+ inputGateBiasDimensions, inputGateBiasValue,
+ forgetGateBiasDimensions, forgetGateBiasValue,
+ cellBiasDimensions, cellBiasValue,
+ outputGateBiasDimensions, outputGateBiasValue,
+ projectionWeightsDimensions, projectionWeightsValue,
+ projectionBiasDimensions, projectionBiasValue,
+ outputStateInDimensions, outputStateInValue,
+ cellStateInDimensions, cellStateInValue,
+ activationFunctionDimensions, activationFunctionValue,
+ cellClippingThresholdDimensions, cellClippingThresholdValue,
+ projectionClippingThresholdDimensions,
+ projectionClippingThresholdValue,
+ timeMajorValue,
+ inputLayerNormWeightsDimensions, inputLayerNormWeightsValue,
+ forgetLayerNormWeightsDimensions, forgetLayerNormWeightsValue,
+ cellLayerNormWeightsDimensions, cellLayerNormWeightsValue,
+ outputLayerNormWeightsDimensions, outputLayerNormWeightsValue,
+ outputDimensions, outputValue,
+ hiddenStateOutDimensions, hiddenStateOutValue,
+ cellStateOutDimensions, cellStateOutValue,
+ compute);
+}
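+
+ // Example usage (illustrative sketch only, assuming the hal_1_2 policy and the CpuRef backend):
+ //     DOCTEST_TEST_CASE("UnidirectionalSequenceLstmLayerFloat32Test")
+ //     {
+ //         UnidirectionalSequenceLstmLayerFloat32TestImpl<hal_1_2::HalPolicy>(armnn::Compute::CpuRef);
+ //     }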
+
+template<typename HalPolicy>
+void UnidirectionalSequenceLstmLayerFloat32TimeMajorTestImpl(armnn::Compute compute)
+{
+ uint32_t batchSize = 3;
+ uint32_t timeSize = 2;
+ uint32_t inputSize = 3;
+ uint32_t outputSize = 4;
+ uint32_t numUnits = outputSize;
+
+ // Inputs:
+ // 00: The input: A 3-D tensor of shape: If time-major: [max_time, batch_size, input_size] If batch-major:
+ // [batch_size, max_time, input_size] where “max_time” is the number of timesteps (sequence length),
+ // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
+ hidl_vec<uint32_t> inputDimensions{timeSize, batchSize, inputSize};
+ std::vector<float> inputValue{1., 2., 3., 4., 5., 4.,
+ 3., 2., 1., 2., 3., 4.,
+ 5., 4., 3., 2., 1., 2.};
+
+ // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size], where “num_units” corresponds to the number of cell units.
+ hidl_vec<uint32_t> inputToInputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToInputWeightsValue{0.27277296781539917f, 0.3813590407371521f, -0.394489049911499f,
+ 0.2782636880874634f, -0.3793870210647583f, -0.018918335437774658f,
+ 0.2724653482437134f, -0.19314253330230713f, -0.2947450876235962f,
+ -0.30253493785858154f, 0.4241350293159485f, -0.22560018301010132f};
+ // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToForgetWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToForgetWeightsValue{-0.2667974531650543f, -0.05505800247192383f, -0.20932340621948242f,
+ -0.14345619082450867f, 0.09666192531585693f, -0.2604355812072754f,
+ -0.2681812047958374f, -0.3314584493637085f, 0.4485899806022644f,
+ -0.23467743396759033f, 0.5072842240333557f, -0.4192768931388855f};
+ // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size].
+ hidl_vec<uint32_t> inputToCellWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToCellWeightsValue{-0.15782442688941956f, -0.027530014514923096f, 0.4789854884147644f,
+ 0.23227906227111816f, 0.28259342908859253f, -0.030095696449279785f,
+ 0.10071521997451782f, -0.08535495400428772f, 0.18563997745513916f,
+ -0.3049069046974182f, -0.478048175573349f, 0.025234103202819824f};
+ // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToOutputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToOutputWeightsValue{-0.04584759473800659f, -0.2716066539287567f, 0.012970447540283203f,
+ -0.4729190170764923f, -0.37422770261764526f, 0.49352723360061646f,
+ 0.3163864016532898f, -0.436781644821167f, -0.33074596524238586f,
+ -0.32885751128196716f, -0.40959352254867554f, -0.2124689817428589f};
+ // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e.,
+ // “num_units”), or the second dimension of the “projection_weights”, if defined.
+ hidl_vec<uint32_t> recurrentToInputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToInputWeightsValue{0.23788475990f, -0.24948765337f, 0.50044941902f,
+ 0.14431896805f, -0.115940228137f, -0.717082679f,
+ -0.17208620906f, 0.17850610617f, -0.16702319684f,
+ -0.11384502053f, -0.309785276245f, -0.3316611672f,
+ 0.52380162477f, -0.06839632987f, -0.391478359627f,
+ -0.10756178963f};
+ // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToForgetWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToForgetWeightsValue{0.11383482068f, 0.1676601767f, -0.08550968004f, 0.03399394089f,
+ 0.08042152225f, -0.2133381964f, 0.05182432704f, 0.38161808255f,
+ -0.5018365979f, -0.08043262364f, 0.07894329014f, -0.07547105155f,
+ 0.12047368288f, 0.2986997961f, 0.0485043078f, -0.13372567296f};
+ // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToCellWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToCellWeightsValue{0.0433832928545f, 0.07587072294f, -0.120520234107f, 0.604576051f,
+ -0.434353142986f, 0.009314475068f, 0.005085289478f, 0.08488202038f,
+ -0.00025437487886f, 0.15245915082f, -0.1936587542f, 0.004754020f,
+ -0.1582719236f, 0.3307867646f, 0.0236605107784f, 0.307716339826f};
+ // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToOutputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToOutputWeightsValue{-0.079031050201f, 0.041414566286f, -0.583727357285f,
+ 0.1025384515f, -0.172372072937f, 0.09214124082f,
+ 0.178184121827f, -0.2439443916f, 0.104485116899f,
+ 0.2600405514f, 0.064414866268f, 0.24141204357f,
+ 0.281875759363f, -0.14234502664f, 0.15126448862f,
+ -0.24421440064f};
+ // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToInputWeightsDimensions{0};
+ std::vector<float> cellToInputWeightsValue;
+ // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToForgetWeightsDimensions{0};
+ std::vector<float> cellToForgetWeightsValue;
+ // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToOutputWeightsDimensions{0};
+ std::vector<float> cellToOutputWeightsValue;
+ // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> inputGateBiasDimensions{numUnits};
+ std::vector<float> inputGateBiasValue(numUnits, 0.0f);
+ // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> forgetGateBiasDimensions{numUnits};
+ std::vector<float> forgetGateBiasValue(numUnits, 1.0f);
+ // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellBiasDimensions{numUnits};
+ std::vector<float> cellBiasValue(numUnits, 0.0f);
+ // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> outputGateBiasDimensions{numUnits};
+ std::vector<float> outputGateBiasValue(numUnits, 0.0f);
+ // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [output_size, num_units].
+ hidl_vec<uint32_t> projectionWeightsDimensions{0};
+ std::vector<float> projectionWeightsValue;
+ // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size].
+ hidl_vec<uint32_t> projectionBiasDimensions{0};
+ std::vector<float> projectionBiasValue;
+
+ // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size].
+ hidl_vec<uint32_t> outputStateInDimensions{batchSize, outputSize};
+ std::vector<float> outputStateInValue(batchSize * outputSize, 0.0f);
+ // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units].
+ hidl_vec<uint32_t> cellStateInDimensions{batchSize, numUnits};
+ std::vector<float> cellStateInValue(batchSize * numUnits, 0.0f);
+
+ // Constant scalar values (the VTS test adds these as tensors of dim {})
+ // 20: The activation function: A value indicating the activation function:
+ // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid.
+ hidl_vec<uint32_t> activationFunctionDimensions{};
+ std::vector<int32_t> activationFunctionValue{4};
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip].
+ // If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> cellClippingThresholdDimensions{};
+ std::vector<float> cellClippingThresholdValue{10.0f};
+ // 22: The clipping threshold: for the output from the projection layer, such that values are bound within
+ // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> projectionClippingThresholdDimensions{};
+ std::vector<float> projectionClippingThresholdValue{0.f};
+
+ // 23: Time-major if true, batch-major if false.
+ bool timeMajorValue = true;
+
+ // Normalization:
+ // 24: The input layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at input gate.
+ hidl_vec<uint32_t> inputLayerNormWeightsDimensions{0};
+ std::vector<float> inputLayerNormWeightsValue;
+ // 25: The forget layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at forget gate.
+ hidl_vec<uint32_t> forgetLayerNormWeightsDimensions{0};
+ std::vector<float> forgetLayerNormWeightsValue;
+ // 26: The cell layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at cell gate.
+ hidl_vec<uint32_t> cellLayerNormWeightsDimensions{0};
+ std::vector<float> cellLayerNormWeightsValue;
+ // 27: The output layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at output gate.
+ hidl_vec<uint32_t> outputLayerNormWeightsDimensions{0};
+ std::vector<float> outputLayerNormWeightsValue;
+
+ // Outputs:
+ // 0: The output: A 3-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16. Shape: if time-major:
+ // [max_time, batch_size, output_size] If batch-major: [batch_size, max_time, output_size]
+ hidl_vec<uint32_t> outputDimensions{timeSize, batchSize, outputSize};
+ std::vector<float> outputValue{0.135657698f, 0.124672532f, 0.0212090332f, -0.0530203655f,
+ 0.106138252f, 0.0404792242f, 0.0151643595f, -0.00675163185f,
+ -0.0128514022f, 0.0644884035f, 0.0709072053f, -0.0454045124f,
+ 0.16288602f, 0.16649379f, 0.02770456f, -0.03698075f,
+ 0.11171641f, 0.043119f, 0.0762981f, -0.01228541f,
+ 0.10439701f, 0.21439962f, 0.11919238f, -0.08390583f};
+
+ // 1: The hidden state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, output_size]. This output is optional and can be omitted. If this output
+ // is present then output #2 must be present as well.
+ hidl_vec<uint32_t> hiddenStateOutDimensions{batchSize, outputSize};
+ std::vector<float> hiddenStateOutValue(batchSize * outputSize, 0.f);
+ // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, num_units]. This output is optional and can be omitted.
+ hidl_vec<uint32_t> cellStateOutDimensions{batchSize, numUnits};
+ std::vector<float> cellStateOutValue(batchSize * numUnits, 0.f);
+
+ UnidirectionalSequenceLstmTestImpl<HalPolicy>(inputDimensions, inputValue,
+ inputToInputWeightsDimensions, inputToInputWeightsValue,
+ inputToForgetWeightsDimensions, inputToForgetWeightsValue,
+ inputToCellWeightsDimensions, inputToCellWeightsValue,
+ inputToOutputWeightsDimensions, inputToOutputWeightsValue,
+ recurrentToInputWeightsDimensions, recurrentToInputWeightsValue,
+ recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue,
+ recurrentToCellWeightsDimensions, recurrentToCellWeightsValue,
+ recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue,
+ cellToInputWeightsDimensions, cellToInputWeightsValue,
+ cellToForgetWeightsDimensions, cellToForgetWeightsValue,
+ cellToOutputWeightsDimensions, cellToOutputWeightsValue,
+ inputGateBiasDimensions, inputGateBiasValue,
+ forgetGateBiasDimensions, forgetGateBiasValue,
+ cellBiasDimensions, cellBiasValue,
+ outputGateBiasDimensions, outputGateBiasValue,
+ projectionWeightsDimensions, projectionWeightsValue,
+ projectionBiasDimensions, projectionBiasValue,
+ outputStateInDimensions, outputStateInValue,
+ cellStateInDimensions, cellStateInValue,
+ activationFunctionDimensions, activationFunctionValue,
+ cellClippingThresholdDimensions, cellClippingThresholdValue,
+ projectionClippingThresholdDimensions,
+ projectionClippingThresholdValue,
+ timeMajorValue,
+ inputLayerNormWeightsDimensions, inputLayerNormWeightsValue,
+ forgetLayerNormWeightsDimensions, forgetLayerNormWeightsValue,
+ cellLayerNormWeightsDimensions, cellLayerNormWeightsValue,
+ outputLayerNormWeightsDimensions, outputLayerNormWeightsValue,
+ outputDimensions, outputValue,
+ hiddenStateOutDimensions, hiddenStateOutValue,
+ cellStateOutDimensions, cellStateOutValue,
+ compute);
+}
+
+template<typename HalPolicy>
+void UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionTestImpl(armnn::Compute compute)
+{
+ uint32_t batchSize = 2;
+ uint32_t timeSize = 3;
+ uint32_t inputSize = 4;
+ uint32_t outputSize = 5;
+ uint32_t numUnits = 6;
+
+ // Inputs:
+ // 00: The input: A 3-D tensor of shape: If time-major: [max_time, batch_size, input_size] If batch-major:
+ // [batch_size, max_time, input_size] where “max_time” is the number of timesteps (sequence length),
+ // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
+ hidl_vec<uint32_t> inputDimensions{batchSize, timeSize, inputSize};
+ std::vector<float> inputValue{1., 2., 3., 4., 5., 4.,
+ 3., 2., 1., 2., 3., 4.,
+ 5., 4., 3., 2., 1., 2.,
+ 1., 2., 3., 4., 5., 4.};
+
+ // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size], where “num_units” corresponds to the number of cell units.
+ hidl_vec<uint32_t> inputToInputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToInputWeightsValue{0.021393683f, 0.06124551f, 0.046905167f, -0.014657677f,
+ -0.03149463f, 0.09171803f, 0.14647801f, 0.10797193f,
+ -0.0057968358f, 0.0019193048f, -0.2726754f, 0.10154029f,
+ -0.018539885f, 0.080349885f, -0.10262385f, -0.022599787f,
+ -0.09121155f, -0.008675967f, -0.045206103f, -0.0821282f,
+ -0.008045952f, 0.015478081f, 0.055217247f, 0.038719587f};
+ // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToForgetWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToForgetWeightsValue{-0.0018401089f, -0.004852237f, 0.03698424f, 0.014181704f,
+ 0.028273236f, -0.016726194f, -0.05249759f, -0.10204261f,
+ 0.00861066f, -0.040979505f, -0.009899187f, 0.01923892f,
+ -0.028177269f, -0.08535103f, -0.14585495f, 0.10662567f,
+ -0.01909731f, -0.017883534f, -0.0047269356f, -0.045103323f,
+ 0.0030784295f, 0.076784775f, 0.07463696f, 0.094531395f};
+ // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size].
+ hidl_vec<uint32_t> inputToCellWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToCellWeightsValue{-0.04580283f, -0.09549462f, -0.032418985f, -0.06454633f,
+ -0.043528453f, 0.043018587f, -0.049152344f, -0.12418144f,
+ -0.078985475f, -0.07596889f, 0.019484362f, -0.11434962f,
+ -0.0074034138f, -0.06314844f, -0.092981495f, 0.0062155537f,
+ -0.025034338f, -0.0028890965f, 0.048929527f, 0.06235075f,
+ 0.10665918f, -0.032036792f, -0.08505916f, -0.10843358f};
+ // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToOutputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToOutputWeightsValue{-0.0998932f, -0.07201956f, -0.052803773f, -0.15629593f,
+ -0.15001918f, -0.07650751f, 0.02359855f, -0.075155355f,
+ -0.08037709f, -0.15093534f, 0.029517552f, -0.04751393f,
+ 0.010350531f, -0.02664851f, -0.016839722f, -0.023121163f,
+ 0.0077019283f, 0.012851257f, -0.05040649f, -0.0129761f,
+ -0.021737747f, -0.038305793f, -0.06870586f, -0.01481247f};
+ // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e.,
+ // “num_units”), or the second dimension of the “projection_weights”, if defined.
+ hidl_vec<uint32_t> recurrentToInputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToInputWeightsValue{-0.001374326f, -0.078856036f, 0.10672688f, 0.029162422f,
+ -0.11585556f, 0.02557986f, -0.13446963f, -0.035785314f,
+ -0.01244275f, 0.025961924f, -0.02337298f, -0.044228926f,
+ -0.055839065f, -0.046598054f, -0.010546039f, -0.06900766f,
+ 0.027239809f, 0.022582639f, -0.013296484f, -0.05459212f,
+ 0.08981f, -0.045407712f, 0.08682226f, -0.06867011f,
+ -0.14390695f, -0.02916037f, 0.000996957f, 0.091420636f,
+ 0.14283475f, -0.07390571f};
+ // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToForgetWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToForgetWeightsValue{-0.057784554f, -0.026057621f, -0.068447545f, -0.022581743f,
+ 0.14811787f, 0.10826372f, 0.09471067f, 0.03987225f,
+ -0.0039523416f, 0.00030638507f, 0.053185795f, 0.10572994f,
+ 0.08414449f, -0.022036452f, -0.00066928595f, -0.09203576f,
+ 0.032950465f, -0.10985798f, -0.023809856f, 0.0021431844f,
+ -0.02196096f, -0.00326074f, 0.00058621005f, -0.074678116f,
+ -0.06193199f, 0.055729095f, 0.03736828f, 0.020123724f,
+ 0.061878487f, -0.04729229f};
+ // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToCellWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToCellWeightsValue{-0.037322544f, 0.018592842f, 0.0056175636f, -0.06253426f,
+ 0.055647098f, -0.05713207f, -0.05626563f, 0.005559383f,
+ 0.03375411f, -0.025757805f, -0.088049285f, 0.06017052f,
+ -0.06570978f, 0.007384076f, 0.035123326f, -0.07920549f,
+ 0.053676967f, 0.044480428f, -0.07663568f, 0.0071805613f,
+ 0.08089997f, 0.05143358f, 0.038261272f, 0.03339287f,
+ -0.027673481f, 0.044746667f, 0.028349208f, 0.020090483f,
+ -0.019443132f, -0.030755889f};
+ // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToOutputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToOutputWeightsValue{0.025825322f, -0.05813119f, 0.09495884f,
+ -0.045984812f, -0.01255415f, -0.0026479573f,
+ -0.08196161f, -0.054914974f, -0.0046604523f,
+ -0.029587349f, -0.044576716f, -0.07480124f,
+ -0.082868785f, 0.023254942f, 0.027502948f,
+ -0.0039728214f, -0.08683098f, -0.08116779f,
+ -0.014675607f, -0.037924774f, -0.023314456f,
+ -0.007401714f, -0.09255757f, 0.029460307f,
+ -0.08829125f, -0.005139627f, -0.08989442f,
+ -0.0555066f, 0.13596267f, 0.025062224f};
+ // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToInputWeightsDimensions{numUnits};
+ std::vector<float> cellToInputWeightsValue{0.040369894f, 0.030746894f, 0.24704495f,
+ 0.018586371f, -0.037586458f, -0.15312155f};
+ // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToForgetWeightsDimensions{numUnits};
+ std::vector<float> cellToForgetWeightsValue{-0.01998659f, -0.15568835f, -0.24248174f,
+ -0.012770197f, 0.041331276f, -0.072311886f};
+ // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToOutputWeightsDimensions{numUnits};
+ std::vector<float> cellToOutputWeightsValue{0.08286371f, -0.08261836f, -0.51210177f,
+ 0.002913762f, 0.17764764f, -0.5495371f};
+ // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> inputGateBiasDimensions{numUnits};
+ std::vector<float> inputGateBiasValue{0.02234832f, 0.14757581f, 0.18176508f,
+ 0.10380666f, 0.053110216f, -0.06928846f};
+ // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> forgetGateBiasDimensions{numUnits};
+ std::vector<float> forgetGateBiasValue{0.035185695f, -0.042891346f, -0.03032477f,
+ 0.23027696f, 0.11098921f, 0.08989442f};
+ // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellBiasDimensions{numUnits};
+ std::vector<float> cellBiasValue{-0.024379363f, 0.0055531194f, 0.23377132f,
+ 0.033463873f, -0.1483596f, 0.029460307f};
+ // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> outputGateBiasDimensions{numUnits};
+ std::vector<float> outputGateBiasValue{0.046159424f, -0.0012809046f, 0.03563469f,
+ 0.12648113f, 0.027195795f, 0.35373217f};
+ // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [output_size, num_units].
+ hidl_vec<uint32_t> projectionWeightsDimensions{numUnits, outputSize};
+ std::vector<float> projectionWeightsValue{-0.009802181f, 0.09401916f, 0.0717386f, -0.13895074f, 0.09641832f,
+ 0.060420845f, 0.08539281f, 0.054285463f, 0.061395317f, 0.034448683f,
+ -0.042991187f, 0.019801661f, -0.16840284f, -0.015726732f, -0.23041931f,
+ -0.024478018f, -0.10959692f, -0.013875541f, 0.18600968f, -0.061274476f,
+ 0.0138165f, -0.08160894f, -0.07661644f, 0.032372914f, 0.16169067f,
+ 0.22465782f, -0.03993472f, -0.004017731f, 0.08633481f, -0.28869787f};
+ // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size].
+ hidl_vec<uint32_t> projectionBiasDimensions{outputSize};
+ std::vector<float> projectionBiasValue(outputSize, 0.f);
+
+ // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size].
+ hidl_vec<uint32_t> outputStateInDimensions{batchSize, outputSize};
+ std::vector<float> outputStateInValue(batchSize * outputSize, 0.f);
+ // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units].
+ hidl_vec<uint32_t> cellStateInDimensions{batchSize, numUnits};
+ std::vector<float> cellStateInValue(batchSize * numUnits, 0.f);
+
+ // Constant scalar values (the VTS test adds these as tensors of dim {})
+ // 20: The activation function: A value indicating the activation function:
+ // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid.
+ hidl_vec<uint32_t> activationFunctionDimensions{};
+ std::vector<int32_t> activationFunctionValue{4};
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip].
+ // If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> cellClippingThresholdDimensions{};
+ std::vector<float> cellClippingThresholdValue{10.0f};
+ // 22: The clipping threshold: for the output from the projection layer, such that values are bound within
+ // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> projectionClippingThresholdDimensions{};
+ std::vector<float> projectionClippingThresholdValue{0.f};
+
+ // 23: Time-major if true, batch-major if false.
+ bool timeMajorValue = false;
+
+ // Normalization:
+ // 24: The input layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at input gate.
+ hidl_vec<uint32_t> inputLayerNormWeightsDimensions{0};
+ std::vector<float> inputLayerNormWeightsValue;
+ // 25: The forget layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at forget gate.
+ hidl_vec<uint32_t> forgetLayerNormWeightsDimensions{0};
+ std::vector<float> forgetLayerNormWeightsValue;
+ // 26: The cell layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at cell gate.
+ hidl_vec<uint32_t> cellLayerNormWeightsDimensions{0};
+ std::vector<float> cellLayerNormWeightsValue;
+ // 27: The output layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at output gate.
+ hidl_vec<uint32_t> outputLayerNormWeightsDimensions{0};
+ std::vector<float> outputLayerNormWeightsValue;
+
+ // Outputs:
+ // 0: The output: A 3-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16. Shape: if time-major:
+ // [max_time, batch_size, output_size] If batch-major: [batch_size, max_time, output_size]
+ hidl_vec<uint32_t> outputDimensions{batchSize, timeSize, outputSize};
+ std::vector<float> outputValue{-0.0135612f, -0.0263441f, 0.0314008f, -0.00883455f, 0.00763052f,
+ -0.00126877f, -0.0292959f, 0.0449957f, -0.00976195f, -0.00492338f,
+ -0.0175702f, -0.0431753f, 0.0597117f, -0.0169154f, 0.0142087f,
+ 0.00472515f, -0.0196355f, 0.0342524f, -0.00407936f, -0.0253189f,
+ -0.00512944f, -0.0293754f, 0.0512771f, -0.0151874f, -0.0246433f,
+ -0.00744986f, -0.0345103f, 0.0450666f, -0.00944991f, 0.0127171f};
+
+ // 1: The hidden state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, output_size]. This output is optional and can be omitted. If this output
+ // is present then output #2 must be present as well.
+ hidl_vec<uint32_t> hiddenStateOutDimensions{batchSize, outputSize};
+ std::vector<float> hiddenStateOutValue(batchSize * outputSize, 0.f);
+ // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, num_units]. This output is optional and can be omitted.
+ hidl_vec<uint32_t> cellStateOutDimensions{batchSize, numUnits};
+ std::vector<float> cellStateOutValue(batchSize * numUnits, 0.f);
+
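+ // this variant checks the reference data with a custom tolerance (the final
+ // 0.0031454 argument below) rather than doctest's default Approx epsilon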
+ UnidirectionalSequenceLstmTestImpl<HalPolicy>(inputDimensions, inputValue,
+ inputToInputWeightsDimensions, inputToInputWeightsValue,
+ inputToForgetWeightsDimensions, inputToForgetWeightsValue,
+ inputToCellWeightsDimensions, inputToCellWeightsValue,
+ inputToOutputWeightsDimensions, inputToOutputWeightsValue,
+ recurrentToInputWeightsDimensions, recurrentToInputWeightsValue,
+ recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue,
+ recurrentToCellWeightsDimensions, recurrentToCellWeightsValue,
+ recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue,
+ cellToInputWeightsDimensions, cellToInputWeightsValue,
+ cellToForgetWeightsDimensions, cellToForgetWeightsValue,
+ cellToOutputWeightsDimensions, cellToOutputWeightsValue,
+ inputGateBiasDimensions, inputGateBiasValue,
+ forgetGateBiasDimensions, forgetGateBiasValue,
+ cellBiasDimensions, cellBiasValue,
+ outputGateBiasDimensions, outputGateBiasValue,
+ projectionWeightsDimensions, projectionWeightsValue,
+ projectionBiasDimensions, projectionBiasValue,
+ outputStateInDimensions, outputStateInValue,
+ cellStateInDimensions, cellStateInValue,
+ activationFunctionDimensions, activationFunctionValue,
+ cellClippingThresholdDimensions, cellClippingThresholdValue,
+ projectionClippingThresholdDimensions,
+ projectionClippingThresholdValue,
+ timeMajorValue,
+ inputLayerNormWeightsDimensions, inputLayerNormWeightsValue,
+ forgetLayerNormWeightsDimensions, forgetLayerNormWeightsValue,
+ cellLayerNormWeightsDimensions, cellLayerNormWeightsValue,
+ outputLayerNormWeightsDimensions, outputLayerNormWeightsValue,
+ outputDimensions, outputValue,
+ hiddenStateOutDimensions, hiddenStateOutValue,
+ cellStateOutDimensions, cellStateOutValue,
+ compute, 0.0031454);
+}
+
+template<typename HalPolicy>
+void UnidirectionalSequenceLstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl(armnn::Compute compute)
+{
+ uint32_t batchSize = 3;
+ uint32_t timeSize = 2;
+ uint32_t inputSize = 3;
+ uint32_t outputSize = 4;
+ uint32_t numUnits = 5;
+
+ // Inputs:
+ // 00: The input: A 3-D tensor of shape: If time-major: [max_time, batch_size, input_size] If batch-major:
+ // [batch_size, max_time, input_size] where “max_time” is the number of timesteps (sequence length),
+ // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
+ hidl_vec<uint32_t> inputDimensions{batchSize, timeSize, inputSize};
+ std::vector<float> inputValue{1., 2., 3., 4., 5., 4.,
+ 3., 2., 1., 2., 3., 4.,
+ 5., 4., 3., 2., 1., 2.};
+
+ // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size], where “num_units” corresponds to the number of cell units.
+ hidl_vec<uint32_t> inputToInputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToInputWeightsValue{-0.49536117f, -0.0556083915f, -0.102400711f,
+ -0.117484632f, 0.3298470976f, -0.1179017122f,
+ 0.214305695f, 0.42135173085f, 0.003878414626f,
+ -0.348303917f, -0.1881275477f, 0.0343011027f,
+ -0.38837709614f, -0.05636804124f, 0.4259087456f};
+ // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToForgetWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToForgetWeightsValue{0.2415594226f, 0.15400093799f, 0.4566498398f,
+ -0.3810434485f, 0.268383264f, -0.009807467424f,
+ -0.3522925403f, -0.24275735512f, -0.28344226125f,
+ 0.13512269116f, -0.4932442977f, -0.10039821991f,
+ 0.2726137042f, 0.09216640889f, -0.06551410215f};
+ // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size].
+ hidl_vec<uint32_t> inputToCellWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToCellWeightsValue{-0.2504855627f, 0.184490025045f, -0.2480507493f,
+ 0.386399507f, -0.259465157985f, -0.16545993089f,
+ -0.4230232555f, 0.341664791103f, -0.18127849691f,
+ -0.2277662414f, -0.55275535589f, 0.34184026718f,
+ 0.3954237699f, -0.19407111404f, 0.30412107706f};
+ // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToOutputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToOutputWeightsValue{0.2303854227f, 0.5218806862f, -0.4865379333f,
+ 0.53969591851f, 0.23393625035f, -0.27140527306f,
+ 0.50009280443f, 0.07511717046f, 0.3998299249f,
+ -0.51717478049f, 0.1889653282f, -0.367323637f,
+ -0.12584099173f, -0.12319286912f, 0.2407919466f};
+ // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e.,
+ // “num_units”), or the second dimension of the “projection_weights”, if defined.
+ hidl_vec<uint32_t> recurrentToInputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToInputWeightsValue{-0.128009796112f, 0.1995525098f, -0.07745539397f, 0.1558421701f,
+ -0.265254765766f, -0.38837709614f, -0.05636804124f, 0.4259087456f,
+ 0.17628988623f, 0.3877420127f, 0.53300309181f, -0.0959980934f,
+ 0.00302857416f, 0.3266998827f, -0.142509296562f, -0.04433270756f,
+ 0.54066205f, -0.32668582f, -0.43562764f, -0.56094903f};
+ // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToForgetWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToForgetWeightsValue{-0.09499983487f, -0.08814888417f, -0.04834804721f, 0.1516668247f,
+ -0.3967529535f, -0.06463699788f, 0.4952811002f, 0.003274492938f,
+ -0.0968840941f, 0.17928104102f, 0.0031281141592f, -0.3387276584f,
+ -0.3587934076f, 0.06705895066f, 0.22463923692f, 0.1961955726f,
+ 0.01841056f, -0.32764608f, -0.33027974f, -0.10826075f};
+ // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToCellWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToCellWeightsValue{-0.21938985582f, -0.3023648226f, -0.1170005202f, -0.3509177422f,
+ -0.4286288613f, 0.2726137042f, 0.09216640889f, -0.06551410215f,
+ 0.20453298098f, 0.2393476665f, 0.11846517771f, 0.2630801796f,
+ 0.3954237699f, -0.19407111404f, 0.30412107706f, -0.27342408554f,
+ 0.19069612f, -0.03026325f, -0.54532051f, 0.33003211f};
+ // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToOutputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToOutputWeightsValue{-0.32921677827f, 0.32624614238f, -0.1388191282f,
+ -0.17879831790f, -0.15185534954f, -0.16918526583f,
+ -0.10087361183f, -0.5436913968f, 0.016758225858f,
+ 0.30454617738f, -0.41493862867f, -0.005565764375f,
+ -0.12584099173f, -0.12319286912f, 0.2407919466f,
+ -0.08879069983f, 0.11178309f, 0.09481031f,
+ -0.26424935f, 0.46261835f};
+ // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToInputWeightsDimensions{numUnits};
+ std::vector<float> cellToInputWeightsValue{0.05f, 0.1f, 0.25f, 0.15f, -0.02f};
+ // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToForgetWeightsDimensions{numUnits};
+ std::vector<float> cellToForgetWeightsValue{-0.02f, -0.15f, -0.25f, -0.03f, 0.15f};
+ // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToOutputWeightsDimensions{numUnits};
+ std::vector<float> cellToOutputWeightsValue{0.1f, -0.1f, -0.5f, 0.05f, 0.01f};
+ // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> inputGateBiasDimensions{numUnits};
+ std::vector<float> inputGateBiasValue{0.03f, 0.15f, 0.22f, 0.38f, 0.05f};
+ // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> forgetGateBiasDimensions{numUnits};
+ std::vector<float> forgetGateBiasValue{0.1f, -0.3f, -0.2f, 0.1f, 0.4f};
+ // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellBiasDimensions{numUnits};
+ std::vector<float> cellBiasValue{-0.05f, 0.72f, 0.25f, 0.08f, 0.1f};
+ // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> outputGateBiasDimensions{numUnits};
+ std::vector<float> outputGateBiasValue{0.05f, -0.01f, 0.2f, 0.1f, -0.2f};
+ // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [output_size, num_units].
+ hidl_vec<uint32_t> projectionWeightsDimensions{numUnits, outputSize};
+ std::vector<float> projectionWeightsValue{-0.1f, 0.2f, 0.01f, -0.2f,
+ 0.1f, 0.5f, 0.3f, 0.08f,
+ 0.07f, 0.2f, -0.4f, 0.2f,
+ 0.5f, -0.4f, 0.3f, -0.2f,
+ 0.3f, 0.08f, -0.07f, 0.2f};
+ // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size].
+ hidl_vec<uint32_t> projectionBiasDimensions{outputSize};
+ std::vector<float> projectionBiasValue(outputSize, 0.f);
+
+ // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size].
+ hidl_vec<uint32_t> outputStateInDimensions{batchSize, outputSize};
+ std::vector<float> outputStateInValue(batchSize * outputSize, 0.f);
+ // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units].
+ hidl_vec<uint32_t> cellStateInDimensions{batchSize, numUnits};
+ std::vector<float> cellStateInValue(batchSize * numUnits, 0.f);
+
+ // Constant scalar values (the VTS test adds these as tensors of dim {})
+ // 20: The activation function: A value indicating the activation function:
+ // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid.
+ hidl_vec<uint32_t> activationFunctionDimensions{};
+ std::vector<int32_t> activationFunctionValue{4};
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip].
+ // If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> cellClippingThresholdDimensions{};
+ std::vector<float> cellClippingThresholdValue{10.0f};
+ // 22: The clipping threshold: for the output from the projection layer, such that values are bound within
+ // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> projectionClippingThresholdDimensions{};
+ std::vector<float> projectionClippingThresholdValue{0.f};
+
+ // 23: Time-major if true, batch-major if false.
+ bool timeMajorValue = false;
+
+ // Normalization:
+ // 24: The input layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at input gate.
+ hidl_vec<uint32_t> inputLayerNormWeightsDimensions{numUnits};
+ std::vector<float> inputLayerNormWeightsValue{0.1f, 0.2f, 0.3f, 0.5f, 0.8f};
+ // 25: The forget layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at forget gate.
+ hidl_vec<uint32_t> forgetLayerNormWeightsDimensions{numUnits};
+ std::vector<float> forgetLayerNormWeightsValue{0.1f, 0.2f, 0.3f, 0.5f, 0.2f};
+ // 26: The cell layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at cell gate.
+ hidl_vec<uint32_t> cellLayerNormWeightsDimensions{numUnits};
+ std::vector<float> cellLayerNormWeightsValue{0.7f, 0.2f, 0.3f, 0.8f, 0.5f};
+ // 27: The output layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at output gate.
+ hidl_vec<uint32_t> outputLayerNormWeightsDimensions{numUnits};
+ std::vector<float> outputLayerNormWeightsValue{0.6f, 0.2f, 0.2f, 0.5f, 0.1f};
+
+ // Outputs:
+ // 0: The output: A 3-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16. Shape: if time-major:
+ // [max_time, batch_size, output_size] If batch-major: [batch_size, max_time, output_size]
+ hidl_vec<uint32_t> outputDimensions{batchSize, timeSize, outputSize};
+ std::vector<float> outputValue{0.0642256f, 0.0343966f, 0.184122f, 0.114717f,
+ 0.11458f, 0.0407109f, 0.300327f, 0.174301f,
+ 0.0864761f, 0.0362912f, 0.178635f, 0.115689f,
+ 0.108008f, 0.0386623f, 0.273471f, 0.167115f,
+ 0.0859545f, 0.0331481f, 0.186051f, 0.11888f,
+ 0.106649f, 0.0276847f, 0.229863f, 0.166958f};
+
+ // 1: The hidden state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, output_size]. This output is optional and can be omitted. If this output
+ // is present then output #2 must be present as well.
+ hidl_vec<uint32_t> hiddenStateOutDimensions{batchSize, outputSize};
+ std::vector<float> hiddenStateOutValue(batchSize * outputSize, 0.f);
+ // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, num_units]. This output is optional and can be omitted.
+ hidl_vec<uint32_t> cellStateOutDimensions{batchSize, numUnits};
+ std::vector<float> cellStateOutValue(batchSize * numUnits, 0.f);
+
+ UnidirectionalSequenceLstmTestImpl<HalPolicy>(inputDimensions, inputValue,
+ inputToInputWeightsDimensions, inputToInputWeightsValue,
+ inputToForgetWeightsDimensions, inputToForgetWeightsValue,
+ inputToCellWeightsDimensions, inputToCellWeightsValue,
+ inputToOutputWeightsDimensions, inputToOutputWeightsValue,
+ recurrentToInputWeightsDimensions, recurrentToInputWeightsValue,
+ recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue,
+ recurrentToCellWeightsDimensions, recurrentToCellWeightsValue,
+ recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue,
+ cellToInputWeightsDimensions, cellToInputWeightsValue,
+ cellToForgetWeightsDimensions, cellToForgetWeightsValue,
+ cellToOutputWeightsDimensions, cellToOutputWeightsValue,
+ inputGateBiasDimensions, inputGateBiasValue,
+ forgetGateBiasDimensions, forgetGateBiasValue,
+ cellBiasDimensions, cellBiasValue,
+ outputGateBiasDimensions, outputGateBiasValue,
+ projectionWeightsDimensions, projectionWeightsValue,
+ projectionBiasDimensions, projectionBiasValue,
+ outputStateInDimensions, outputStateInValue,
+ cellStateInDimensions, cellStateInValue,
+ activationFunctionDimensions, activationFunctionValue,
+ cellClippingThresholdDimensions, cellClippingThresholdValue,
+ projectionClippingThresholdDimensions,
+ projectionClippingThresholdValue,
+ timeMajorValue,
+ inputLayerNormWeightsDimensions, inputLayerNormWeightsValue,
+ forgetLayerNormWeightsDimensions, forgetLayerNormWeightsValue,
+ cellLayerNormWeightsDimensions, cellLayerNormWeightsValue,
+ outputLayerNormWeightsDimensions, outputLayerNormWeightsValue,
+ outputDimensions, outputValue,
+ hiddenStateOutDimensions, hiddenStateOutValue,
+ cellStateOutDimensions, cellStateOutValue,
+ compute);
+}
+
+template<typename HalPolicy>
+void UnidirectionalSequenceLstmWithCifgWithPeepholeNoProjectionTestImpl(armnn::Compute compute)
+{
+ uint32_t batchSize = 3;
+ uint32_t timeSize = 2;
+ uint32_t inputSize = 3;
+ uint32_t outputSize = 4;
+ uint32_t numUnits = outputSize;
+
+ // Inputs:
+ // 00: The input: A 3-D tensor of shape: If time-major: [max_time, batch_size, input_size] If batch-major:
+ // [batch_size, max_time, input_size] where “max_time” is the number of timesteps (sequence length),
+ // “batch_size” corresponds to the batching dimension, and “input_size” is the size of the input.
+ hidl_vec<uint32_t> inputDimensions{batchSize, timeSize, inputSize};
+ std::vector<float> inputValue{1., 2., 3., 4., 5., 4.,
+ 3., 2., 1., 2., 3., 4.,
+ 5., 4., 3., 2., 1., 2.};
+
+ // 01: The input-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size], where “num_units” corresponds to the number of cell units.
+ hidl_vec<uint32_t> inputToInputWeightsDimensions{0};
+ std::vector<float> inputToInputWeightsValue;
+ // 02: The input-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToForgetWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToForgetWeightsValue{0.2415594226f, 0.15400093799f, 0.4566498398f,
+ -0.3810434485f, 0.268383264f, -0.009807467424f,
+ -0.3522925403f, -0.24275735512f, -0.28344226125f,
+ 0.13512269116f, -0.4932442977f, -0.10039821991f};
+ // 03: The input-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units, input_size].
+ hidl_vec<uint32_t> inputToCellWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToCellWeightsValue{-0.2504855627f, 0.184490025045f, -0.2480507493f,
+ 0.386399507f, -0.259465157985f, -0.16545993089f,
+ -0.4230232555f, 0.341664791103f, -0.18127849691f,
+ -0.2277662414f, -0.55275535589f, 0.34184026718f};
+ // 04: The input-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, input_size].
+ hidl_vec<uint32_t> inputToOutputWeightsDimensions{numUnits, inputSize};
+ std::vector<float> inputToOutputWeightsValue{0.2303854227f, 0.5218806862f, -0.4865379333f,
+ 0.53969591851f, 0.23393625035f, -0.27140527306f,
+ 0.50009280443f, 0.07511717046f, 0.3998299249f,
+ -0.51717478049f, 0.1889653282f, -0.367323637f};
+ // 05: The recurrent-to-input weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size], where “output_size” corresponds to either the number of cell units (i.e.,
+ // “num_units”), or the second dimension of the “projection_weights”, if defined.
+ hidl_vec<uint32_t> recurrentToInputWeightsDimensions{0};
+ std::vector<float> recurrentToInputWeightsValue;
+ // 06: The recurrent-to-forget weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToForgetWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToForgetWeightsValue{-0.09499983487f, -0.08814888417f, -0.04834804721f, 0.1516668247f,
+ -0.3967529535f, -0.06463699788f, 0.4952811002f, 0.003274492938f,
+ -0.0968840941f, 0.17928104102f, 0.0031281141592f, -0.3387276584f,
+ -0.3587934076f, 0.06705895066f, 0.22463923692f, 0.1961955726f};
+ // 07: The recurrent-to-cell weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToCellWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToCellWeightsValue{-0.21938985582f, -0.3023648226f, -0.1170005202f, -0.3509177422f,
+ -0.4286288613f, 0.2726137042f, 0.09216640889f, -0.06551410215f,
+ 0.20453298098f, 0.2393476665f, 0.11846517771f, 0.2630801796f,
+ 0.3954237699f, -0.19407111404f, 0.30412107706f, -0.27342408554f};
+ // 08: The recurrent-to-output weights: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [num_units, output_size].
+ hidl_vec<uint32_t> recurrentToOutputWeightsDimensions{numUnits, outputSize};
+ std::vector<float> recurrentToOutputWeightsValue{-0.32921677827f, 0.32624614238f, -0.1388191282f,
+ -0.17879831790f, -0.15185534954f, -0.16918526583f,
+ -0.10087361183f, -0.5436913968f, 0.016758225858f,
+ 0.30454617738f, -0.41493862867f, -0.005565764375f,
+ -0.12584099173f, -0.12319286912f, 0.2407919466f,
+ -0.08879069983f};
+ // 09: The cell-to-input weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToInputWeightsDimensions{0};
+ std::vector<float> cellToInputWeightsValue;
+ // 10: The cell-to-forget weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToForgetWeightsDimensions{numUnits};
+ std::vector<float> cellToForgetWeightsValue{0.47485286f, -0.51955009f, -0.24458408f, 0.31544167f};
+ // 11: The cell-to-output weights: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellToOutputWeightsDimensions{numUnits};
+ std::vector<float> cellToOutputWeightsValue{-0.17135078f, 0.82760304f, 0.85573703f, -0.77109635f};
+ // 12: The input gate bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> inputGateBiasDimensions{0};
+ std::vector<float> inputGateBiasValue;
+ // 13: The forget gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> forgetGateBiasDimensions{numUnits};
+ std::vector<float> forgetGateBiasValue{1., 1., 1., 1.};
+ // 14: The cell bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> cellBiasDimensions{numUnits};
+ std::vector<float> cellBiasValue{0., 0., 0., 0.};
+ // 15: The output gate bias: A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [num_units].
+ hidl_vec<uint32_t> outputGateBiasDimensions{numUnits};
+ std::vector<float> outputGateBiasValue{0., 0., 0., 0.};
+ // 16: The projection weights: Optional. A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape
+ // [output_size, num_units].
+ hidl_vec<uint32_t> projectionWeightsDimensions{0};
+ std::vector<float> projectionWeightsValue;
+ // 17: The projection bias: Optional. A 1-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [output_size].
+ hidl_vec<uint32_t> projectionBiasDimensions{0};
+ std::vector<float> projectionBiasValue;
+
+ // 18: The output state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, output_size].
+ hidl_vec<uint32_t> outputStateInDimensions{batchSize, outputSize};
+ std::vector<float> outputStateInValue(batchSize * outputSize, 0.f);
+ // 19: The cell state: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32, of shape [batch_size, num_units].
+ hidl_vec<uint32_t> cellStateInDimensions{batchSize, numUnits};
+ std::vector<float> cellStateInValue(batchSize * numUnits, 0.f);
+
+ // Constant scalar values (the VTS test adds these as tensors of dim {})
+ // 20: The activation function: A value indicating the activation function:
+ // 0: None; 1: Relu; 3: Relu6; 4: Tanh; 6: Sigmoid.
+ hidl_vec<uint32_t> activationFunctionDimensions{};
+ std::vector<int32_t> activationFunctionValue{4};
+ // 21: The clipping threshold: for the cell state, such that values are bound within [-cell_clip, cell_clip].
+ // If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> cellClippingThresholdDimensions{};
+ std::vector<float> cellClippingThresholdValue{10.0f};
+ // 22: The clipping threshold: for the output from the projection layer, such that values are bound within
+ // [-proj_clip, proj_clip]. If set to 0.0 then clipping is disabled.
+ hidl_vec<uint32_t> projectionClippingThresholdDimensions{};
+ std::vector<float> projectionClippingThresholdValue{0.f};
+
+ // 23: Time-major if true, batch-major if false.
+ bool timeMajorValue = false;
+
+ // Normalization:
+ // 24: The input layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at input gate.
+ hidl_vec<uint32_t> inputLayerNormWeightsDimensions{0};
+ std::vector<float> inputLayerNormWeightsValue;
+ // 25: The forget layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at forget gate.
+ hidl_vec<uint32_t> forgetLayerNormWeightsDimensions{0};
+ std::vector<float> forgetLayerNormWeightsValue;
+ // 26: The cell layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at cell gate.
+ hidl_vec<uint32_t> cellLayerNormWeightsDimensions{0};
+ std::vector<float> cellLayerNormWeightsValue;
+ // 27: The output layer normalization weights. A 1-D tensor of shape [num_units].
+ // Used to rescale normalized inputs to activation at output gate.
+ hidl_vec<uint32_t> outputLayerNormWeightsDimensions{0};
+ std::vector<float> outputLayerNormWeightsValue;
+
+ // Outputs:
+ // 0: The output: A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16. Shape: if time-major:
+ // [max_time, batch_size, output_size] If batch-major: [batch_size, max_time, output_size]
+ hidl_vec<uint32_t> outputDimensions{batchSize, timeSize, outputSize};
+ std::vector<float> outputValue{-0.0129257f, -0.070531f, -0.153508f, -0.0392391f,
+ -0.0300169f, -0.195717f, -0.528679f, -0.0818106f,
+ -0.0332748f, 0.155429f, -0.353966f, -0.0801505f,
+ -0.032312f, -0.0407911f, -0.435053f, -0.0932317f,
+ -0.0108233f, 0.165584f, -0.640424f, -0.0447535f,
+ -0.031675f, 0.125987f, -0.526695f, -0.110093f};
+
+ // 1: The hidden state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, output_size]. This output is optional and can be omitted. If this output
+ // is present then output #2 must be present as well.
+ hidl_vec<uint32_t> hiddenStateOutDimensions{batchSize, outputSize};
+ std::vector<float> hiddenStateOutValue(batchSize * outputSize, 0.f);
+ // 2: The cell state (out): A 2-D tensor of ANEURALNETWORKS_TENSOR_FLOAT32/16, of shape
+ // [batch_size, num_units]. This output is optional and can be omitted.
+ hidl_vec<uint32_t> cellStateOutDimensions{batchSize, numUnits};
+ std::vector<float> cellStateOutValue(batchSize * numUnits, 0.f);
+
+ UnidirectionalSequenceLstmTestImpl<HalPolicy>(inputDimensions, inputValue,
+ inputToInputWeightsDimensions, inputToInputWeightsValue,
+ inputToForgetWeightsDimensions, inputToForgetWeightsValue,
+ inputToCellWeightsDimensions, inputToCellWeightsValue,
+ inputToOutputWeightsDimensions, inputToOutputWeightsValue,
+ recurrentToInputWeightsDimensions, recurrentToInputWeightsValue,
+ recurrentToForgetWeightsDimensions, recurrentToForgetWeightsValue,
+ recurrentToCellWeightsDimensions, recurrentToCellWeightsValue,
+ recurrentToOutputWeightsDimensions, recurrentToOutputWeightsValue,
+ cellToInputWeightsDimensions, cellToInputWeightsValue,
+ cellToForgetWeightsDimensions, cellToForgetWeightsValue,
+ cellToOutputWeightsDimensions, cellToOutputWeightsValue,
+ inputGateBiasDimensions, inputGateBiasValue,
+ forgetGateBiasDimensions, forgetGateBiasValue,
+ cellBiasDimensions, cellBiasValue,
+ outputGateBiasDimensions, outputGateBiasValue,
+ projectionWeightsDimensions, projectionWeightsValue,
+ projectionBiasDimensions, projectionBiasValue,
+ outputStateInDimensions, outputStateInValue,
+ cellStateInDimensions, cellStateInValue,
+ activationFunctionDimensions, activationFunctionValue,
+ cellClippingThresholdDimensions, cellClippingThresholdValue,
+ projectionClippingThresholdDimensions,
+ projectionClippingThresholdValue,
+ timeMajorValue,
+ inputLayerNormWeightsDimensions, inputLayerNormWeightsValue,
+ forgetLayerNormWeightsDimensions, forgetLayerNormWeightsValue,
+ cellLayerNormWeightsDimensions, cellLayerNormWeightsValue,
+ outputLayerNormWeightsDimensions, outputLayerNormWeightsValue,
+ outputDimensions, outputValue,
+ hiddenStateOutDimensions, hiddenStateOutValue,
+ cellStateOutDimensions, cellStateOutValue,
+ compute);
+}
\ No newline at end of file
diff --git a/test/UtilsTests.cpp b/test/UtilsTests.cpp
index 054c071..68d7b50 100644
--- a/test/UtilsTests.cpp
+++ b/test/UtilsTests.cpp
@@ -1,27 +1,65 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "DriverTestHelpers.hpp"
-#include <boost/test/unit_test.hpp>
#include <log/log.h>
-#include "../Utils.hpp"
+#include <armnn/src/armnn/OptimizedNetworkImpl.hpp>
#include <fstream>
-#include <iomanip>
+#include <memory>
#include <armnn/INetwork.hpp>
-#include <Filesystem.hpp>
-
-BOOST_AUTO_TEST_SUITE(UtilsTests)
+#include <armnnUtils/Filesystem.hpp>
using namespace android;
using namespace android::nn;
using namespace android::hardware;
using namespace armnn_driver;
+namespace armnn
+{
+
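+// Minimal armnn::Graph definition for these tests: only the type needs to exist so a (null)
+// std::unique_ptr<armnn::Graph> can be passed to the mock below without the full Graph header.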
+class Graph
+{
+public:
+ Graph(Graph&& graph) = default;
+};
+
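+// Test double for OptimizedNetworkImpl: SerializeToDot simply writes m_MockSerializedContent,
+// so ExportNetworkGraphToDotFile produces a file whose contents the tests can compare against.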
+class MockOptimizedNetworkImpl final : public ::armnn::OptimizedNetworkImpl
+{
+public:
+ MockOptimizedNetworkImpl(const std::string& mockSerializedContent, std::unique_ptr<armnn::Graph>)
+ : ::armnn::OptimizedNetworkImpl(nullptr)
+ , m_MockSerializedContent(mockSerializedContent)
+ {}
+ ~MockOptimizedNetworkImpl() {}
+
+ ::armnn::Status PrintGraph() override { return ::armnn::Status::Failure; }
+ ::armnn::Status SerializeToDot(std::ostream& stream) const override
+ {
+ stream << m_MockSerializedContent;
+
+ return stream.good() ? ::armnn::Status::Success : ::armnn::Status::Failure;
+ }
+
+ ::arm::pipe::ProfilingGuid GetGuid() const final { return ::arm::pipe::ProfilingGuid(0); }
+
+ void UpdateMockSerializedContent(const std::string& mockSerializedContent)
+ {
+ this->m_MockSerializedContent = mockSerializedContent;
+ }
+
+private:
+ std::string m_MockSerializedContent;
+};
+
+
+} // armnn namespace
+
+
// The following are helpers for writing unit tests for the driver.
namespace
{
@@ -34,10 +72,9 @@ public:
ExportNetworkGraphFixture()
: ExportNetworkGraphFixture("/data")
{}
+
ExportNetworkGraphFixture(const std::string& requestInputsAndOutputsDumpDir)
- : m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
- , m_FileName()
- , m_FileStream()
+ : m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir), m_FileName(), m_FileStream()
{
// Set the name of the output .dot file.
// NOTE: the export now uses a time stamp to name the file so we
@@ -53,7 +90,7 @@ public:
m_FileStream.close();
// Ignore any error (such as file not found).
- (void)remove(m_FileName.c_str());
+ (void) remove(m_FileName.c_str());
}
bool FileExists()
@@ -102,36 +139,13 @@ private:
std::ifstream m_FileStream;
};
-class MockOptimizedNetwork final : public armnn::IOptimizedNetwork
-{
-public:
- MockOptimizedNetwork(const std::string& mockSerializedContent)
- : m_MockSerializedContent(mockSerializedContent)
- {}
- ~MockOptimizedNetwork() {}
-
- armnn::Status PrintGraph() override { return armnn::Status::Failure; }
- armnn::Status SerializeToDot(std::ostream& stream) const override
- {
- stream << m_MockSerializedContent;
-
- return stream.good() ? armnn::Status::Success : armnn::Status::Failure;
- }
-
- armnn::profiling::ProfilingGuid GetGuid() const final { return armnn::profiling::ProfilingGuid(0); }
-
- void UpdateMockSerializedContent(const std::string& mockSerializedContent)
- {
- this->m_MockSerializedContent = mockSerializedContent;
- }
-
-private:
- std::string m_MockSerializedContent;
-};
} // namespace
-BOOST_AUTO_TEST_CASE(ExportToEmptyDirectory)
+DOCTEST_TEST_SUITE("UtilsTests")
+{
+
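+// An empty dump directory means nothing should be exported, so no output file is expected.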
+DOCTEST_TEST_CASE("ExportToEmptyDirectory")
{
// Set the fixture for this test.
ExportNetworkGraphFixture fixture("");
@@ -140,17 +154,21 @@ BOOST_AUTO_TEST_CASE(ExportToEmptyDirectory)
std::string mockSerializedContent = "This is a mock serialized content.";
// Set a mock optimized network.
- MockOptimizedNetwork mockOptimizedNetwork(mockSerializedContent);
+ std::unique_ptr<armnn::Graph> graphPtr;
+
+ std::unique_ptr<::armnn::OptimizedNetworkImpl> mockImpl(
+ new armnn::MockOptimizedNetworkImpl(mockSerializedContent, std::move(graphPtr)));
+ ::armnn::IOptimizedNetwork mockOptimizedNetwork(std::move(mockImpl));
// Export the mock optimized network.
fixture.m_FileName = armnn_driver::ExportNetworkGraphToDotFile(mockOptimizedNetwork,
- fixture.m_RequestInputsAndOutputsDumpDir);
+ fixture.m_RequestInputsAndOutputsDumpDir);
// Check that the output file does not exist.
- BOOST_TEST(!fixture.FileExists());
+ DOCTEST_CHECK(!fixture.FileExists());
}
-BOOST_AUTO_TEST_CASE(ExportNetwork)
+DOCTEST_TEST_CASE("ExportNetwork")
{
// Set the fixture for this test.
ExportNetworkGraphFixture fixture;
@@ -159,20 +177,25 @@ BOOST_AUTO_TEST_CASE(ExportNetwork)
std::string mockSerializedContent = "This is a mock serialized content.";
// Set a mock optimized network.
- MockOptimizedNetwork mockOptimizedNetwork(mockSerializedContent);
+ std::unique_ptr<armnn::Graph> graphPtr;
+
+ std::unique_ptr<::armnn::OptimizedNetworkImpl> mockImpl(
+ new armnn::MockOptimizedNetworkImpl(mockSerializedContent, std::move(graphPtr)));
+ ::armnn::IOptimizedNetwork mockOptimizedNetwork(std::move(mockImpl));
+
// Export the mock optimized network.
fixture.m_FileName = armnn_driver::ExportNetworkGraphToDotFile(mockOptimizedNetwork,
- fixture.m_RequestInputsAndOutputsDumpDir);
+ fixture.m_RequestInputsAndOutputsDumpDir);
// Check that the output file exists and that it has the correct name.
- BOOST_TEST(fixture.FileExists());
+ DOCTEST_CHECK(fixture.FileExists());
// Check that the content of the output file matches the mock content.
- BOOST_TEST(fixture.GetFileContent() == mockSerializedContent);
+ DOCTEST_CHECK(fixture.GetFileContent() == mockSerializedContent);
}
-BOOST_AUTO_TEST_CASE(ExportNetworkOverwriteFile)
+DOCTEST_TEST_CASE("ExportNetworkOverwriteFile")
{
// Set the fixture for this test.
ExportNetworkGraphFixture fixture;
@@ -181,34 +204,42 @@ BOOST_AUTO_TEST_CASE(ExportNetworkOverwriteFile)
std::string mockSerializedContent = "This is a mock serialized content.";
// Set a mock optimized network.
- MockOptimizedNetwork mockOptimizedNetwork(mockSerializedContent);
+ std::unique_ptr<armnn::Graph> graphPtr;
+
+ std::unique_ptr<::armnn::OptimizedNetworkImpl> mockImpl(
+ new armnn::MockOptimizedNetworkImpl(mockSerializedContent, std::move(graphPtr)));
+ ::armnn::IOptimizedNetwork mockOptimizedNetwork(std::move(mockImpl));
// Export the mock optimized network.
fixture.m_FileName = armnn_driver::ExportNetworkGraphToDotFile(mockOptimizedNetwork,
- fixture.m_RequestInputsAndOutputsDumpDir);
+ fixture.m_RequestInputsAndOutputsDumpDir);
// Check that the output file exists and that it has the correct name.
- BOOST_TEST(fixture.FileExists());
+ DOCTEST_CHECK(fixture.FileExists());
// Check that the content of the output file matches the mock content.
- BOOST_TEST(fixture.GetFileContent() == mockSerializedContent);
+ DOCTEST_CHECK(fixture.GetFileContent() == mockSerializedContent);
// Update the mock serialized content of the network.
mockSerializedContent = "This is ANOTHER mock serialized content!";
- mockOptimizedNetwork.UpdateMockSerializedContent(mockSerializedContent);
+ std::unique_ptr<armnn::Graph> graphPtr2;
+ std::unique_ptr<::armnn::OptimizedNetworkImpl> mockImpl2(
+ new armnn::MockOptimizedNetworkImpl(mockSerializedContent, std::move(graphPtr2)));
+ static_cast<armnn::MockOptimizedNetworkImpl*>(mockImpl2.get())->UpdateMockSerializedContent(mockSerializedContent);
+ ::armnn::IOptimizedNetwork mockOptimizedNetwork2(std::move(mockImpl2));
// Export the mock optimized network.
- fixture.m_FileName = armnn_driver::ExportNetworkGraphToDotFile(mockOptimizedNetwork,
- fixture.m_RequestInputsAndOutputsDumpDir);
+ fixture.m_FileName = armnn_driver::ExportNetworkGraphToDotFile(mockOptimizedNetwork2,
+ fixture.m_RequestInputsAndOutputsDumpDir);
// Check that the output file still exists and that it has the correct name.
- BOOST_TEST(fixture.FileExists());
+ DOCTEST_CHECK(fixture.FileExists());
// Check that the content of the output file matches the mock content.
- BOOST_TEST(fixture.GetFileContent() == mockSerializedContent);
+ DOCTEST_CHECK(fixture.GetFileContent() == mockSerializedContent);
}
-BOOST_AUTO_TEST_CASE(ExportMultipleNetworks)
+DOCTEST_TEST_CASE("ExportMultipleNetworks")
{
// Set the fixtures for this test.
ExportNetworkGraphFixture fixture1;
@@ -219,36 +250,40 @@ BOOST_AUTO_TEST_CASE(ExportMultipleNetworks)
std::string mockSerializedContent = "This is a mock serialized content.";
// Set a mock optimized network.
- MockOptimizedNetwork mockOptimizedNetwork(mockSerializedContent);
+ std::unique_ptr<armnn::Graph> graphPtr;
+
+ std::unique_ptr<::armnn::OptimizedNetworkImpl> mockImpl(
+ new armnn::MockOptimizedNetworkImpl(mockSerializedContent, std::move(graphPtr)));
+ ::armnn::IOptimizedNetwork mockOptimizedNetwork(std::move(mockImpl));
// Export the mock optimized network.
fixture1.m_FileName = armnn_driver::ExportNetworkGraphToDotFile(mockOptimizedNetwork,
- fixture1.m_RequestInputsAndOutputsDumpDir);
+ fixture1.m_RequestInputsAndOutputsDumpDir);
// Check that the output file exists and that it has the correct name.
- BOOST_TEST(fixture1.FileExists());
+ DOCTEST_CHECK(fixture1.FileExists());
// Check that the content of the output file matches the mock content.
- BOOST_TEST(fixture1.GetFileContent() == mockSerializedContent);
+ DOCTEST_CHECK(fixture1.GetFileContent() == mockSerializedContent);
// Export the mock optimized network.
fixture2.m_FileName = armnn_driver::ExportNetworkGraphToDotFile(mockOptimizedNetwork,
- fixture2.m_RequestInputsAndOutputsDumpDir);
+ fixture2.m_RequestInputsAndOutputsDumpDir);
// Check that the output file exists and that it has the correct name.
- BOOST_TEST(fixture2.FileExists());
+ DOCTEST_CHECK(fixture2.FileExists());
// Check that the content of the output file matches the mock content.
- BOOST_TEST(fixture2.GetFileContent() == mockSerializedContent);
+ DOCTEST_CHECK(fixture2.GetFileContent() == mockSerializedContent);
// Export the mock optimized network.
fixture3.m_FileName = armnn_driver::ExportNetworkGraphToDotFile(mockOptimizedNetwork,
- fixture3.m_RequestInputsAndOutputsDumpDir);
+ fixture3.m_RequestInputsAndOutputsDumpDir);
// Check that the output file exists and that it has the correct name.
- BOOST_TEST(fixture3.FileExists());
+ DOCTEST_CHECK(fixture3.FileExists());
// Check that the content of the output file matches the mock content.
- BOOST_TEST(fixture3.GetFileContent() == mockSerializedContent);
+ DOCTEST_CHECK(fixture3.GetFileContent() == mockSerializedContent);
}
-BOOST_AUTO_TEST_SUITE_END()
+}