Upgrade OpenCL-CTS to v2023-05-16-00 am: af503179bb am: 7be916e68d am: 8660fe4d55 am: b8ba8e487f

Original change: https://android-review.googlesource.com/c/platform/external/OpenCL-CTS/+/2676035 Change-Id: If8b5f65cb9a9f3af3f76b67fed40eaf9b00885c0 Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
author: Sadaf Ebrahimi <sadafebrahimi@google.com> 2023-07-25 22:33:25 +0000
committer: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> 2023-07-25 22:33:25 +0000
commit: 150005d3d187a681ba110128ab0272b18cbcb468 (patch)
tree: e96b89c45428ba1382321718785a473dbf855b53
parent: 6b9ff13286194c7a2b38d624eeee38dc35987dc8 (diff)
parent: b8ba8e487f4aa32d8bc42a019e4bc8bc2e330b99 (diff)
download: OpenCL-CTS-150005d3d187a681ba110128ab0272b18cbcb468.tar.gz
274 files changed, 24169 insertions, 20173 deletions
diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml
index 1dfdb963..1ba63abd 100644
--- a/.github/workflows/presubmit.yml
+++ b/.github/workflows/presubmit.yml
@@ -28,7 +28,7 @@ jobs:
             debug: 1
             extra: " debug"
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - name: Setup Ninja
         uses: seanmiddleditch/gha-setup-ninja@master
       - name: Setup OpenGL build dependencies
@@ -59,7 +59,7 @@ jobs:
     steps:
       - name: Install packages
         run: sudo apt install -y clang-format clang-format-9
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
         with:
           fetch-depth: 0
       - name: Check code format
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6a25d5b5..4fce58d8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,6 +17,7 @@ add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_0_APIS=1)
 add_definitions(-DCL_USE_DEPRECATED_OPENCL_1_2_APIS=1)
 add_definitions(-DCL_USE_DEPRECATED_OPENCL_1_1_APIS=1)
 add_definitions(-DCL_USE_DEPRECATED_OPENCL_1_0_APIS=1)
+add_definitions(-DCL_NO_EXTENSION_PROTOTYPES)
 
 option(USE_CL_EXPERIMENTAL "Use Experimental definitions" OFF)
 if(USE_CL_EXPERIMENTAL)
@@ -30,6 +31,7 @@ option(D3D10_IS_SUPPORTED "Run DirectX 10 interop tests" OFF)
 option(D3D11_IS_SUPPORTED "Run DirectX 11 interop tests" OFF)
 option(GL_IS_SUPPORTED "Run OpenGL interop tests" OFF)
 option(GLES_IS_SUPPORTED "Run OpenGL ES interop tests" OFF)
+option(VULKAN_IS_SUPPORTED "Run Vulkan interop tests" OFF)
 
 
 #-----------------------------------------------------------
@@ -81,20 +83,35 @@ endif()
 
 macro(add_cxx_flag_if_supported flag)
     string(REGEX REPLACE "[-=+]" "" FLAG_NO_SIGNS ${flag})
-    check_cxx_compiler_flag(${flag} COMPILER_SUPPORTS_${FLAG_NO_SIGNS})
+    set(FLAG_TO_TEST ${flag})
+    if((${flag} MATCHES "^-Wno-") AND NOT (${flag} MATCHES "^-Wno-error="))
+        # -Wno-... only causes a diagnostic if another diagnostic is emitted.
+        # Change such flags into a -W... flag to test if the warning is known.
+        string(REGEX REPLACE "^-Wno-" "-W" FLAG_TO_TEST ${flag})
+    endif()
+    check_cxx_compiler_flag(${FLAG_TO_TEST} COMPILER_SUPPORTS_${FLAG_NO_SIGNS})
     if(COMPILER_SUPPORTS_${FLAG_NO_SIGNS})
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}")
     endif()
 endmacro(add_cxx_flag_if_supported)
 
 if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang")
     add_cxx_flag_if_supported(-Wmisleading-indentation)
+    add_cxx_flag_if_supported(-Wunused-function)
     add_cxx_flag_if_supported(-Wunused-variable)
+    add_cxx_flag_if_supported(-Werror)
+    if(NOT CMAKE_BUILD_TYPE MATCHES "Release|RelWithDebInfo|MinSizeRel")
+        # Enable more warnings if not doing a release build.
+        add_cxx_flag_if_supported(-Wall)
+        # Suppress warnings that currently trigger on the code base.
+        # This list should shrink over time when warnings are fixed.
+        add_cxx_flag_if_supported(-Wno-sometimes-uninitialized)
+        add_cxx_flag_if_supported(-Wno-sign-compare)
+    endif()
     add_cxx_flag_if_supported(-Wno-narrowing)
     add_cxx_flag_if_supported(-Wno-format)
-    add_cxx_flag_if_supported(-Werror)
     add_cxx_flag_if_supported(-Wno-error=cpp) # Allow #warning directive
-    add_cxx_flag_if_supported(-Wno-error=unknown-pragmas) # Issue #785
+    add_cxx_flag_if_supported(-Wno-unknown-pragmas) # Issue #785
     add_cxx_flag_if_supported(-Wno-error=asm-operand-widths) # Issue #784
 
     # -msse -mfpmath=sse to force gcc to use sse for float math,
@@ -124,6 +141,12 @@ macro(set_gnulike_module_compile_flags flags)
     endif()
 endmacro(set_gnulike_module_compile_flags)
 
+# Xcode 14.1 deprecated functions such as sprintf.
+# Suppress such warnings for now, see Issue #1626
+if(APPLE)
+    add_cxx_flag_if_supported(-Wno-deprecated-declarations)
+endif(APPLE)
+
 if(MSVC)
     # Don't warn when using standard non-secure functions.
     add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
diff --git a/METADATA b/METADATA
index 235bc5aa..41886b71 100644
--- a/METADATA
+++ b/METADATA
@@ -1,6 +1,6 @@
 # This project was upgraded with external_updater.
 # Usage: tools/external_updater/updater.sh update OpenCL-CTS
-# For more info, check https://cs.android.com/android/platform/superproject/+/master:tools/external_updater/README.md
+# For more info, check https://cs.android.com/android/platform/superproject/+/main:tools/external_updater/README.md
 
 name: "OpenCL-CTS"
 description: "OpenCL Conformance Tests"
@@ -9,11 +9,11 @@ third_party {
     type: GIT
     value: "https://github.com/KhronosGroup/OpenCL-CTS.git"
   }
-  version: "90a5183ec499d5b4701f58f6134dd424d82c4dca"
+  version: "v2023-05-16-00"
   license_type: NOTICE
   last_upgrade_date {
-    year: 2022
-    month: 10
-    day: 26
+    year: 2023
+    month: 7
+    day: 25
   }
 }
diff --git a/README.md b/README.md
index 3d410644..34322f48 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # OpenCL Conformance Test Suite (CTS)
 
-This it the OpenCL CTS for all versions of the Khronos
+This is the OpenCL CTS for all versions of the Khronos
 [OpenCL](https://www.khronos.org/opencl/) standard.
 
 ## Building the CTS
@@ -89,7 +89,7 @@ require compilation, these are:
 ## Generating a Conformance Report
 
 The Khronos [Conformance Process Document](https://members.khronos.org/document/dl/911)
-details the steps required for a conformance submissions.
+details the steps required for a conformance submission.
 In this repository [opencl_conformance_tests_full.csv](test_conformance/submission_details_template.txt)
 defines the full list of tests which must be run for conformance. The output log
 of which must be included alongside a filled in
@@ -105,7 +105,7 @@ the version of the repository conformance submissions are made against.
 
 Contributions are welcome to the project from Khronos members and non-members
 alike via GitHub Pull Requests (PR). Alternatively, if you've found a bug or have
-a questions please file an issue in the GitHub project. First time contributors
+a question please file an issue in the GitHub project. First time contributors
 will be required to sign the Khronos Contributor License Agreement (CLA) before
 their PR can be merged.
 
diff --git a/presubmit.sh b/presubmit.sh
index ca39b9a2..605c10b0 100755
--- a/presubmit.sh
+++ b/presubmit.sh
@@ -13,6 +13,7 @@ TOOLCHAIN_PREFIX_aarch64=aarch64-linux-gnu
 TOOLCHAIN_FILE=${TOP}/toolchain.cmake
 touch ${TOOLCHAIN_FILE}
 BUILD_OPENGL_TEST="OFF"
+BUILD_VULKAN_TEST="ON"
 
 cmake --version
 echo
@@ -102,6 +103,7 @@ cmake .. -G Ninja \
       -DOPENCL_LIBRARIES="${CMAKE_OPENCL_LIBRARIES_OPTION}" \
       -DUSE_CL_EXPERIMENTAL=ON \
       -DGL_IS_SUPPORTED=${BUILD_OPENGL_TEST} \
+      -DVULKAN_IS_SUPPORTED=${BUILD_VULKAN_TEST} \
       -DVULKAN_INCLUDE_DIR=${TOP}/Vulkan-Headers/include/ \
       -DVULKAN_LIB_DIR=${TOP}/Vulkan-Loader/build/loader/
 cmake --build . -j3
diff --git a/test_common/autotest/test_suite.hpp b/test_common/autotest/test_suite.hpp
index 3616bee4..b831b9e6 100644
--- a/test_common/autotest/test_suite.hpp
+++ b/test_common/autotest/test_suite.hpp
@@ -49,9 +49,11 @@ namespace detail {
 
 struct test_case_registration
 {
-    test_case_registration(const std::string& name, const basefn ptr)
+    test_case_registration(const std::string& name,
+                           const test_function_pointer ptr)
     {
-        ::autotest::test_suite::global_test_suite().add(test_definition({ptr, strdup(name.c_str())}));
+        ::autotest::test_suite::global_test_suite().add(
+            test_definition({ ptr, strdup(name.c_str()) }));
     }
 };
 
diff --git a/test_common/gl/helpers.h b/test_common/gl/helpers.h
index 084c434a..e5120f47 100644
--- a/test_common/gl/helpers.h
+++ b/test_common/gl/helpers.h
@@ -34,7 +34,6 @@
 
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
-#include "harness/threadTesting.h"
 #include "harness/typeWrappers.h"
 #include "harness/conversions.h"
 #include "harness/mt19937.h"
diff --git a/test_common/gles/helpers.h b/test_common/gles/helpers.h
index 20768787..69ce7d6c 100644
--- a/test_common/gles/helpers.h
+++ b/test_common/gles/helpers.h
@@ -37,7 +37,6 @@
 
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
-#include "harness/threadTesting.h"
 #include "harness/typeWrappers.h"
 #include "harness/conversions.h"
 #include "harness/mt19937.h"
diff --git a/test_common/harness/ThreadPool.cpp b/test_common/harness/ThreadPool.cpp
index 62798045..c4abaa2e 100644
--- a/test_common/harness/ThreadPool.cpp
+++ b/test_common/harness/ThreadPool.cpp
@@ -23,6 +23,7 @@
 // or any other POSIX system
 
 #include <atomic>
+#include <vector>
 
 #if defined(_WIN32)
 #include <windows.h>
@@ -58,6 +59,12 @@ CRITICAL_SECTION gAtomicLock;
 pthread_mutex_t gAtomicLock;
 #endif
 
+#if !defined(_WIN32)
+// Keep track of pthread_t's created in ThreadPool_Init() so they can be joined
+// in ThreadPool_Exit() and avoid thread leaks.
+static std::vector<pthread_t> pthreads;
+#endif
+
 // Atomic add operator with mem barrier.  Mem barrier needed to protect state
 // modified by the worker functions.
 cl_int ThreadPool_AtomicAdd(volatile cl_int *a, cl_int b)
@@ -642,6 +649,9 @@ void ThreadPool_Init(void)
             gThreadCount = i;
             break;
         }
+#if !defined(_WIN32)
+        pthreads.push_back(tid);
+#endif // !_WIN32
     }
 
     atexit(ThreadPool_Exit);
@@ -721,7 +731,20 @@ void ThreadPool_Exit(void)
                   "still active.\n",
                   gThreadCount.load());
     else
+    {
+#if !defined(_WIN32)
+        for (pthread_t pthread : pthreads)
+        {
+            if (int err = pthread_join(pthread, nullptr))
+            {
+                log_error("Error from %d from pthread_join. Unable to join "
+                          "work threads. ThreadPool_Exit failed.\n",
+                          err);
+            }
+        }
+#endif
         log_info("Thread pool exited in a orderly fashion.\n");
+    }
 }
 
 
diff --git a/test_common/harness/errorHelpers.h b/test_common/harness/errorHelpers.h
index 80eb3b58..c302397e 100644
--- a/test_common/harness/errorHelpers.h
+++ b/test_common/harness/errorHelpers.h
@@ -153,6 +153,21 @@ static int vlog_win32(const char *format, ...);
         }                                                                      \
     } while (0)
 
+#define test_assert_event_status(comparison_operator, event)                   \
+    do                                                                         \
+    {                                                                          \
+        cl_int status;                                                         \
+        cl_int err = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,  \
+                                    sizeof(status), &status, nullptr);         \
+        test_error(err, "Could not get " #event " info");                      \
+        test_assert_error(status comparison_operator CL_COMPLETE,              \
+                          "Unexpected status for " #event);                    \
+    } while (false)
+
+#define test_assert_event_inprogress(event) test_assert_event_status(>, event)
+#define test_assert_event_terminated(event) test_assert_event_status(<, event)
+#define test_assert_event_complete(event) test_assert_event_status(==, event)
+
 extern const char *IGetErrorString(int clErrorCode);
 
 extern float Ulp_Error_Half(cl_half test, float reference);
diff --git a/test_common/harness/extensionHelpers.h b/test_common/harness/extensionHelpers.h
new file mode 100644
index 00000000..e98f67c2
--- /dev/null
+++ b/test_common/harness/extensionHelpers.h
@@ -0,0 +1,36 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _extensionHelpers_h
+#define _extensionHelpers_h
+
+// Load a specific function that is part of an OpenCL extension
+#define GET_PFN(device, fn_name)                                               \
+    fn_name##_fn fn_name = reinterpret_cast<fn_name##_fn>(                     \
+        clGetExtensionFunctionAddressForPlatform(                              \
+            getPlatformFromDevice(device), #fn_name));                         \
+    do                                                                         \
+    {                                                                          \
+        if (!fn_name)                                                          \
+        {                                                                      \
+            log_error(                                                         \
+                "ERROR: Failed to get function pointer for %s at %s:%d\n",     \
+                #fn_name, __FILE__, __LINE__);                                 \
+            return TEST_FAIL;                                                  \
+        }                                                                      \
+    } while (false)
+
+
+#endif // _extensionHelpers_h
diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp
index f1694e88..3e1a3442 100644
--- a/test_common/harness/imageHelpers.cpp
+++ b/test_common/harness/imageHelpers.cpp
@@ -2152,14 +2152,6 @@ FloatPixel sample_image_pixel_float_offset(
                          lowRight[2], lowRight[3]);
             }
 
-            bool printMe = false;
-            if (x1 <= 0 || x2 <= 0 || x1 >= (int)width - 1
-                || x2 >= (int)width - 1)
-                printMe = true;
-            if (y1 <= 0 || y2 <= 0 || y1 >= (int)height - 1
-                || y2 >= (int)height - 1)
-                printMe = true;
-
             double weights[2][2];
 
             weights[0][0] = weights[0][1] = 1.0 - frac(x - 0.5f);
@@ -3545,7 +3537,6 @@ void copy_image_data(image_descriptor *srcImageInfo,
     {
         size_t src_width_lod = 1 /*srcImageInfo->width*/;
         size_t src_height_lod = 1 /*srcImageInfo->height*/;
-        size_t src_depth_lod = 1 /*srcImageInfo->depth*/;
 
         switch (srcImageInfo->type)
         {
@@ -3580,10 +3571,6 @@ void copy_image_data(image_descriptor *srcImageInfo,
                 src_height_lod = (srcImageInfo->height >> src_lod)
                     ? (srcImageInfo->height >> src_lod)
                     : 1;
-                if (srcImageInfo->type == CL_MEM_OBJECT_IMAGE3D)
-                    src_depth_lod = (srcImageInfo->depth >> src_lod)
-                        ? (srcImageInfo->depth >> src_lod)
-                        : 1;
                 break;
         }
         src_mip_level_offset = compute_mip_level_offset(srcImageInfo, src_lod);
@@ -3596,7 +3583,6 @@ void copy_image_data(image_descriptor *srcImageInfo,
     {
         size_t dst_width_lod = 1 /*dstImageInfo->width*/;
         size_t dst_height_lod = 1 /*dstImageInfo->height*/;
-        size_t dst_depth_lod = 1 /*dstImageInfo->depth*/;
         switch (dstImageInfo->type)
         {
             case CL_MEM_OBJECT_IMAGE1D:
@@ -3630,10 +3616,6 @@ void copy_image_data(image_descriptor *srcImageInfo,
                 dst_height_lod = (dstImageInfo->height >> dst_lod)
                     ? (dstImageInfo->height >> dst_lod)
                     : 1;
-                if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE3D)
-                    dst_depth_lod = (dstImageInfo->depth >> dst_lod)
-                        ? (dstImageInfo->depth >> dst_lod)
-                        : 1;
                 break;
         }
         dst_mip_level_offset = compute_mip_level_offset(dstImageInfo, dst_lod);
diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp
index 13ebcbc9..633b05e5 100644
--- a/test_common/harness/kernelHelpers.cpp
+++ b/test_common/harness/kernelHelpers.cpp
@@ -1511,22 +1511,33 @@ size_t get_min_alignment(cl_context context)
     return align_size;
 }
 
-cl_device_fp_config get_default_rounding_mode(cl_device_id device)
+cl_device_fp_config get_default_rounding_mode(cl_device_id device,
+                                              const cl_uint &param)
 {
+    if (param == CL_DEVICE_DOUBLE_FP_CONFIG)
+        test_error_ret(
+            -1,
+            "FAILURE: CL_DEVICE_DOUBLE_FP_CONFIG not supported by this routine",
+            0);
+
     char profileStr[128] = "";
     cl_device_fp_config single = 0;
-    int error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
-                                sizeof(single), &single, NULL);
+    int error = clGetDeviceInfo(device, param, sizeof(single), &single, NULL);
     if (error)
-        test_error_ret(error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG",
-                       0);
+    {
+        std::string message = std::string("Unable to get device ")
+            + std::string(param == CL_DEVICE_HALF_FP_CONFIG
+                              ? "CL_DEVICE_HALF_FP_CONFIG"
+                              : "CL_DEVICE_SINGLE_FP_CONFIG");
+        test_error_ret(error, message.c_str(), 0);
+    }
 
     if (single & CL_FP_ROUND_TO_NEAREST) return CL_FP_ROUND_TO_NEAREST;
 
     if (0 == (single & CL_FP_ROUND_TO_ZERO))
         test_error_ret(-1,
                        "FAILURE: device must support either "
-                       "CL_DEVICE_SINGLE_FP_CONFIG or CL_FP_ROUND_TO_NEAREST",
+                       "CL_FP_ROUND_TO_ZERO or CL_FP_ROUND_TO_NEAREST",
                        0);
 
     // Make sure we are an embedded device before allowing a pass
diff --git a/test_common/harness/kernelHelpers.h b/test_common/harness/kernelHelpers.h
index 4d8f2a8f..62a07e49 100644
--- a/test_common/harness/kernelHelpers.h
+++ b/test_common/harness/kernelHelpers.h
@@ -159,7 +159,9 @@ size_t get_min_alignment(cl_context context);
 
 /* Helper to obtain the default rounding mode for single precision computation.
  * (Double is always CL_FP_ROUND_TO_NEAREST.) Returns 0 on error. */
-cl_device_fp_config get_default_rounding_mode(cl_device_id device);
+cl_device_fp_config
+get_default_rounding_mode(cl_device_id device,
+                          const cl_uint &param = CL_DEVICE_SINGLE_FP_CONFIG);
 
 #define PASSIVE_REQUIRE_IMAGE_SUPPORT(device)                                  \
     if (checkForImageSupport(device))                                          \
diff --git a/test_common/harness/os_helpers.cpp b/test_common/harness/os_helpers.cpp
index 8fc91108..3989edf6 100644
--- a/test_common/harness/os_helpers.cpp
+++ b/test_common/harness/os_helpers.cpp
@@ -30,8 +30,17 @@
 
 #if defined(__ANDROID__)
 #include <android/api-level.h>
+#include "harness/mt19937.h"
 #endif
 
+#if !defined(_WIN32)
+#if defined(__APPLE__)
+#include <sys/sysctl.h>
+#endif
+#include <unistd.h>
+#endif
+
+
 #define CHECK_PTR(ptr)                                                         \
     if ((ptr) == NULL)                                                         \
     {                                                                          \
@@ -283,7 +292,7 @@ std::string exe_path()
             exit(2);
         }; // if
 
-        if (len < path.size())
+        if (static_cast<size_t>(len) < path.size())
         {
             // We got the path.
             path.resize(len);
@@ -556,4 +565,27 @@ char* get_exe_dir()
 } // get_exe_dir
 
 
+char* get_temp_filename()
+{
+    char gFileName[256] = "";
+    // Create a unique temporary file to allow parallel executed tests.
+#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__))
+    sprintf(gFileName, "/tmp/tmpfile.XXXXXX");
+    int fd = mkstemp(gFileName);
+    if (fd == -1) return strdup(gFileName);
+    close(fd);
+#elif defined(_WIN32)
+    UINT ret = GetTempFileName(".", "tmp", 0, gFileName);
+    if (ret == 0) return gFileName;
+#else
+    MTdata d = init_genrand((cl_uint)time(NULL));
+    sprintf(gFileName, "tmpfile.%u", genrand_int32(d));
+#endif
+
+    char* fn = strdup(gFileName);
+    CHECK_PTR(fn);
+    return fn;
+}
+
+
 // end of file //
diff --git a/test_common/harness/os_helpers.h b/test_common/harness/os_helpers.h
index aa3080d9..0ab8507f 100644
--- a/test_common/harness/os_helpers.h
+++ b/test_common/harness/os_helpers.h
@@ -41,5 +41,7 @@ char* get_err_msg(int err); // Returns system error message. Subject to free.
 char* get_dir_sep(); // Returns dir separator. Subject to free.
 char* get_exe_path(); // Returns path of current executable. Subject to free.
 char* get_exe_dir(); // Returns dir of current executable. Subject to free.
+char* get_temp_filename(); // returns temporary file name
+
 
 #endif // __os_helpers_h__
diff --git a/test_common/harness/parseParameters.cpp b/test_common/harness/parseParameters.cpp
index e946d744..2fc31d26 100644
--- a/test_common/harness/parseParameters.cpp
+++ b/test_common/harness/parseParameters.cpp
@@ -36,6 +36,7 @@ std::string gCompilationCachePath = ".";
 std::string gCompilationProgram = DEFAULT_COMPILATION_PROGRAM;
 bool gDisableSPIRVValidation = false;
 std::string gSPIRVValidator = DEFAULT_SPIRV_VALIDATOR;
+unsigned gNumWorkerThreads;
 
 void helpInfo()
 {
@@ -48,6 +49,8 @@ void helpInfo()
             online     Use online compilation (default)
             binary     Use binary offline compilation
             spir-v     Use SPIR-V offline compilation
+    --num-worker-threads <num>
+        Select parallel execution with the specified number of worker threads.
 
 For offline compilation (binary and spir-v modes) only:
     --compilation-cache-mode <cache-mode>
@@ -137,6 +140,23 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore)
                 return -1;
             }
         }
+        else if (!strcmp(argv[i], "--num-worker-threads"))
+        {
+            delArg++;
+            if ((i + 1) < argc)
+            {
+                delArg++;
+                const char *numthstr = argv[i + 1];
+
+                gNumWorkerThreads = atoi(numthstr);
+            }
+            else
+            {
+                log_error(
+                    "A parameter to --num-worker-threads must be provided!\n");
+                return -1;
+            }
+        }
         else if (!strcmp(argv[i], "--compilation-cache-mode"))
         {
             delArg++;
diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp
index a309f53d..95ea8163 100644
--- a/test_common/harness/testHarness.cpp
+++ b/test_common/harness/testHarness.cpp
@@ -20,9 +20,11 @@
 #include <stdlib.h>
 #include <string.h>
 #include <cassert>
+#include <deque>
+#include <mutex>
 #include <stdexcept>
+#include <thread>
 #include <vector>
-#include "threadTesting.h"
 #include "errorHelpers.h"
 #include "kernelHelpers.h"
 #include "fpcontrol.h"
@@ -584,10 +586,12 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
     FPU_mode_type oldMode;
     DisableFTZ(&oldMode);
 #endif
+    extern unsigned gNumWorkerThreads;
+    test_harness_config config = { forceNoContextCreation, num_elements,
+                                   queueProps, gNumWorkerThreads };
 
     int error = parseAndCallCommandLineTests(argc, argv, device, testNum,
-                                             testList, forceNoContextCreation,
-                                             queueProps, num_elements);
+                                             testList, config);
 
 #if defined(__APPLE__) && defined(__arm__)
     // Restore the old FP mode before leaving.
@@ -678,9 +682,7 @@ static void print_results(int failed, int count, const char *name)
 int parseAndCallCommandLineTests(int argc, const char *argv[],
                                  cl_device_id device, int testNum,
                                  test_definition testList[],
-                                 int forceNoContextCreation,
-                                 cl_command_queue_properties queueProps,
-                                 int num_elements)
+                                 const test_harness_config &config)
 {
     int ret = EXIT_SUCCESS;
 
@@ -726,8 +728,7 @@ int parseAndCallCommandLineTests(int argc, const char *argv[],
         std::vector<test_status> resultTestList(testNum, TEST_PASS);
 
         callTestFunctions(testList, selectedTestList, resultTestList.data(),
-                          testNum, device, forceNoContextCreation, num_elements,
-                          queueProps);
+                          testNum, device, config);
 
         print_results(gFailCount, gTestCount, "sub-test");
         print_results(gTestsFailed, gTestsFailed + gTestsPassed, "test");
@@ -755,21 +756,97 @@ int parseAndCallCommandLineTests(int argc, const char *argv[],
     return ret;
 }
 
+struct test_harness_state
+{
+    test_definition *tests;
+    test_status *results;
+    cl_device_id device;
+    test_harness_config config;
+};
+
+static std::deque<int> gTestQueue;
+static std::mutex gTestStateMutex;
+
+void test_function_runner(test_harness_state *state)
+{
+    int testID;
+    test_definition test;
+    while (true)
+    {
+        // Attempt to get a test
+        {
+            std::lock_guard<std::mutex> lock(gTestStateMutex);
+
+            // The queue is empty, we're done
+            if (gTestQueue.size() == 0)
+            {
+                return;
+            }
+
+            // Get the test at the front of the queue
+            testID = gTestQueue.front();
+            gTestQueue.pop_front();
+            test = state->tests[testID];
+        }
+
+        // Execute test
+        auto status =
+            callSingleTestFunction(test, state->device, state->config);
+
+        // Store result
+        {
+            std::lock_guard<std::mutex> lock(gTestStateMutex);
+            state->results[testID] = status;
+        }
+    }
+}
+
 void callTestFunctions(test_definition testList[],
                        unsigned char selectedTestList[],
                        test_status resultTestList[], int testNum,
-                       cl_device_id deviceToUse, int forceNoContextCreation,
-                       int numElementsToUse,
-                       cl_command_queue_properties queueProps)
+                       cl_device_id deviceToUse,
+                       const test_harness_config &config)
 {
-    for (int i = 0; i < testNum; ++i)
+    // Execute tests serially
+    if (config.numWorkerThreads == 0)
     {
-        if (selectedTestList[i])
+        for (int i = 0; i < testNum; ++i)
+        {
+            if (selectedTestList[i])
+            {
+                resultTestList[i] =
+                    callSingleTestFunction(testList[i], deviceToUse, config);
+            }
+        }
+        // Execute tests in parallel with the specified number of worker threads
+    }
+    else
+    {
+        // Queue all tests that need to run
+        for (int i = 0; i < testNum; ++i)
+        {
+            if (selectedTestList[i])
+            {
+                gTestQueue.push_back(i);
+            }
+        }
+
+        // Spawn thread pool
+        std::vector<std::thread *> threads;
+        test_harness_state state = { testList, resultTestList, deviceToUse,
+                                     config };
+        for (int i = 0; i < config.numWorkerThreads; i++)
+        {
+            log_info("Spawning worker thread %i\n", i);
+            threads.push_back(new std::thread(test_function_runner, &state));
+        }
+
+        // Wait for all threads to complete
+        for (auto th : threads)
         {
-            resultTestList[i] = callSingleTestFunction(
-                testList[i], deviceToUse, forceNoContextCreation,
-                numElementsToUse, queueProps);
+            th->join();
         }
+        assert(gTestQueue.size() == 0);
     }
 }
 
@@ -782,9 +859,7 @@ void CL_CALLBACK notify_callback(const char *errinfo, const void *private_info,
 // Actual function execution
 test_status callSingleTestFunction(test_definition test,
                                    cl_device_id deviceToUse,
-                                   int forceNoContextCreation,
-                                   int numElementsToUse,
-                                   const cl_queue_properties queueProps)
+                                   const test_harness_config &config)
 {
     test_status status;
     cl_int error;
@@ -812,27 +887,30 @@ test_status callSingleTestFunction(test_definition test,
     }
 
     /* Create a context to work with, unless we're told not to */
-    if (!forceNoContextCreation)
+    if (!config.forceNoContextCreation)
     {
         context = clCreateContext(NULL, 1, &deviceToUse, notify_callback, NULL,
                                   &error);
         if (!context)
         {
             print_error(error, "Unable to create testing context");
+            gFailCount++;
+            gTestsFailed++;
             return TEST_FAIL;
         }
 
         if (device_version < Version(2, 0))
         {
-            queue =
-                clCreateCommandQueue(context, deviceToUse, queueProps, &error);
+            queue = clCreateCommandQueue(context, deviceToUse,
+                                         config.queueProps, &error);
         }
         else
         {
             const cl_command_queue_properties cmd_queueProps =
-                (queueProps) ? CL_QUEUE_PROPERTIES : 0;
-            cl_command_queue_properties queueCreateProps[] = { cmd_queueProps,
-                                                               queueProps, 0 };
+                (config.queueProps) ? CL_QUEUE_PROPERTIES : 0;
+            cl_command_queue_properties queueCreateProps[] = {
+                cmd_queueProps, config.queueProps, 0
+            };
             queue = clCreateCommandQueueWithProperties(
                 context, deviceToUse, &queueCreateProps[0], &error);
         }
@@ -841,6 +919,8 @@ test_status callSingleTestFunction(test_definition test,
         {
             print_error(error, "Unable to create testing command queue");
             clReleaseContext(context);
+            gFailCount++;
+            gTestsFailed++;
             return TEST_FAIL;
         }
     }
@@ -855,7 +935,8 @@ test_status callSingleTestFunction(test_definition test,
     }
     else
     {
-        int ret = test.func(deviceToUse, context, queue, numElementsToUse);
+        int ret =
+            test.func(deviceToUse, context, queue, config.numElementsToUse);
         if (ret == TEST_SKIPPED_ITSELF)
         {
             /* Tests can also let us know they're not supported by the
@@ -882,12 +963,14 @@ test_status callSingleTestFunction(test_definition test,
     }
 
     /* Release the context */
-    if (!forceNoContextCreation)
+    if (!config.forceNoContextCreation)
     {
         int error = clFinish(queue);
         if (error)
         {
             log_error("clFinish failed: %s\n", IGetErrorString(error));
+            gFailCount++;
+            gTestsFailed++;
             status = TEST_FAIL;
         }
         clReleaseCommandQueue(queue);
diff --git a/test_common/harness/testHarness.h b/test_common/harness/testHarness.h
index d6054de9..b266db80 100644
--- a/test_common/harness/testHarness.h
+++ b/test_common/harness/testHarness.h
@@ -16,7 +16,6 @@
 #ifndef _testHarness_h
 #define _testHarness_h
 
-#include "threadTesting.h"
 #include "clImageHelper.h"
 #include <string>
 #include <sstream>
@@ -67,9 +66,12 @@ Version get_device_cl_version(cl_device_id device);
 
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
 
+typedef int (*test_function_pointer)(cl_device_id deviceID, cl_context context,
+                                     cl_command_queue queue, int num_elements);
+
 typedef struct test_definition
 {
-    basefn func;
+    test_function_pointer func;
     const char *name;
     Version min_version;
 } test_definition;
@@ -83,6 +85,14 @@ typedef enum test_status
     TEST_SKIPPED_ITSELF = -100,
 } test_status;
 
+struct test_harness_config
+{
+    int forceNoContextCreation;
+    int numElementsToUse;
+    cl_command_queue_properties queueProps;
+    unsigned numWorkerThreads;
+};
+
 extern int gFailCount;
 extern int gTestCount;
 extern cl_uint gReSeed;
@@ -115,9 +125,7 @@ extern int runTestHarnessWithCheck(int argc, const char *argv[], int testNum,
 extern int parseAndCallCommandLineTests(int argc, const char *argv[],
                                         cl_device_id device, int testNum,
                                         test_definition testList[],
-                                        int forceNoContextCreation,
-                                        cl_command_queue_properties queueProps,
-                                        int num_elements);
+                                        const test_harness_config &config);
 
 // Call this function if you need to do all the setup work yourself, and just
 // need the function list called/ managed.
@@ -129,21 +137,19 @@ extern int parseAndCallCommandLineTests(int argc, const char *argv[],
 //    resultTestList is an array of statuses which contain the result of each
 //    selected test testNum is the number of tests in testList, selectedTestList
 //    and resultTestList contextProps are used to create a testing context for
-//    each test deviceToUse and numElementsToUse are all just passed to each
+//    each test deviceToUse and config are all just passed to each
 //    test function
 extern void callTestFunctions(test_definition testList[],
                               unsigned char selectedTestList[],
                               test_status resultTestList[], int testNum,
                               cl_device_id deviceToUse,
-                              int forceNoContextCreation, int numElementsToUse,
-                              cl_command_queue_properties queueProps);
+                              const test_harness_config &config);
 
 // This function is called by callTestFunctions, once per function, to do setup,
 // call, logging and cleanup
-extern test_status
-callSingleTestFunction(test_definition test, cl_device_id deviceToUse,
-                       int forceNoContextCreation, int numElementsToUse,
-                       cl_command_queue_properties queueProps);
+extern test_status callSingleTestFunction(test_definition test,
+                                          cl_device_id deviceToUse,
+                                          const test_harness_config &config);
 
 ///// Miscellaneous steps
 
diff --git a/test_common/harness/threadTesting.h b/test_common/harness/threadTesting.h
deleted file mode 100644
index 2f3c1873..00000000
--- a/test_common/harness/threadTesting.h
+++ /dev/null
@@ -1,28 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#ifndef _threadTesting_h
-#define _threadTesting_h
-
-#ifdef __APPLE__
-#include <OpenCL/opencl.h>
-#else
-#include <CL/opencl.h>
-#endif
-
-typedef int (*basefn)(cl_device_id deviceID, cl_context context,
-                      cl_command_queue queue, int num_elements);
-
-#endif // _threadTesting_h
-\ No newline at end of file
diff --git a/test_conformance/CMakeLists.txt b/test_conformance/CMakeLists.txt
index f9514f1e..1f1970af 100644
--- a/test_conformance/CMakeLists.txt
+++ b/test_conformance/CMakeLists.txt
@@ -52,7 +52,10 @@ add_subdirectory( pipes )
 add_subdirectory( device_timer )
 add_subdirectory( spirv_new )
 add_subdirectory( spir )
-add_subdirectory( vulkan )
+if(VULKAN_IS_SUPPORTED)
+    add_subdirectory( common/vulkan_wrapper )
+    add_subdirectory( vulkan )
+endif()
 
 file(GLOB CSV_FILES "opencl_conformance_tests_*.csv")
 
diff --git a/test_conformance/SVM/CMakeLists.txt b/test_conformance/SVM/CMakeLists.txt
index c56cc57a..2d01a825 100644
--- a/test_conformance/SVM/CMakeLists.txt
+++ b/test_conformance/SVM/CMakeLists.txt
@@ -1,4 +1,3 @@
-set(CMAKE_CXX_STANDARD 11)
 set(MODULE_NAME SVM)
 
 set(${MODULE_NAME}_SOURCES
diff --git a/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp b/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp
index 12358167..d8a3dcad 100644
--- a/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp
+++ b/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp
@@ -57,7 +57,6 @@ cl_int create_linked_lists_on_host(cl_command_queue cmdq, cl_mem nodes, Node *pN
 cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes, Node *pNodes2, cl_int ListLength, size_t numLists, cl_bool useNewAPI )
 {
   cl_int error = CL_SUCCESS;
-  cl_int correct_count;
 
   Node *pNodes;
   if (useNewAPI == CL_FALSE)
@@ -72,8 +71,6 @@ cl_int verify_linked_lists_on_host(int ci, cl_command_queue cmdq, cl_mem nodes,
     test_error2(error, pNodes, "clEnqueueSVMMap failed");
   }
 
-  correct_count = 0;
-
   error = verify_linked_lists(pNodes, numLists, ListLength);
   if(error) return -1;
 
diff --git a/test_conformance/api/test_api_consistency.cpp b/test_conformance/api/test_api_consistency.cpp
index d6c4bba7..cc7e190a 100644
--- a/test_conformance/api/test_api_consistency.cpp
+++ b/test_conformance/api/test_api_consistency.cpp
@@ -647,7 +647,7 @@ int test_consistency_read_write_images(cl_device_id deviceID,
         CL_MEM_OBJECT_IMAGE2D,       CL_MEM_OBJECT_IMAGE3D,
         CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY,
     };
-    for (int i = 0; i < ARRAY_SIZE(image_types); i++)
+    for (size_t i = 0; i < ARRAY_SIZE(image_types); i++)
     {
         cl_uint numImageFormats = 0;
         error = clGetSupportedImageFormats(
@@ -801,7 +801,7 @@ int test_consistency_depth_images(cl_device_id deviceID, cl_context context,
         CL_MEM_READ_WRITE,
         CL_MEM_KERNEL_READ_AND_WRITE,
     };
-    for (int i = 0; i < ARRAY_SIZE(mem_flags); i++)
+    for (size_t i = 0; i < ARRAY_SIZE(mem_flags); i++)
     {
         cl_uint numImageFormats = 0;
         error = clGetSupportedImageFormats(context, mem_flags[i],
@@ -1114,7 +1114,7 @@ int test_consistency_3d_image_writes(cl_device_id deviceID, cl_context context,
         CL_MEM_READ_WRITE,
         CL_MEM_KERNEL_READ_AND_WRITE,
     };
-    for (int i = 0; i < ARRAY_SIZE(mem_flags); i++)
+    for (size_t i = 0; i < ARRAY_SIZE(mem_flags); i++)
     {
         cl_uint numImageFormats = 0;
         error = clGetSupportedImageFormats(context, mem_flags[i],
diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp
index 086008d7..3ced8927 100644
--- a/test_conformance/api/test_api_min_max.cpp
+++ b/test_conformance/api/test_api_min_max.cpp
@@ -1185,7 +1185,7 @@ int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context,
 int test_min_max_parameter_size(cl_device_id deviceID, cl_context context,
                                 cl_command_queue queue, int num_elements)
 {
-    int error, retVal, i;
+    int error, i;
     size_t maxSize;
     char *programSrc;
     char *ptr;
@@ -1320,8 +1320,6 @@ int test_min_max_parameter_size(cl_device_id deviceID, cl_context context,
         }
 
         /* Try to set a large argument to the kernel */
-        retVal = 0;
-
         mem = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_long), NULL,
                              &error);
         test_error(error, "clCreateBuffer failed");
@@ -2292,7 +2290,7 @@ int test_min_max_device_version(cl_device_id deviceID, cl_context context,
     {
         log_info("Checking for required extensions for OpenCL 1.1 and later "
                  "devices...\n");
-        for (int i = 0; i < ARRAY_SIZE(requiredExtensions11); i++)
+        for (size_t i = 0; i < ARRAY_SIZE(requiredExtensions11); i++)
         {
             if (!is_extension_available(deviceID, requiredExtensions11[i]))
             {
@@ -2337,7 +2335,7 @@ int test_min_max_device_version(cl_device_id deviceID, cl_context context,
         {
             log_info("Checking for required extensions for OpenCL 2.0, 2.1 and "
                      "2.2 devices...\n");
-            for (int i = 0; i < ARRAY_SIZE(requiredExtensions2x); i++)
+            for (size_t i = 0; i < ARRAY_SIZE(requiredExtensions2x); i++)
             {
                 if (!is_extension_available(deviceID, requiredExtensions2x[i]))
                 {
diff --git a/test_conformance/api/test_clone_kernel.cpp b/test_conformance/api/test_clone_kernel.cpp
index 1a7e67a6..cc95c9b0 100644
--- a/test_conformance/api/test_clone_kernel.cpp
+++ b/test_conformance/api/test_clone_kernel.cpp
@@ -94,18 +94,6 @@ struct structArg
     float f;
 };
 
-static unsigned char *
-generate_8888_image(int w, int h, MTdata d)
-{
-    unsigned char   *ptr = (unsigned char*)malloc(w * h * 4);
-    int             i;
-
-    for (i=0; i<w*h*4; i++)
-        ptr[i] = (unsigned char)genrand_int32( d);
-
-    return ptr;
-}
-
 int test_image_arg_shallow_clone(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, void* pbufRes, clMemWrapper& bufOut)
 {
     int error;
@@ -323,6 +311,14 @@ int test_clone_kernel(cl_device_id deviceID, cl_context context, cl_command_queu
     clKernelWrapper clonek = clCloneKernel(kernel, &error);
     test_error( error, "clCloneKernel failed." );
 
+    // enqueue the kernel before the last arg is set
+    error = clEnqueueNDRangeKernel(queue, clonek, 1, NULL, &ndrange1, NULL, 0,
+                                   NULL, NULL);
+    test_failure_error(error, CL_INVALID_KERNEL_ARGS,
+                       "A kernel cloned before all args are set should return "
+                       "CL_INVALID_KERNEL_ARGS if enqueued before the "
+                       "remaining args are set");
+
     // set the last arg and enqueue
     error = clSetKernelArg(clonek, 4, sizeof(cl_mem), &bufOut);
     test_error( error, "clSetKernelArg failed." );
diff --git a/test_conformance/api/test_create_kernels.cpp b/test_conformance/api/test_create_kernels.cpp
index 568e84cb..0aa43614 100644
--- a/test_conformance/api/test_create_kernels.cpp
+++ b/test_conformance/api/test_create_kernels.cpp
@@ -406,8 +406,8 @@ int test_get_program_info_kernel_names( cl_device_id deviceID, cl_context contex
 
     if (i == sizeof( actual_names ) / sizeof( actual_names[0] ) )
     {
-        free(kernel_names);
         log_error( "Kernel names \"%s\" did not match:\n", kernel_names );
+        free(kernel_names);
         for( i = 0; i < sizeof( actual_names ) / sizeof( actual_names[0] ); i++ )
             log_error( "\t\t\"%s\"\n", actual_names[0] );
         return -1;
diff --git a/test_conformance/api/test_kernel_arg_info.cpp b/test_conformance/api/test_kernel_arg_info.cpp
index d0681dfd..f862ceda 100644
--- a/test_conformance/api/test_kernel_arg_info.cpp
+++ b/test_conformance/api/test_kernel_arg_info.cpp
@@ -178,7 +178,7 @@ static std::string generate_kernel(const std::vector<KernelArgInfo>& all_args,
         ret += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
     }
     ret += "kernel void get_kernel_arg_info(\n";
-    for (int i = 0; i < all_args.size(); ++i)
+    for (size_t i = 0; i < all_args.size(); ++i)
     {
         ret += generate_argument(all_args[i]);
         if (i == all_args.size() - 1)
@@ -492,7 +492,7 @@ compare_kernel_with_expected(cl_context context, cl_device_id deviceID,
         context, &program, &kernel, 1, &kernel_src, "get_kernel_arg_info",
         get_build_options(deviceID).c_str());
     test_error(err, "create_single_kernel_helper_with_build_options");
-    for (int i = 0; i < expected_args.size(); ++i)
+    for (size_t i = 0; i < expected_args.size(); ++i)
     {
         KernelArgInfo actual;
         err = clGetKernelArgInfo(kernel, i, CL_KERNEL_ARG_ADDRESS_QUALIFIER,
diff --git a/test_conformance/api/test_min_image_formats.cpp b/test_conformance/api/test_min_image_formats.cpp
index f6a35463..5da8a162 100644
--- a/test_conformance/api/test_min_image_formats.cpp
+++ b/test_conformance/api/test_min_image_formats.cpp
@@ -71,11 +71,11 @@ int test_min_image_formats(cl_device_id device, cl_context context,
     int supports_3D_image_writes =
         is_extension_available(device, "cl_khr_3d_image_writes");
 
-    for (int t = 0; t < ARRAY_SIZE(image_types); t++)
+    for (size_t t = 0; t < ARRAY_SIZE(image_types); t++)
     {
         const cl_mem_object_type type = image_types[t];
         log_info("    testing %s...\n", convert_image_type_to_string(type));
-        for (int f = 0; f < ARRAY_SIZE(mem_flags); f++)
+        for (size_t f = 0; f < ARRAY_SIZE(mem_flags); f++)
         {
             const cl_mem_flags flags = mem_flags[f];
             const char* testTypeString = flags == CL_MEM_READ_ONLY
diff --git a/test_conformance/api/test_queries.cpp b/test_conformance/api/test_queries.cpp
index a7703a76..fa5c227f 100644
--- a/test_conformance/api/test_queries.cpp
+++ b/test_conformance/api/test_queries.cpp
@@ -162,8 +162,8 @@ static cl_filter_mode filter_mode_values[] = { CL_FILTER_NEAREST,
                                                CL_FILTER_LINEAR };
 
 int test_sampler_params(cl_device_id deviceID, cl_context context,
-                        bool is_compatibility, int norm_coord_num,
-                        int addr_mod_num, int filt_mod_num)
+                        bool is_compatibility, size_t norm_coord_num,
+                        size_t addr_mod_num, size_t filt_mod_num)
 {
     cl_uint refCount;
     size_t size;
@@ -272,10 +272,10 @@ int test_sampler_params(cl_device_id deviceID, cl_context context,
 int get_sampler_info_params(cl_device_id deviceID, cl_context context,
                             bool is_compatibility)
 {
-    for (int norm_coord_num = 0;
+    for (size_t norm_coord_num = 0;
          norm_coord_num < ARRAY_SIZE(normalized_coord_values); norm_coord_num++)
     {
-        for (int addr_mod_num = 0;
+        for (size_t addr_mod_num = 0;
              addr_mod_num < ARRAY_SIZE(addressing_mode_values); addr_mod_num++)
         {
             if ((normalized_coord_values[norm_coord_num] == CL_FALSE)
@@ -285,7 +285,7 @@ int get_sampler_info_params(cl_device_id deviceID, cl_context context,
             {
                 continue;
             }
-            for (int filt_mod_num = 0;
+            for (size_t filt_mod_num = 0;
                  filt_mod_num < ARRAY_SIZE(filter_mode_values); filt_mod_num++)
             {
                 int err = test_sampler_params(deviceID, context,
diff --git a/test_conformance/api/test_queue_properties.cpp b/test_conformance/api/test_queue_properties.cpp
index 7975ec93..62d0a734 100644
--- a/test_conformance/api/test_queue_properties.cpp
+++ b/test_conformance/api/test_queue_properties.cpp
@@ -60,7 +60,7 @@ int enqueue_kernel(cl_context context, const cl_queue_properties_khr *queue_prop
     clCommandQueueWrapper queue = clCreateCommandQueueWithPropertiesKHR(context, deviceID, queue_prop_def, &error);
     test_error(error, "clCreateCommandQueueWithPropertiesKHR failed");
 
-    for (int i = 0; i < num_elements; ++i)
+    for (size_t i = 0; i < num_elements; ++i)
     {
         buf[i] = i;
     }
@@ -85,9 +85,9 @@ int enqueue_kernel(cl_context context, const cl_queue_properties_khr *queue_prop
     error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, num_elements, buf.data(), 0, NULL, NULL);
     test_error( error, "clEnqueueReadBuffer failed." );
 
-    for (int i = 0; i < num_elements; ++i)
+    for (size_t i = 0; i < num_elements; ++i)
     {
-        if (buf[i] != i)
+        if (static_cast<size_t>(buf[i]) != i)
         {
             log_error("ERROR: Incorrect vector copy result.");
             return -1;
@@ -162,4 +162,4 @@ int test_queue_properties(cl_device_id deviceID, cl_context context, cl_command_
     }
 
     return 0;
-}
-\ No newline at end of file
+}
diff --git a/test_conformance/api/test_sub_group_dispatch.cpp b/test_conformance/api/test_sub_group_dispatch.cpp
index 61d9a524..9a3bf959 100644
--- a/test_conformance/api/test_sub_group_dispatch.cpp
+++ b/test_conformance/api/test_sub_group_dispatch.cpp
@@ -127,7 +127,9 @@ int test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_comman
     log_info("The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE for the kernel is %d.\n", (int)kernel_subgroup_count);
 
     if (realSize != sizeof(kernel_subgroup_count)) {
-        log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize );
+        log_error("ERROR: Returned size of sub group count not valid! "
+                  "(Expected %d, got %d)\n",
+                  (int)sizeof(kernel_subgroup_count), (int)realSize);
         return -1;
     }
 
@@ -135,7 +137,6 @@ int test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_comman
     for (size_t i = kernel_subgroup_count; i > 0; --i)
     {
         // test all 3 different dimention of requested local size
-        size_t expect_size = kernel_max_subgroup_size * i;
         size_t kernel_ret_size = 0;
         error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(i), &i, sizeof(ret_ndrange1d), &ret_ndrange1d, &realSize);
         test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
@@ -144,12 +145,6 @@ int test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_comman
             return -1;
         }
 
-        if (ret_ndrange1d != expect_size)
-        {
-            log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! (Expected %d, got %d)\n", (int)expect_size, (int)ret_ndrange1d );
-            return -1;
-        }
-
         error = get_sub_group_num(queue, kernel, out, kernel_ret_size, ret_ndrange1d, 1);
         test_error(error, "Failed to query number of subgroups from kernel");
         if (i != kernel_ret_size)
@@ -166,12 +161,6 @@ int test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_comman
         }
 
         ret_ndrange2d_flattened = flatten_ndrange(ret_ndrange2d, 2);
-        if (ret_ndrange2d_flattened != expect_size ||
-            ret_ndrange2d[1] != 1)
-        {
-            log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! (Expected %d, got %d)\n", (int)expect_size, (int)ret_ndrange2d_flattened );
-            return -1;
-        }
 
         error = get_sub_group_num(queue, kernel, out, kernel_ret_size, ret_ndrange2d_flattened, 2);
         test_error(error, "Failed to query number of subgroups from kernel");
@@ -189,13 +178,6 @@ int test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_comman
         }
 
         ret_ndrange3d_flattened = flatten_ndrange(ret_ndrange3d, 3);
-        if (ret_ndrange3d_flattened != expect_size ||
-            ret_ndrange3d[1] != 1 ||
-            ret_ndrange3d[2] != 1)
-        {
-            log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! (Expected %d, got %d)\n", (int)expect_size, (int)ret_ndrange3d_flattened );
-            return -1;
-        }
 
         error = get_sub_group_num(queue, kernel, out, kernel_ret_size, ret_ndrange3d_flattened, 3);
         test_error(error, "Failed to query number of subgroups from kernel");
diff --git a/test_conformance/atomics/procs.h b/test_conformance/atomics/procs.h
index fa85aad5..46bb34bd 100644
--- a/test_conformance/atomics/procs.h
+++ b/test_conformance/atomics/procs.h
@@ -15,7 +15,6 @@
 //
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
-#include "harness/threadTesting.h"
 #include "harness/typeWrappers.h"
 
 extern int create_program_and_kernel(const char *source,
diff --git a/test_conformance/atomics/test_indexed_cases.cpp b/test_conformance/atomics/test_indexed_cases.cpp
index 2bba3e24..7da2dfa7 100644
--- a/test_conformance/atomics/test_indexed_cases.cpp
+++ b/test_conformance/atomics/test_indexed_cases.cpp
@@ -201,7 +201,6 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
     int number_of_bins = number_of_items / divisor;
     int max_counts_per_bin = divisor * 2;
 
-    int fail = 0;
     int err;
 
     clProgramWrapper program;
@@ -345,7 +344,6 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
     {
         log_error("add_index_bin_test FAILED to set kernel arguments: %d\n",
                   err);
-        fail = 1;
         return -1;
     }
 
@@ -354,7 +352,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
     if (err)
     {
         log_error("add_index_bin_test FAILED to execute kernel: %d\n", err);
-        fail = 1;
+        return -1;
     }
 
     cl_int *final_bin_assignments =
@@ -372,7 +370,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
     if (err)
     {
         log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
-        fail = 1;
+        return -1;
     }
 
     cl_int *final_bin_counts =
@@ -390,7 +388,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
     {
         log_error("add_index_bin_test FAILED to read back bin_counters: %d\n",
                   err);
-        fail = 1;
+        return -1;
     }
 
     // Verification.
diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt
index c5c4b5f0..dde3311d 100644
--- a/test_conformance/basic/CMakeLists.txt
+++ b/test_conformance/basic/CMakeLists.txt
@@ -2,13 +2,12 @@ set(MODULE_NAME BASIC)
 
 set(${MODULE_NAME}_SOURCES
     main.cpp
-    test_fpmath_float.cpp test_fpmath_float2.cpp test_fpmath_float4.cpp
+    test_fpmath_float.cpp
     test_intmath.cpp
     test_hiloeo.cpp test_local.cpp test_pointercast.cpp
     test_if.cpp test_loop.cpp
-    test_readimage.cpp test_readimage_int16.cpp test_readimage_fp32.cpp
-    test_readimage3d.cpp test_readimage3d_int16.cpp test_readimage3d_fp32.cpp
-    test_writeimage.cpp test_writeimage_int16.cpp test_writeimage_fp32.cpp
+    test_readimage.cpp
+    test_writeimage.cpp
     test_multireadimageonefmt.cpp test_multireadimagemultifmt.cpp
     test_imagedim.cpp
     test_vloadstore.cpp
@@ -31,7 +30,6 @@ set(${MODULE_NAME}_SOURCES
     test_imagecopy.cpp
     test_imagerandomcopy.cpp
     test_arrayimagecopy.cpp
-    test_arrayimagecopy3d.cpp
     test_imagecopy3d.cpp
     test_enqueue_map.cpp
     test_work_item_functions.cpp
@@ -72,4 +70,6 @@ if(APPLE)
     list(APPEND ${MODULE_NAME}_SOURCES test_queue_priority.cpp)
 endif(APPLE)
 
+set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
+
 include(../CMakeCommon.txt)
diff --git a/test_conformance/basic/procs.h b/test_conformance/basic/procs.h
index 4a01a8cb..c14340de 100644
--- a/test_conformance/basic/procs.h
+++ b/test_conformance/basic/procs.h
@@ -20,8 +20,6 @@
 #include "harness/conversions.h"
 #include "harness/rounding_mode.h"
 
-extern void     memset_pattern4(void *dest, const void *src_pattern, size_t bytes );
-
 extern int      test_hostptr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_fpmath_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int      test_fpmath_float2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
diff --git a/test_conformance/basic/test_arrayimagecopy.cpp b/test_conformance/basic/test_arrayimagecopy.cpp
index 5de5d017..906070d0 100644
--- a/test_conformance/basic/test_arrayimagecopy.cpp
+++ b/test_conformance/basic/test_arrayimagecopy.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -14,134 +14,173 @@
 // limitations under the License.
 //
 #include "harness/compat.h"
+#include "harness/imageHelpers.h"
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <vector>
 
 #include "procs.h"
 
-int test_arrayimagecopy_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
+int test_arrayimagecopy_single_format(cl_device_id device, cl_context context,
+                                      cl_command_queue queue,
+                                      cl_mem_flags flags,
+                                      cl_mem_object_type image_type,
+                                      const cl_image_format *format)
 {
-  cl_uchar    *bufptr, *imgptr;
-  clMemWrapper      buffer, image;
-  int        img_width = 512;
-  int        img_height = 512;
-  size_t    elem_size;
-  size_t    buffer_size;
-  int        i;
-  cl_int          err;
-  MTdata          d;
-  cl_event  copyevent;
-
-  log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
-
-  image = create_image_2d(context, CL_MEM_READ_WRITE, format, img_width,
-                          img_height, 0, NULL, &err);
-  test_error(err, "create_image_2d failed");
-
-  err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
-  test_error(err, "clGetImageInfo failed");
-
-  buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height;
-
-  buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, NULL, &err);
-  test_error(err, "clCreateBuffer failed");
-
-  d = init_genrand( gRandomSeed );
-  bufptr = (cl_uchar*)malloc(buffer_size);
-  for (i=0; i<(int)buffer_size; i++) {
-     bufptr[i] = (cl_uchar)genrand_int32(d);
-  }
-  free_mtdata(d); d = NULL;
-
-  size_t origin[3]={0,0,0}, region[3]={img_width,img_height,1};
-  err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
-  test_error(err, "clEnqueueWriteBuffer failed");
-
-  err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent );
-  test_error(err, "clEnqueueCopyImageToBuffer failed");
-
-  imgptr = (cl_uchar*)malloc(buffer_size);
-
-  err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL );
-  test_error(err, "clEnqueueReadBuffer failed");
-
-  err = clReleaseEvent(copyevent);
-  test_error(err, "clReleaseEvent failed");
-
-  if (memcmp(bufptr, imgptr, buffer_size) != 0) {
-    log_error( "ERROR: Results did not validate!\n" );
-    unsigned char * inchar = (unsigned char*)bufptr;
-    unsigned char * outchar = (unsigned char*)imgptr;
-    int failuresPrinted = 0;
-    int i;
-    for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
-        int failed = 0;
-        int j;
-        for (j=0; j<(int)elem_size; j++)
-            if (inchar[i+j] != outchar[i+j])
-                failed = 1;
-        char values[4096];
-        values[0] = 0;
-        if (failed) {
-            sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
-            int j;
-            for (j=0; j<(int)elem_size; j++)
-                sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
-            sprintf(values + strlen(values), "] != expected [");
-            for (j=0; j<(int)elem_size; j++)
-                sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
-            sprintf(values + strlen(values), "]");
-            log_error("%s\n", values);
-            failuresPrinted++;
-        }
-        if (failuresPrinted > 5) {
-            log_error("Not printing further failures...\n");
-            break;
+    cl_uchar *bufptr, *imgptr;
+    clMemWrapper buffer, image;
+    int img_width = 512;
+    int img_height = 512;
+    int img_depth = (image_type == CL_MEM_OBJECT_IMAGE3D) ? 32 : 1;
+    size_t elem_size;
+    size_t buffer_size;
+    cl_int err;
+    cl_event copyevent;
+
+    log_info("Testing %s %s\n",
+             GetChannelOrderName(format->image_channel_order),
+             GetChannelTypeName(format->image_channel_data_type));
+
+    if (CL_MEM_OBJECT_IMAGE2D == image_type)
+    {
+        image = create_image_2d(context, flags, format, img_width, img_height,
+                                0, nullptr, &err);
+    }
+    else
+    {
+        image = create_image_3d(context, flags, format, img_width, img_height,
+                                img_depth, 0, 0, nullptr, &err);
+    }
+    test_error(err, "create_image_xd failed");
+
+    err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t),
+                         &elem_size, NULL);
+    test_error(err, "clGetImageInfo failed");
+
+    buffer_size =
+        sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
+
+    buffer =
+        clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, NULL, &err);
+    test_error(err, "clCreateBuffer failed");
+
+    RandomSeed seed(gRandomSeed);
+    bufptr =
+        static_cast<cl_uchar *>(create_random_data(kUChar, seed, buffer_size));
+
+    size_t origin[3] = { 0, 0, 0 },
+           region[3] = { img_width, img_height, img_depth };
+    err = clEnqueueWriteBuffer(queue, buffer, CL_TRUE, 0, buffer_size, bufptr,
+                               0, NULL, NULL);
+    test_error(err, "clEnqueueWriteBuffer failed");
+
+    err = clEnqueueCopyBufferToImage(queue, buffer, image, 0, origin, region, 0,
+                                     NULL, &copyevent);
+    test_error(err, "clEnqueueCopyImageToBuffer failed");
+
+    imgptr = static_cast<cl_uchar *>(malloc(buffer_size));
+
+    err = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0,
+                             imgptr, 1, &copyevent, NULL);
+    test_error(err, "clEnqueueReadImage failed");
+
+    err = clReleaseEvent(copyevent);
+    test_error(err, "clReleaseEvent failed");
+
+    if (memcmp(bufptr, imgptr, buffer_size) != 0)
+    {
+        log_error("ERROR: Results did not validate!\n");
+        auto inchar = static_cast<unsigned char *>(bufptr);
+        auto outchar = static_cast<unsigned char *>(imgptr);
+        int failuresPrinted = 0;
+        for (int i = 0; i < (int)buffer_size; i += (int)elem_size)
+        {
+            if (memcmp(&inchar[i], &outchar[i], elem_size) != 0)
+            {
+                log_error("%d(0x%x) -> actual [", i, i);
+                for (int j = 0; j < (int)elem_size; j++)
+                    log_error("0x%02x ", inchar[i + j]);
+                log_error("] != expected [");
+                for (int j = 0; j < (int)elem_size; j++)
+                    log_error("0x%02x ", outchar[i + j]);
+                log_error("]\n");
+                failuresPrinted++;
+            }
+            if (failuresPrinted > 5)
+            {
+                log_error("Not printing further failures...\n");
+                break;
+            }
         }
+        err = -1;
     }
-    err = -1;
-  }
 
-  free(bufptr);
-  free(imgptr);
+    free(bufptr);
+    free(imgptr);
 
-  if (err)
-    log_error("ARRAY to IMAGE copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
-              (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);
+    if (err)
+        log_error(
+            "ARRAY to IMAGE copy test failed for image_channel_order=0x%lx and "
+            "image_channel_data_type=0x%lx\n",
+            (unsigned long)format->image_channel_order,
+            (unsigned long)format->image_channel_data_type);
 
-  return err;
+    return err;
 }
 
-int test_arrayimagecopy(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+
+int test_arrayimagecommon(cl_device_id device, cl_context context,
+                          cl_command_queue queue, cl_mem_flags flags,
+                          cl_mem_object_type image_type)
 {
-  cl_int          err;
-  cl_image_format *formats;
-  cl_uint         num_formats;
-  cl_uint         i;
+    cl_int err;
+    cl_uint num_formats;
+
+    err = clGetSupportedImageFormats(context, flags, image_type, 0, NULL,
+                                     &num_formats);
+    test_error(err, "clGetSupportedImageFormats failed");
+
+    std::vector<cl_image_format> formats(num_formats);
 
-  PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
+    err = clGetSupportedImageFormats(context, flags, image_type, num_formats,
+                                     formats.data(), NULL);
+    test_error(err, "clGetSupportedImageFormats failed");
 
-  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &num_formats);
-  test_error(err, "clGetSupportedImageFormats failed");
+    for (const auto &format : formats)
+    {
+        err |= test_arrayimagecopy_single_format(device, context, queue, flags,
+                                                 image_type, &format);
+    }
 
-  formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
+    if (err)
+        log_error("ARRAY to IMAGE%s copy test failed\n",
+                  convert_image_type_to_string(image_type));
+    else
+        log_info("ARRAY to IMAGE%s copy test passed\n",
+                 convert_image_type_to_string(image_type));
 
-  err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, num_formats, formats, NULL);
-  test_error(err, "clGetSupportedImageFormats failed");
+    return err;
+}
 
-  for (i = 0; i < num_formats; i++) {
-    err |= test_arrayimagecopy_single_format(device, context, queue, &formats[i]);
-  }
+int test_arrayimagecopy(cl_device_id device, cl_context context,
+                        cl_command_queue queue, int num_elements)
+{
+    PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
 
-  free(formats);
-  if (err)
-    log_error("ARRAY to IMAGE copy test failed\n");
-  else
-    log_info("ARRAY to IMAGE copy test passed\n");
+    return test_arrayimagecommon(device, context, queue, CL_MEM_READ_WRITE,
+                                 CL_MEM_OBJECT_IMAGE2D);
+}
+
+
+int test_arrayimagecopy3d(cl_device_id device, cl_context context,
+                          cl_command_queue queue, int num_elements)
+{
+    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device)
 
-  return err;
+    return test_arrayimagecommon(device, context, queue, CL_MEM_READ_ONLY,
+                                 CL_MEM_OBJECT_IMAGE3D);
 }
diff --git a/test_conformance/basic/test_arrayimagecopy3d.cpp b/test_conformance/basic/test_arrayimagecopy3d.cpp
deleted file mode 100644
index 1b08ec92..00000000
--- a/test_conformance/basic/test_arrayimagecopy3d.cpp
+++ /dev/null
@@ -1,151 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-int test_arrayimagecopy3d_single_format(cl_device_id device, cl_context context, cl_command_queue queue, cl_image_format *format)
-{
-  cl_uchar    *bufptr, *imgptr;
-  clMemWrapper      buffer, image;
-  int        img_width = 128;
-  int        img_height = 128;
-  int        img_depth = 32;
-  size_t    elem_size;
-  size_t    buffer_size;
-  int        i;
-  cl_int          err;
-  MTdata          d;
-  cl_event  copyevent;
-
-  log_info("Testing %s %s\n", GetChannelOrderName(format->image_channel_order), GetChannelTypeName(format->image_channel_data_type));
-
-  image = create_image_3d(context, CL_MEM_READ_ONLY, format, img_width,
-                          img_height, img_depth, 0, 0, NULL, &err);
-  test_error(err, "create_image_3d failed");
-
-  err = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(size_t), &elem_size, NULL);
-  test_error(err, "clGetImageInfo failed");
-
-  buffer_size = sizeof(cl_uchar) * elem_size * img_width * img_height * img_depth;
-
-  buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, NULL, &err);
-  test_error(err, "clCreateBuffer failed");
-
-  d = init_genrand( gRandomSeed );
-  bufptr = (cl_uchar*)malloc(buffer_size);
-  for (i=0; i<(int)buffer_size; i++) {
-     bufptr[i] = (cl_uchar)genrand_int32(d);
-  }
-  free_mtdata(d); d = NULL;
-
-  size_t origin[3]={0,0,0}, region[3]={img_width,img_height,img_depth};
-  err = clEnqueueWriteBuffer( queue, buffer, CL_TRUE, 0, buffer_size, bufptr, 0, NULL, NULL);
-  test_error(err, "clEnqueueWriteBuffer failed");
-
-  err = clEnqueueCopyBufferToImage( queue, buffer, image, 0, origin, region, 0, NULL, &copyevent );
-  test_error(err, "clEnqueueCopyImageToBuffer failed");
-
-  imgptr = (cl_uchar*)malloc(buffer_size);
-
-  err = clEnqueueReadImage( queue, image, CL_TRUE, origin, region, 0, 0, imgptr, 1, &copyevent, NULL );
-  test_error(err, "clEnqueueReadBuffer failed");
-
-  err = clReleaseEvent(copyevent);
-  test_error(err, "clReleaseEvent failed");
-
-  if (memcmp(bufptr, imgptr, buffer_size) != 0) {
-    log_error( "ERROR: Results did not validate!\n" );
-    unsigned char * inchar = (unsigned char*)bufptr;
-    unsigned char * outchar = (unsigned char*)imgptr;
-    int failuresPrinted = 0;
-    int i;
-    for (i=0; i< (int)buffer_size; i+=(int)elem_size) {
-        int failed = 0;
-        int j;
-        for (j=0; j<(int)elem_size; j++)
-            if (inchar[i+j] != outchar[i+j])
-                failed = 1;
-        char values[4096];
-        values[0] = 0;
-        if (failed) {
-            sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
-            int j;
-            for (j=0; j<(int)elem_size; j++)
-                sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
-            sprintf(values + strlen(values), "] != expected [");
-            for (j=0; j<(int)elem_size; j++)
-                sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
-            sprintf(values + strlen(values), "]");
-            log_error("%s\n", values);
-            failuresPrinted++;
-        }
-        if (failuresPrinted > 5) {
-            log_error("Not printing further failures...\n");
-            break;
-        }
-    }
-    err = -1;
-  }
-
-  free(bufptr);
-  free(imgptr);
-
-  if (err)
-    log_error("ARRAY to IMAGE3D copy test failed for image_channel_order=0x%lx and image_channel_data_type=0x%lx\n",
-              (unsigned long)format->image_channel_order, (unsigned long)format->image_channel_data_type);
-
-  return err;
-}
-
-int test_arrayimagecopy3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-  cl_int          err;
-  cl_image_format *formats;
-  cl_uint         num_formats;
-  cl_uint         i;
-
-  PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
-
-  err = clGetSupportedImageFormats(
-      context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, 0, NULL, &num_formats);
-  test_error(err, "clGetSupportedImageFormats failed");
-
-  formats = (cl_image_format *)malloc(num_formats * sizeof(cl_image_format));
-
-  err = clGetSupportedImageFormats(context, CL_MEM_READ_ONLY,
-                                   CL_MEM_OBJECT_IMAGE3D, num_formats, formats,
-                                   NULL);
-  test_error(err, "clGetSupportedImageFormats failed");
-
-  for (i = 0; i < num_formats; i++) {
-    err |= test_arrayimagecopy3d_single_format(device, context, queue, &formats[i]);
-  }
-
-  free(formats);
-  if (err)
-    log_error("ARRAY to IMAGE3D copy test failed\n");
-  else
-    log_info("ARRAY to IMAGE3D copy test passed\n");
-
-  return err;
-}
diff --git a/test_conformance/basic/test_float2int.cpp b/test_conformance/basic/test_float2int.cpp
index 4063a958..7f6276dd 100644
--- a/test_conformance/basic/test_float2int.cpp
+++ b/test_conformance/basic/test_float2int.cpp
@@ -61,7 +61,6 @@ test_float2int(cl_device_id device, cl_context context, cl_command_queue queue,
     cl_int          *output_ptr;
     cl_program        program;
     cl_kernel        kernel;
-    void            *values[2];
     size_t    threads[1];
     int                err;
     int                i;
@@ -103,8 +102,6 @@ test_float2int(cl_device_id device, cl_context context, cl_command_queue queue,
         return -1;
     }
 
-    values[0] = streams[0];
-    values[1] = streams[1];
   err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
   err = clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
     if (err != CL_SUCCESS)
diff --git a/test_conformance/basic/test_fpmath_float.cpp b/test_conformance/basic/test_fpmath_float.cpp
index 60d509b0..fced0f4e 100644
--- a/test_conformance/basic/test_fpmath_float.cpp
+++ b/test_conformance/basic/test_fpmath_float.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -22,245 +22,175 @@
 #include <sys/stat.h>
 #include "harness/rounding_mode.h"
 
+#include <algorithm>
+#include <functional>
+#include <string>
+#include <vector>
+
 #include "procs.h"
 
-static const char *fpadd_kernel_code =
-"__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] + srcB[tid];\n"
-"}\n";
-
-static const char *fpsub_kernel_code =
-"__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] - srcB[tid];\n"
-"}\n";
-
-static const char *fpmul_kernel_code =
-"__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid];\n"
-"}\n";
-
-
-static int
-verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n)
+struct TestDef
 {
-    float       r;
-    int         i;
+    const char op;
+    std::function<float(float, float)> ref;
+};
 
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] + inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("FP_ADD float test failed\n");
-            return -1;
-        }
-    }
+static const char *fp_kernel_code = R"(
+__kernel void test_fp(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *dst)
+{
+    int  tid = get_global_id(0);
 
-    log_info("FP_ADD float test passed\n");
-    return 0;
-}
+    dst[tid] = srcA[tid] OP srcB[tid];
+})";
 
-static int
-verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n)
+static int verify_fp(std::vector<float> (&input)[2], std::vector<float> &output,
+                     const TestDef &test)
 {
-    float       r;
-    int         i;
 
-    for (i=0; i<n; i++)
+    auto &inA = input[0];
+    auto &inB = input[1];
+    for (int i = 0; i < output.size(); i++)
     {
-        r = inptrA[i] - inptrB[i];
-        if (r != outptr[i])
+        float r = test.ref(inA[i], inB[i]);
+        if (r != output[i])
         {
-            log_error("FP_SUB float test failed\n");
+            log_error("FP '%c' float test failed\n", test.op);
             return -1;
         }
     }
 
-    log_info("FP_SUB float test passed\n");
+    log_info("FP '%c' float test passed\n", test.op);
     return 0;
 }
 
-static int
-verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n)
+
+void generate_random_inputs(std::vector<cl_float> (&input)[2])
 {
-    float       r;
-    int         i;
+    RandomSeed seed(gRandomSeed);
+
+    auto random_generator = [&seed]() {
+        return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31),
+                                MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), seed);
+    };
 
-    for (i=0; i<n; i++)
+    for (auto &v : input)
     {
-        r = inptrA[i] * inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("FP_MUL float test failed\n");
-            return -1;
-        }
+        std::generate(v.begin(), v.end(), random_generator);
     }
-
-    log_info("FP_MUL float test passed\n");
-    return 0;
 }
 
-
-int
-test_fpmath_float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+template <size_t N>
+int test_fpmath(cl_device_id device, cl_context context, cl_command_queue queue,
+                int num_elements, const std::string type_str,
+                const TestDef &test)
 {
-    cl_mem streams[4];
-    cl_program program[3];
-    cl_kernel kernel[3];
-
-    float *input_ptr[3], *output_ptr, *p;
-    size_t threads[1];
-    int err, i;
-    MTdata d = init_genrand( gRandomSeed );
-    size_t length = sizeof(cl_float) * num_elements;
+    clMemWrapper streams[3];
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+
+    int err;
+
+    size_t length = sizeof(cl_float) * num_elements * N;
+
     int isRTZ = 0;
     RoundingMode oldMode = kDefaultRoundingMode;
 
-    // check for floating point capabilities
-    cl_device_fp_config single_config = 0;
-    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
-    if (err) {
-      log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
-      return -1;
-    }
-    //If we only support rtz mode
-    if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
+    // If we only support rtz mode
+    if (CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device))
     {
-        //Check to make sure we are an embedded device
-        char profile[32];
-        err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
-        if( err )
-        {
-            log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
-              return -1;
-        }
-        if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
-        {
-            log_error( "FAILURE:  Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
-            return -1;
-        }
-
         isRTZ = 1;
         oldMode = get_round();
     }
 
 
-    input_ptr[0] = (cl_float*)malloc(length);
-    input_ptr[1] = (cl_float*)malloc(length);
-    input_ptr[2] = (cl_float*)malloc(length);
-    output_ptr   = (cl_float*)malloc(length);
-
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-
-    p = input_ptr[0];
-    for (i=0; i<num_elements; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
-    p = input_ptr[2];
-    for (i=0; i<num_elements; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
+    std::vector<cl_float> inputs[]{ std::vector<cl_float>(N * num_elements),
+                                    std::vector<cl_float>(N * num_elements) };
+    std::vector<cl_float> output = std::vector<cl_float>(N * num_elements);
 
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
-    test_error( err, "clEnqueueWriteBuffer failed.");
+    generate_random_inputs(inputs);
 
-    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
-    test_error( err, "clEnqueueWriteBuffer failed.");
-
-    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
-    test_error( err, "clEnqueueWriteBuffer failed.");
-
-    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd_kernel_code, "test_fpadd");
-    test_error( err, "create_single_kernel_helper failed");
+    for (int i = 0; i < ARRAY_SIZE(streams); i++)
+    {
+        streams[i] =
+            clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+        test_error(err, "clCreateBuffer failed.");
+    }
+    for (int i = 0; i < ARRAY_SIZE(inputs); i++)
+    {
+        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length,
+                                   inputs[i].data(), 0, NULL, NULL);
+        test_error(err, "clEnqueueWriteBuffer failed.");
+    }
 
-    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub_kernel_code, "test_fpsub");
-    test_error( err, "create_single_kernel_helper failed");
+    std::string build_options = "-DTYPE=";
+    build_options.append(type_str).append(" -DOP=").append(1, test.op);
 
-    err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul_kernel_code, "test_fpmul");
-    test_error( err, "create_single_kernel_helper failed");
+    err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                      &fp_kernel_code, "test_fp",
+                                      build_options.c_str());
 
+    test_error(err, "create_single_kernel_helper failed");
 
-    err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
+    for (int i = 0; i < ARRAY_SIZE(streams); i++)
+    {
+        err = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]);
+        test_error(err, "clSetKernelArgs failed.");
+    }
 
-    err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
+    size_t threads[] = { static_cast<size_t>(num_elements) };
+    err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL,
+                                 NULL);
+    test_error(err, "clEnqueueNDRangeKernel failed.");
 
-    err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
+    err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length,
+                              output.data(), 0, NULL, NULL);
+    test_error(err, "clEnqueueReadBuffer failed.");
 
-    threads[0] = (unsigned int)num_elements;
-    for (i=0; i<3; i++)
-    {
-        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
-        test_error( err, "clEnqueueNDRangeKernel failed.");
+    if (isRTZ) set_round(kRoundTowardZero, kfloat);
 
-        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-        test_error( err, "clEnqueueReadBuffer failed.");
+    err = verify_fp(inputs, output, test);
 
-        if( isRTZ )
-            set_round( kRoundTowardZero, kfloat );
+    if (isRTZ) set_round(oldMode, kfloat);
 
-        switch (i)
-        {
-            case 0:
-                err = verify_fpadd(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-                break;
-            case 1:
-                err = verify_fpsub(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-                break;
-            case 2:
-                err = verify_fpmul(input_ptr[0], input_ptr[1], output_ptr, num_elements);
-                break;
-        }
+    return err;
+}
 
-        if( isRTZ )
-            set_round( oldMode, kfloat );
 
-        if (err)
-            break;
-    }
+template <size_t N>
+int test_fpmath_common(cl_device_id device, cl_context context,
+                       cl_command_queue queue, int num_elements,
+                       const std::string type_str)
+{
+    TestDef tests[] = { { '+', std::plus<float>() },
+                        { '-', std::minus<float>() },
+                        { '*', std::multiplies<float>() } };
+    int err = TEST_PASS;
 
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseMemObject(streams[3]);
-    for (i=0; i<3; i++)
+    for (const auto &test : tests)
     {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
+        err |= test_fpmath<N>(device, context, queue, num_elements, type_str,
+                              test);
     }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(input_ptr[2]);
-    free(output_ptr);
-    free_mtdata( d );
 
     return err;
 }
 
+int test_fpmath_float(cl_device_id device, cl_context context,
+                      cl_command_queue queue, int num_elements)
+{
+    return test_fpmath_common<1>(device, context, queue, num_elements, "float");
+}
+
+int test_fpmath_float2(cl_device_id device, cl_context context,
+                       cl_command_queue queue, int num_elements)
+{
+    return test_fpmath_common<2>(device, context, queue, num_elements,
+                                 "float2");
+}
 
+int test_fpmath_float4(cl_device_id device, cl_context context,
+                       cl_command_queue queue, int num_elements)
+{
+    return test_fpmath_common<4>(device, context, queue, num_elements,
+                                 "float4");
+}
diff --git a/test_conformance/basic/test_fpmath_float2.cpp b/test_conformance/basic/test_fpmath_float2.cpp
deleted file mode 100644
index 1881b4be..00000000
--- a/test_conformance/basic/test_fpmath_float2.cpp
+++ /dev/null
@@ -1,266 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include "harness/rounding_mode.h"
-
-
-#include "procs.h"
-
-const char *fpadd2_kernel_code =
-"__kernel void test_fpadd2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] + srcB[tid];\n"
-"}\n";
-
-const char *fpsub2_kernel_code =
-"__kernel void test_fpsub2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] - srcB[tid];\n"
-"}\n";
-
-const char *fpmul2_kernel_code =
-"__kernel void test_fpmul2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid];\n"
-"}\n";
-
-
-int
-verify_fpadd2(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] + inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("FP_ADD float2 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("FP_ADD float2 test passed\n");
-    return 0;
-}
-
-int
-verify_fpsub2(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] - inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("FP_SUB float2 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("FP_SUB float2 test passed\n");
-    return 0;
-}
-
-int
-verify_fpmul2(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] * inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("FP_MUL float2 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("FP_MUL float2 test passed\n");
-    return 0;
-}
-
-
-int
-test_fpmath_float2(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[4];
-    cl_program program[3];
-    cl_kernel kernel[3];
-
-    cl_float *input_ptr[3], *output_ptr, *p;
-    size_t threads[1];
-    int err, i;
-    MTdata d = init_genrand( gRandomSeed );
-
-    size_t length = sizeof(cl_float) * 2 * num_elements;
-    int isRTZ = 0;
-    RoundingMode oldMode = kDefaultRoundingMode;
-
-    // check for floating point capabilities
-    cl_device_fp_config single_config = 0;
-    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
-    if (err) {
-      log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
-      return -1;
-    }
-    //If we only support rtz mode
-    if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
-    {
-        //Check to make sure we are an embedded device
-        char profile[32];
-        err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
-        if( err )
-        {
-            log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
-              return -1;
-        }
-        if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
-        {
-            log_error( "FAILURE:  Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
-            return -1;
-        }
-
-        isRTZ = 1;
-        oldMode = get_round();
-    }
-
-    input_ptr[0] = (cl_float*)malloc(length);
-    input_ptr[1] = (cl_float*)malloc(length);
-    input_ptr[2] = (cl_float*)malloc(length);
-    output_ptr   = (cl_float*)malloc(length);
-
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-
-    p = input_ptr[0];
-    for (i=0; i<num_elements*2; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
-    p = input_ptr[1];
-    for (i=0; i<num_elements*2; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
-    p = input_ptr[2];
-    for (i=0; i<num_elements*2; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
-
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
-    test_error(err, "clEnqueueWriteBuffer failed");
-    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
-    test_error(err, "clEnqueueWriteBuffer failed");
-    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
-    test_error(err, "clEnqueueWriteBuffer failed");
-
-    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd2_kernel_code, "test_fpadd2");
-    test_error( err, "create_single_kernel_helper failed");
-
-    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub2_kernel_code, "test_fpsub2");
-    test_error( err, "create_single_kernel_helper failed");
-
-    err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul2_kernel_code, "test_fpmul2");
-    test_error( err, "create_single_kernel_helper failed");
-
-
-    err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
-
-    err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
-
-    err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
-    free_mtdata(d);
-    d = NULL;
-
-    threads[0] = (unsigned int)num_elements;
-    for (i=0; i<3; i++)
-    {
-        err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
-      test_error( err, "clEnqueueNDRangeKernel failed.");
-
-        err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-      test_error( err, "clEnqueueReadBuffer failed.");
-
-        if( isRTZ )
-            set_round( kRoundTowardZero, kfloat );
-
-        switch (i)
-        {
-            case 0:
-                err = verify_fpadd2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
-                break;
-            case 1:
-                err = verify_fpsub2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
-                break;
-            case 2:
-                err = verify_fpmul2(input_ptr[0], input_ptr[1], output_ptr, num_elements*2);
-                break;
-        }
-
-        if( isRTZ )
-            set_round( oldMode, kfloat );
-
-        if (err)
-            break;
-    }
-
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseMemObject(streams[3]);
-    for (i=0; i<3; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(input_ptr[2]);
-    free(output_ptr);
-    return err;
-}
-
-
diff --git a/test_conformance/basic/test_fpmath_float4.cpp b/test_conformance/basic/test_fpmath_float4.cpp
deleted file mode 100644
index 999c8ec5..00000000
--- a/test_conformance/basic/test_fpmath_float4.cpp
+++ /dev/null
@@ -1,267 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-#include "harness/rounding_mode.h"
-
-const char *fpadd4_kernel_code =
-"__kernel void test_fpadd4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] + srcB[tid];\n"
-"}\n";
-
-const char *fpsub4_kernel_code =
-"__kernel void test_fpsub4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] - srcB[tid];\n"
-"}\n";
-
-const char *fpmul4_kernel_code =
-"__kernel void test_fpmul4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = srcA[tid] * srcB[tid];\n"
-"}\n";
-
-
-int
-verify_fpadd4(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] + inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("FP_ADD float4 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("FP_ADD float4 test passed\n");
-    return 0;
-}
-
-int
-verify_fpsub4(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] - inptrB[i];
-        if (r != outptr[i])
-        {
-            log_error("FP_SUB float4 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("FP_SUB float4 test passed\n");
-    return 0;
-}
-
-int
-verify_fpmul4(float *inptrA, float *inptrB, float *outptr, int n)
-{
-  float       r;
-  int         i;
-
-  for (i=0; i<n; i++)
-  {
-    r = inptrA[i] * inptrB[i];
-    if (r != outptr[i])
-    {
-      log_error("FP_MUL float4 test failed\n");
-      return -1;
-    }
-  }
-
-  log_info("FP_MUL float4 test passed\n");
-  return 0;
-}
-
-
-int
-test_fpmath_float4(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[4];
-    cl_program program[3];
-    cl_kernel kernel[3];
-
-    cl_float *input_ptr[3], *output_ptr, *p;
-    size_t threads[1];
-    int err, i;
-    MTdata d = init_genrand( gRandomSeed );
-
-    size_t length = sizeof(cl_float) * 4 * num_elements;
-    int isRTZ = 0;
-    RoundingMode oldMode = kDefaultRoundingMode;
-
-    // check for floating point capabilities
-    cl_device_fp_config single_config = 0;
-    err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
-    if (err) {
-      log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
-      return -1;
-    }
-    //If we only support rtz mode
-    if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
-    {
-        //Check to make sure we are an embedded device
-        char profile[32];
-        err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
-        if( err )
-        {
-            log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
-              return -1;
-        }
-        if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
-        {
-            log_error( "FAILURE:  Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
-            return -1;
-        }
-
-        isRTZ = 1;
-        oldMode = get_round();
-    }
-
-    input_ptr[0] = (cl_float*)malloc(length);
-    input_ptr[1] = (cl_float*)malloc(length);
-    input_ptr[2] = (cl_float*)malloc(length);
-    output_ptr   = (cl_float*)malloc(length);
-
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
-    test_error( err, "clCreateBuffer failed.");
-
-    p = input_ptr[0];
-    for (i=0; i<num_elements*4; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
-    p = input_ptr[1];
-    for (i=0; i<num_elements*4; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
-    p = input_ptr[2];
-    for (i=0; i<num_elements*4; i++)
-        p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
-
-    free_mtdata(d);
-
-    err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
-    test_error(err, "clEnqueueWriteBuffer failed");
-    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
-    test_error(err, "clEnqueueWriteBuffer failed");
-    err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
-    test_error(err, "clEnqueueWriteBuffer failed");
-
-    err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd4_kernel_code, "test_fpadd4");
-    test_error( err, "create_single_kernel_helper failed");
-
-    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub4_kernel_code, "test_fpsub4");
-    test_error( err, "create_single_kernel_helper failed");
-
-    err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul4_kernel_code, "test_fpmul4");
-    test_error( err, "create_single_kernel_helper failed");
-
-
-    err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
-
-    err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
-
-    err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
-    err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
-    test_error( err, "clSetKernelArgs failed.");
-
-
-  threads[0] = (unsigned int)num_elements;
-  for (i=0; i<3; i++)
-  {
-    err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
-    test_error( err, "clEnqueueNDRangeKernel failed.");
-
-    err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-    test_error( err, "clEnqueueReadBuffer failed.");
-
-    if( isRTZ )
-        set_round( kRoundTowardZero, kfloat );
-
-    switch (i)
-    {
-      case 0:
-        err = verify_fpadd4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
-        break;
-      case 1:
-        err = verify_fpsub4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
-        break;
-      case 2:
-        err = verify_fpmul4(input_ptr[0], input_ptr[1], output_ptr, num_elements*4);
-        break;
-    }
-
-    if( isRTZ )
-        set_round( oldMode, kfloat );
-
-    if (err)
-      break;
-    }
-
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseMemObject(streams[3]);
-    for (i=0; i<3; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(input_ptr[2]);
-    free(output_ptr);
-    return err;
-}
-
-
diff --git a/test_conformance/basic/test_image_param.cpp b/test_conformance/basic/test_image_param.cpp
index c8fc1b4a..3efc4c94 100644
--- a/test_conformance/basic/test_image_param.cpp
+++ b/test_conformance/basic/test_image_param.cpp
@@ -117,7 +117,8 @@ int validate_results( size_t width, size_t height, cl_image_format &format, char
             }
             default:
                 // Should never get here
-                break;
+                log_error("Unhandled channel data type\n");
+                return -1;
         }
 
         if( format.image_channel_order == CL_BGRA )
diff --git a/test_conformance/basic/test_imagearraycopy.cpp b/test_conformance/basic/test_imagearraycopy.cpp
index 0246d809..e3f2fb6d 100644
--- a/test_conformance/basic/test_imagearraycopy.cpp
+++ b/test_conformance/basic/test_imagearraycopy.cpp
@@ -87,11 +87,11 @@ int test_imagearraycopy_single_format(cl_device_id device, cl_context context, c
         char values[4096];
         values[0] = 0;
         if (failed) {
-            sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
+            sprintf(values + strlen(values), "%d(0x%x) -> expected [", i, i);
             int j;
             for (j=0; j<(int)elem_size; j++)
                 sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
-            sprintf(values + strlen(values), "] != expected [");
+            sprintf(values + strlen(values), "] != actual [");
             for (j=0; j<(int)elem_size; j++)
                 sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
             sprintf(values + strlen(values), "]");
diff --git a/test_conformance/basic/test_imagearraycopy3d.cpp b/test_conformance/basic/test_imagearraycopy3d.cpp
index 19dfdbc7..60b8a584 100644
--- a/test_conformance/basic/test_imagearraycopy3d.cpp
+++ b/test_conformance/basic/test_imagearraycopy3d.cpp
@@ -84,11 +84,11 @@ int test_imagearraycopy3d_single_format(cl_device_id device, cl_context context,
         char values[4096];
         values[0] = 0;
         if (failed) {
-            sprintf(values + strlen(values), "%d(0x%x) -> actual [", i, i);
+            sprintf(values + strlen(values), "%d(0x%x) -> expected [", i, i);
             int j;
             for (j=0; j<(int)elem_size; j++)
                 sprintf(values + strlen( values), "0x%02x ", inchar[i+j]);
-            sprintf(values + strlen(values), "] != expected [");
+            sprintf(values + strlen(values), "] != actual [");
             for (j=0; j<(int)elem_size; j++)
                 sprintf(values + strlen( values), "0x%02x ", outchar[i+j]);
             sprintf(values + strlen(values), "]");
diff --git a/test_conformance/basic/test_int2float.cpp b/test_conformance/basic/test_int2float.cpp
index 483698a2..3a8458c9 100644
--- a/test_conformance/basic/test_int2float.cpp
+++ b/test_conformance/basic/test_int2float.cpp
@@ -60,7 +60,6 @@ test_int2float(cl_device_id device, cl_context context, cl_command_queue queue,
     cl_float        *output_ptr;
     cl_program        program;
     cl_kernel        kernel;
-    void            *values[2];
     size_t    threads[1];
     int                err;
     int                i;
@@ -102,8 +101,6 @@ test_int2float(cl_device_id device, cl_context context, cl_command_queue queue,
         return -1;
     }
 
-    values[0] = streams[0];
-    values[1] = streams[1];
     err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
     err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
     if (err != CL_SUCCESS)
diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp
index e202d276..a46713e9 100644
--- a/test_conformance/basic/test_progvar.cpp
+++ b/test_conformance/basic/test_progvar.cpp
@@ -25,7 +25,6 @@
 
 #define ALIGNMENT 128
 
-#define OPTIONS "-cl-std=CL2.0"
 
 // NUM_ROUNDS must be at least 1.
 // It determines how many sets of random data we push through the global
@@ -81,20 +80,20 @@ class TypeInfo {
 
 public:
     TypeInfo()
-        : name(""), m_buf_elem_type(""), m_is_vecbase(false),
+        : name(""), m_elem_type(0), m_num_elem(0), m_is_vecbase(false),
           m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false),
-          m_elem_type(0), m_num_elem(0), m_size(0), m_value_size(0)
+          m_size(0), m_value_size(0), m_buf_elem_type("")
     {}
     TypeInfo(const char* name_arg)
-        : name(name_arg), m_buf_elem_type(name_arg), m_is_vecbase(false),
+        : name(name_arg), m_elem_type(0), m_num_elem(0), m_is_vecbase(false),
           m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false),
-          m_elem_type(0), m_num_elem(0), m_size(0), m_value_size(0)
+          m_size(0), m_value_size(0), m_buf_elem_type(name_arg)
     {}
 
     // Vectors
     TypeInfo(TypeInfo* elem_type, int num_elem)
-        : m_is_vecbase(false), m_is_atomic(false), m_is_like_size_t(false),
-          m_is_bool(false), m_elem_type(elem_type), m_num_elem(num_elem)
+        : m_elem_type(elem_type), m_num_elem(num_elem), m_is_vecbase(false),
+          m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false)
     {
         char
             the_name[10]; // long enough for longest vector type name "double16"
@@ -325,7 +324,7 @@ static int num_type_info = 0; // Number of valid entries in type_info[]
 // A helper class to form kernel source arguments for clCreateProgramWithSource.
 class StringTable {
 public:
-    StringTable(): m_c_strs(NULL), m_lengths(NULL), m_frozen(false), m_strings()
+    StringTable(): m_strings(), m_c_strs(NULL), m_lengths(NULL), m_frozen(false)
     {}
     ~StringTable() { release_frozen(); }
 
@@ -409,8 +408,9 @@ static int l_get_device_info(cl_device_id device, size_t* max_size_ret,
 
 static void l_set_randomly(cl_uchar* buf, size_t buf_size,
                            RandomSeed& rand_state);
-static int l_compare(const cl_uchar* expected, const cl_uchar* received,
-                     unsigned num_values, const TypeInfo& ti);
+static int l_compare(const char* test_name, const cl_uchar* expected,
+                     const cl_uchar* received, size_t num_values,
+                     const TypeInfo& ti);
 static int l_copy(cl_uchar* dest, unsigned dest_idx, const cl_uchar* src,
                   unsigned src_idx, const TypeInfo& ti);
 
@@ -436,9 +436,9 @@ static int l_init_write_read_for_type(cl_device_id device, cl_context context,
 static int l_capacity(cl_device_id device, cl_context context,
                       cl_command_queue queue, size_t max_size);
 static int l_user_type(cl_device_id device, cl_context context,
-                       cl_command_queue queue, size_t max_size,
-                       bool separate_compilation);
+                       cl_command_queue queue, bool separate_compile);
 
+static std::string get_build_options(cl_device_id device);
 
 ////////////////////
 // File scope function definitions
@@ -539,7 +539,7 @@ static cl_int print_build_log(cl_program program, cl_uint num_devices,
                 log_error("clGetProgramBuildInfo returned an empty log.\n");
             else
             {
-                log_error("Build log:\n", deviceName);
+                log_error("Build log for device \"%s\":\n", deviceName);
                 log_error("%s\n", log.c_str());
             }
         }
@@ -1116,9 +1116,8 @@ static int l_write_read_for_type(cl_device_id device, cl_context context,
     clProgramWrapper program;
     clKernelWrapper writer;
 
-    status = create_single_kernel_helper_with_build_options(
-        context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer",
-        OPTIONS);
+    status = create_single_kernel_helper(context, &program, &writer,
+                                         ksrc.num_str(), ksrc.strs(), "writer");
     test_error_ret(status, "Failed to create program for read-after-write test",
                    status);
 
@@ -1326,9 +1325,8 @@ static int l_init_write_read_for_type(cl_device_id device, cl_context context,
     clProgramWrapper program;
     clKernelWrapper writer;
 
-    status = create_single_kernel_helper_with_build_options(
-        context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer",
-        OPTIONS);
+    status = create_single_kernel_helper(context, &program, &writer,
+                                         ksrc.num_str(), ksrc.strs(), "writer");
     test_error_ret(status,
                    "Failed to create program for init-read-after-write test",
                    status);
@@ -1581,9 +1579,9 @@ static int l_capacity(cl_device_id device, cl_context context,
     clProgramWrapper program;
     clKernelWrapper get_max_size;
 
-    status = create_single_kernel_helper_with_build_options(
-        context, &program, &get_max_size, ksrc.num_str(), ksrc.strs(),
-        "get_max_size", OPTIONS);
+    status = create_single_kernel_helper(context, &program, &get_max_size,
+                                         ksrc.num_str(), ksrc.strs(),
+                                         "get_max_size");
     test_error_ret(status, "Failed to create program for capacity test",
                    status);
 
@@ -1737,6 +1735,8 @@ static int l_user_type(cl_device_id device, cl_context context,
 
     clProgramWrapper program;
 
+    const std::string options = get_build_options(device);
+
     if (separate_compile)
     {
         // Separate compilation flow.
@@ -1757,15 +1757,15 @@ static int l_user_type(cl_device_id device, cl_context context,
                        "Failed to create writer program for user type test",
                        status);
 
-        status = clCompileProgram(writer_program, 1, &device, OPTIONS, 0, 0, 0,
-                                  0, 0);
+        status = clCompileProgram(writer_program, 1, &device, options.c_str(),
+                                  0, 0, 0, 0, 0);
         if (check_error(
                 status,
                 "Failed to compile writer program for user type test (%s)",
                 IGetErrorString(status)))
         {
             print_build_log(writer_program, 1, &device, wksrc.num_str(),
-                            wksrc.strs(), wksrc.lengths(), OPTIONS);
+                            wksrc.strs(), wksrc.lengths(), options.c_str());
             return status;
         }
 
@@ -1775,15 +1775,15 @@ static int l_user_type(cl_device_id device, cl_context context,
                        "Failed to create reader program for user type test",
                        status);
 
-        status = clCompileProgram(reader_program, 1, &device, OPTIONS, 0, 0, 0,
-                                  0, 0);
+        status = clCompileProgram(reader_program, 1, &device, options.c_str(),
+                                  0, 0, 0, 0, 0);
         if (check_error(
                 status,
                 "Failed to compile reader program for user type test (%s)",
                 IGetErrorString(status)))
         {
             print_build_log(reader_program, 1, &device, rksrc.num_str(),
-                            rksrc.strs(), rksrc.lengths(), OPTIONS);
+                            rksrc.strs(), rksrc.lengths(), options.c_str());
             return status;
         }
 
@@ -1813,23 +1813,23 @@ static int l_user_type(cl_device_id device, cl_context context,
         int status = CL_SUCCESS;
 
         status = create_single_kernel_helper_create_program(
-            context, &program, ksrc.num_str(), ksrc.strs(), OPTIONS);
+            context, &program, ksrc.num_str(), ksrc.strs(), options.c_str());
         if (check_error(status,
                         "Failed to build program for user type test (%s)",
                         IGetErrorString(status)))
         {
             print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(),
-                            ksrc.lengths(), OPTIONS);
+                            ksrc.lengths(), options.c_str());
             return status;
         }
 
-        status = clBuildProgram(program, 1, &device, OPTIONS, 0, 0);
+        status = clBuildProgram(program, 1, &device, options.c_str(), 0, 0);
         if (check_error(status,
                         "Failed to compile program for user type test (%s)",
                         IGetErrorString(status)))
         {
             print_build_log(program, 1, &device, ksrc.num_str(), ksrc.strs(),
-                            ksrc.lengths(), OPTIONS);
+                            ksrc.lengths(), options.c_str());
             return status;
         }
     }
@@ -1935,6 +1935,14 @@ static int l_user_type(cl_device_id device, cl_context context,
     return err;
 }
 
+static std::string get_build_options(cl_device_id device)
+{
+    std::string options = "-cl-std=CL";
+    Version latest_cl_c_version = get_device_latest_cl_c_version(device);
+    options += latest_cl_c_version.to_string();
+    return options;
+}
+
 // Determines whether its valid to skip this test based on the driver version
 // and the features it optionally supports.
 // Whether the test should be skipped is writen into the out paramter skip.
@@ -2102,9 +2110,9 @@ int test_progvar_func_scope(cl_device_id device, cl_context context,
     clProgramWrapper program;
     clKernelWrapper test_bump;
 
-    status = create_single_kernel_helper_with_build_options(
-        context, &program, &test_bump, ksrc.num_str(), ksrc.strs(), "test_bump",
-        OPTIONS);
+    status =
+        create_single_kernel_helper(context, &program, &test_bump,
+                                    ksrc.num_str(), ksrc.strs(), "test_bump");
     test_error_ret(status,
                    "Failed to create program for function static variable test",
                    status);
diff --git a/test_conformance/basic/test_readimage.cpp b/test_conformance/basic/test_readimage.cpp
index 57860090..0aa70525 100644
--- a/test_conformance/basic/test_readimage.cpp
+++ b/test_conformance/basic/test_readimage.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 #include "harness/compat.h"
+#include "harness/imageHelpers.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -21,272 +22,356 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 
+#include <algorithm>
+#include <string>
+#include <vector>
 
 #include "procs.h"
 
-static const char *bgra8888_kernel_code =
-"\n"
-"__kernel void test_bgra8888(read_only image2d_t srcimg, __global uchar4 *dst, sampler_t sampler)\n"
-"{\n"
-"    int    tid_x = get_global_id(0);\n"
-"    int    tid_y = get_global_id(1);\n"
-"    int    indx = tid_y * get_image_width(srcimg) + tid_x;\n"
-"    float4 color;\n"
-"\n"
-"    color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y)) * 255.0f;\n"
-"    dst[indx] = convert_uchar4_rte(color.zyxw);\n"
-"\n"
-"}\n";
-
-
-static const char *rgba8888_kernel_code =
-"\n"
-"__kernel void test_rgba8888(read_only image2d_t srcimg, __global uchar4 *dst, sampler_t sampler)\n"
-"{\n"
-"    int    tid_x = get_global_id(0);\n"
-"    int    tid_y = get_global_id(1);\n"
-"    int    indx = tid_y * get_image_width(srcimg) + tid_x;\n"
-"    float4 color;\n"
-"\n"
-"    color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y)) * 255.0f;\n"
-"    dst[indx] = convert_uchar4_rte(color);\n"
-"\n"
-"}\n";
-
-
-static unsigned char *
-generate_8888_image(int w, int h, MTdata d)
+#define TEST_IMAGE_WIDTH_2D (512)
+#define TEST_IMAGE_HEIGHT_2D (512)
+
+#define TEST_IMAGE_WIDTH_3D (64)
+#define TEST_IMAGE_HEIGHT_3D (64)
+#define TEST_IMAGE_DEPTH_3D (64)
+
+#define TEST_IMAGE_WIDTH(TYPE)                                                 \
+    ((CL_MEM_OBJECT_IMAGE2D == TYPE) ? TEST_IMAGE_WIDTH_2D                     \
+                                     : TEST_IMAGE_WIDTH_3D)
+#define TEST_IMAGE_HEIGHT(TYPE)                                                \
+    ((CL_MEM_OBJECT_IMAGE2D == TYPE) ? TEST_IMAGE_HEIGHT_2D                    \
+                                     : TEST_IMAGE_HEIGHT_3D)
+#define TEST_IMAGE_DEPTH(TYPE)                                                 \
+    ((CL_MEM_OBJECT_IMAGE2D == TYPE) ? 1 : TEST_IMAGE_DEPTH_3D)
+
+namespace {
+const char *kernel_source_2d = R"(
+__kernel void test_CL_BGRACL_UNORM_INT8(read_only image2d_t srcimg, __global uchar4 *dst, sampler_t sampler)
 {
-    unsigned char   *ptr = (unsigned char*)malloc(w * h * 4);
-    int             i;
+    int    tid_x = get_global_id(0);
+    int    tid_y = get_global_id(1);
+    int    indx = tid_y * get_image_width(srcimg) + tid_x;
+    float4 color;
+
+    color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y)) * 255.0f;
+    dst[indx] = convert_uchar4_rte(color.zyxw);
+}
 
-    for (i=0; i<w*h*4; i++)
-        ptr[i] = (unsigned char)genrand_int32( d);
+__kernel void test_CL_RGBACL_UNORM_INT8(read_only image2d_t srcimg, __global uchar4 *dst, sampler_t sampler)
+{
+    int    tid_x = get_global_id(0);
+    int    tid_y = get_global_id(1);
+    int    indx = tid_y * get_image_width(srcimg) + tid_x;
+    float4 color;
 
-    return ptr;
+    color = read_imagef(srcimg, sampler, (int2)(tid_x, tid_y)) * 255.0f;
+    dst[indx] = convert_uchar4_rte(color);
 }
 
-static int
-verify_bgra8888_image(unsigned char *image, unsigned char *outptr, int w, int h)
+__kernel void test_CL_RGBACL_UNORM_INT16(read_only image2d_t srcimg, __global ushort4 *dst, sampler_t smp)
 {
-    int     i;
+    int    tid_x = get_global_id(0);
+    int    tid_y = get_global_id(1);
+    int    indx = tid_y * get_image_width(srcimg) + tid_x;
+    float4 color;
+
+    color = read_imagef(srcimg, smp, (int2)(tid_x, tid_y));
+    ushort4 dst_write;
+    dst_write.x = convert_ushort_rte(color.x * 65535.0f);
+    dst_write.y = convert_ushort_rte(color.y * 65535.0f);
+    dst_write.z = convert_ushort_rte(color.z * 65535.0f);
+    dst_write.w = convert_ushort_rte(color.w * 65535.0f);
+    dst[indx] = dst_write;
+}
 
-    for (i=0; i<w*h; i++)
-    {
-        if (outptr[i] != image[i])
-        {
-            log_error("READ_IMAGE_BGRA_UNORM_INT8 test failed\n");
-            return -1;
-        }
-    }
+__kernel void test_CL_RGBACL_FLOAT(read_only image2d_t srcimg, __global float4 *dst, sampler_t smp)
+{
+    int    tid_x = get_global_id(0);
+    int    tid_y = get_global_id(1);
+    int    indx = tid_y * get_image_width(srcimg) + tid_x;
+    float4 color;
+
+    color = read_imagef(srcimg, smp, (int2)(tid_x, tid_y));
+    
+    dst[indx].x = color.x;
+    dst[indx].y = color.y;
+    dst[indx].z = color.z;
+    dst[indx].w = color.w;
 
-    log_info("READ_IMAGE_BGRA_UNORM_INT8 test passed\n");
-    return 0;
 }
+)";
 
-static int
-verify_rgba8888_image(unsigned char *image, unsigned char *outptr, int w, int h)
+static const char *kernel_source_3d = R"(
+__kernel void test_CL_BGRACL_UNORM_INT8(read_only image3d_t srcimg, __global uchar4 *dst, sampler_t sampler)
 {
-    int     i;
+    int    tid_x = get_global_id(0);
+    int    tid_y = get_global_id(1);
+    int    tid_z = get_global_id(2);
+    int    indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;
+    float4 color;
+
+    color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0))* 255.0f;
+    dst[indx].x = color.z;
+    dst[indx].y = color.y;
+    dst[indx].z = color.x;
+    dst[indx].w = color.w;
 
-    for (i=0; i<w*h*4; i++)
-    {
-        if (outptr[i] != image[i])
-        {
-            log_error("READ_IMAGE_RGBA_UNORM_INT8 test failed\n");
-            return -1;
-        }
-    }
+}
+
+__kernel void test_CL_RGBACL_UNORM_INT8(read_only image3d_t srcimg, __global uchar4 *dst, sampler_t sampler)
+{
+    int    tid_x = get_global_id(0);
+    int    tid_y = get_global_id(1);
+    int    tid_z = get_global_id(2);
+    int    indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;
+    float4 color;
+
+    color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0))* 255.0f;
+
+    dst[indx].x = color.x;
+    dst[indx].y = color.y;
+    dst[indx].z = color.z;
+    dst[indx].w = color.w;
+
+}
+
+__kernel void test_CL_RGBACL_UNORM_INT16(read_only image3d_t srcimg, __global ushort4 *dst, sampler_t sampler)
+{
+    int    tid_x = get_global_id(0);
+    int    tid_y = get_global_id(1);
+    int    tid_z = get_global_id(2);
+    int    indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;
+    float4 color;
+
+    color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));
+    ushort4 dst_write;
+    dst_write.x = convert_ushort_rte(color.x * 65535.0f);
+    dst_write.y = convert_ushort_rte(color.y * 65535.0f);
+    dst_write.z = convert_ushort_rte(color.z * 65535.0f);
+    dst_write.w = convert_ushort_rte(color.w * 65535.0f);
+    dst[indx] = dst_write;
 
-    log_info("READ_IMAGE_RGBA_UNORM_INT8 test passed\n");
-    return 0;
 }
 
+__kernel void test_CL_RGBACL_FLOAT(read_only image3d_t srcimg, __global float *dst, sampler_t sampler)
+{
+    int    tid_x = get_global_id(0);
+    int    tid_y = get_global_id(1);
+    int    tid_z = get_global_id(2);
+    int    indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;
+    float4 color;
+
+    color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));
+    indx *= 4;
+    dst[indx+0] = color.x;
+    dst[indx+1] = color.y;
+    dst[indx+2] = color.z;
+    dst[indx+3] = color.w;
+
+}
+)";
 
-int test_readimage(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+template <typename T> void generate_random_inputs(std::vector<T> &v)
 {
-    cl_mem streams[3];
-    cl_program program[2];
-    cl_kernel kernel[2];
-    cl_image_format    img_format;
-    cl_image_format *supported_formats;
-    unsigned char    *input_ptr[2], *output_ptr;
-    size_t threads[2];
-    int img_width = 512;
-    int img_height = 512;
-    int i, err;
-    size_t origin[3] = {0, 0, 0};
-    size_t region[3] = {img_width, img_height, 1};
-    size_t length = img_width * img_height * 4 * sizeof(unsigned char);
-    MTdata d = init_genrand( gRandomSeed );
-    int supportsBGRA = 0;
-    cl_uint numFormats = 0;
-
-    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
-
-    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
-
-    d = init_genrand( gRandomSeed );
-    input_ptr[0] = generate_8888_image(img_width, img_height, d);
-    input_ptr[1] = generate_8888_image(img_width, img_height, d);
-    free_mtdata(d); d = NULL;
-
-    output_ptr = (unsigned char*)malloc(length);
-
-    if(gIsEmbedded)
+    RandomSeed seed(gRandomSeed);
+
+    auto random_generator = [&seed]() {
+        return static_cast<T>(genrand_int32(seed));
+    };
+
+    std::generate(v.begin(), v.end(), random_generator);
+}
+
+template <> void generate_random_inputs<float>(std::vector<float> &v)
+{
+    RandomSeed seed(gRandomSeed);
+
+    auto random_generator = [&seed]() {
+        return get_random_float(-0x40000000, 0x40000000, seed);
+    };
+
+    std::generate(v.begin(), v.end(), random_generator);
+}
+
+cl_mem create_image_xd(cl_context context, cl_mem_flags flags,
+                       cl_mem_object_type type, const cl_image_format *fmt,
+                       size_t x, size_t y, size_t z, cl_int *err)
+{
+
+    return (CL_MEM_OBJECT_IMAGE2D == type)
+        ? create_image_2d(context, flags, fmt, x, y, 0, nullptr, err)
+        : create_image_3d(context, flags, fmt, x, y, z, 0, 0, nullptr, err);
+}
+
+template <cl_mem_object_type IMG_TYPE, typename T>
+int test_readimage(cl_device_id device, cl_context context,
+                   cl_command_queue queue, const cl_image_format *img_format)
+{
+    clMemWrapper streams[2];
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clSamplerWrapper sampler;
+
+    std::string kernel_name("test_");
+
+    size_t img_width = TEST_IMAGE_WIDTH(IMG_TYPE);
+    size_t img_height = TEST_IMAGE_HEIGHT(IMG_TYPE);
+    size_t img_depth = TEST_IMAGE_DEPTH(IMG_TYPE);
+
+    int err;
+
+    const size_t origin[3] = { 0, 0, 0 };
+    const size_t region[3] = { img_width, img_height, img_depth };
+
+    const size_t num_elements = img_width * img_height * img_depth * 4;
+    const size_t length = num_elements * sizeof(T);
+
+    PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
+
+    std::vector<T> input(num_elements);
+    std::vector<T> output(num_elements);
+
+    generate_random_inputs(input);
+
+    streams[0] =
+        create_image_xd(context, CL_MEM_READ_ONLY, IMG_TYPE, img_format,
+                        img_width, img_height, img_depth, &err);
+    test_error(err, "create_image failed.");
+
+    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
+    test_error(err, "clCreateBuffer failed.");
+
+    sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE,
+                              CL_FILTER_NEAREST, &err);
+    test_error(err, "clCreateSampler failed");
+
+    err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0,
+                              input.data(), 0, NULL, NULL);
+    test_error(err, "clEnqueueWriteImage failed.");
+
+    kernel_name += GetChannelOrderName(img_format->image_channel_order);
+    kernel_name += GetChannelTypeName(img_format->image_channel_data_type);
+
+    const char **kernel_source = (CL_MEM_OBJECT_IMAGE2D == IMG_TYPE)
+        ? &kernel_source_2d
+        : &kernel_source_3d;
+
+    err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                      kernel_source, kernel_name.c_str());
+    test_error(err, "create_single_kernel_helper failed.");
+
+    err = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
+    err |= clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
+    err |= clSetKernelArg(kernel, 2, sizeof(sampler), &sampler);
+    test_error(err, "clSetKernelArgs failed\n");
+
+    err = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, region, NULL, 0, NULL,
+                                 NULL);
+    test_error(err, "clEnqueueNDRangeKernel failed\n");
+
+    err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length,
+                              output.data(), 0, NULL, NULL);
+    test_error(err, "clEnqueueReadBuffer failed\n");
+
+    if (0 != memcmp(input.data(), output.data(), length))
     {
-        /* Get the supported image formats to see if BGRA is supported */
-        clGetSupportedImageFormats (context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &numFormats);
-        supported_formats = (cl_image_format *) malloc(sizeof(cl_image_format) * numFormats);
-        clGetSupportedImageFormats (context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, numFormats, supported_formats, NULL);
-
-        for(i = 0; i < numFormats; i++)
-        {
-            if(supported_formats[i].image_channel_order == CL_BGRA)
-            {
-                supportsBGRA = 1;
-                break;
-            }
-        }
+        log_error("READ_IMAGE_%s_%s test failed\n",
+                  GetChannelOrderName(img_format->image_channel_order),
+                  GetChannelTypeName(img_format->image_channel_data_type));
+        err = -1;
     }
     else
     {
-        supportsBGRA = 1;
+        log_info("READ_IMAGE_%s_%s test passed\n",
+                 GetChannelOrderName(img_format->image_channel_order),
+                 GetChannelTypeName(img_format->image_channel_data_type));
     }
 
-    if(supportsBGRA)
-    {
-        img_format.image_channel_order = CL_BGRA;
-        img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[0] = clCreateImage2D(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
-        if (!streams[0])
-        {
-            log_error("clCreateImage2D failed\n");
-            return -1;
-        }
-    }
+    return err;
+}
 
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT8;
-    streams[1] = clCreateImage2D(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateImage2D failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
+bool check_format(cl_device_id device, cl_context context,
+                  cl_mem_object_type image_type,
+                  const cl_image_format img_format)
+{
+    return is_image_format_required(img_format, CL_MEM_READ_ONLY, image_type,
+                                    device)
+        || is_image_format_supported(context, CL_MEM_READ_ONLY, image_type,
+                                     &img_format);
+}
 
-    if(supportsBGRA)
-    {
-        err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr[0], 0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueWriteImage failed\n");
-            return -1;
-        }
-    }
+}
+int test_readimage(cl_device_id device, cl_context context,
+                   cl_command_queue queue, int num_elements)
+{
+    const cl_image_format format[] = { { CL_RGBA, CL_UNORM_INT8 },
+                                       { CL_BGRA, CL_UNORM_INT8 } };
 
-    err = clEnqueueWriteImage(queue, streams[1], CL_TRUE, origin, region, 0, 0, input_ptr[1], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteImage failed\n");
-        return -1;
-    }
+    int err = test_readimage<CL_MEM_OBJECT_IMAGE2D, cl_uchar>(
+        device, context, queue, &format[0]);
 
-    if(supportsBGRA)
+    if (check_format(device, context, CL_MEM_OBJECT_IMAGE2D, format[1]))
     {
-        err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &bgra8888_kernel_code, "test_bgra8888" );
-        if (err)
-            return -1;
+        err |= test_readimage<CL_MEM_OBJECT_IMAGE2D, cl_uchar>(
+            device, context, queue, &format[1]);
     }
 
-    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &rgba8888_kernel_code, "test_rgba8888" );
-    if (err)
-        return -1;
+    return err;
+}
 
-    cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
-    test_error(err, "clCreateSampler failed");
+int test_readimage_int16(cl_device_id device, cl_context context,
+                         cl_command_queue queue, int num_elements)
+{
+    const cl_image_format format = { CL_RGBA, CL_UNORM_INT16 };
+    return test_readimage<CL_MEM_OBJECT_IMAGE2D, cl_ushort>(device, context,
+                                                            queue, &format);
+}
 
-    if(supportsBGRA)
-    {
-        err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
-        err |= clSetKernelArg(kernel[0], 1, sizeof streams[2], &streams[2]);
-        err |= clSetKernelArg(kernel[0], 2, sizeof sampler, &sampler);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArg failed\n");
-            return -1;
-        }
-    }
+int test_readimage_fp32(cl_device_id device, cl_context context,
+                        cl_command_queue queue, int num_elements)
+{
+    const cl_image_format format = { CL_RGBA, CL_FLOAT };
+    return test_readimage<CL_MEM_OBJECT_IMAGE2D, cl_float>(device, context,
+                                                           queue, &format);
+}
 
-    err  = clSetKernelArg(kernel[1], 0, sizeof streams[1], &streams[1]);
-    err |= clSetKernelArg(kernel[1], 1, sizeof streams[2], &streams[2]);
-    err |= clSetKernelArg(kernel[1], 2, sizeof sampler, &sampler);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArg failed\n");
-        return -1;
-    }
+int test_readimage3d(cl_device_id device, cl_context context,
+                     cl_command_queue queue, int num_elements)
+{
+    const cl_image_format format[] = { { CL_RGBA, CL_UNORM_INT8 },
+                                       { CL_BGRA, CL_UNORM_INT8 } };
+
+    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device)
 
-    threads[0] = (unsigned int)img_width;
-    threads[1] = (unsigned int)img_height;
+    int err = test_readimage<CL_MEM_OBJECT_IMAGE3D, cl_uchar>(
+        device, context, queue, &format[0]);
 
-    for (i=0; i<2; i++)
+    if (check_format(device, context, CL_MEM_OBJECT_IMAGE3D, format[1]))
     {
-        if(i == 0 && !supportsBGRA)
-            continue;
-
-        err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("%s clEnqueueNDRangeKernel failed\n", __FUNCTION__);
-            return -1;
-        }
-        err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        switch (i)
-        {
-            case 0:
-                err = verify_bgra8888_image(input_ptr[i], output_ptr, img_width, img_height);
-                break;
-            case 1:
-                err = verify_rgba8888_image(input_ptr[i], output_ptr, img_width, img_height);
-                break;
-        }
-
-        if (err)
-            break;
+        err |= test_readimage<CL_MEM_OBJECT_IMAGE3D, cl_uchar>(
+            device, context, queue, &format[1]);
     }
 
-    // cleanup
-    clReleaseSampler(sampler);
+    return err;
+}
 
-    if(supportsBGRA)
-            clReleaseMemObject(streams[0]);
+int test_readimage3d_int16(cl_device_id device, cl_context context,
+                           cl_command_queue queue, int num_elements)
+{
+    const cl_image_format format = { CL_RGBA, CL_UNORM_INT16 };
 
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i<2; i++)
-    {
-        if(i == 0 && !supportsBGRA)
-            continue;
+    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device)
 
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
+    return test_readimage<CL_MEM_OBJECT_IMAGE3D, cl_ushort>(device, context,
+                                                            queue, &format);
+}
+int test_readimage3d_fp32(cl_device_id device, cl_context context,
+                          cl_command_queue queue, int num_elements)
+{
+    const cl_image_format format = { CL_RGBA, CL_FLOAT };
 
-    return err;
+    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(device)
+
+    return test_readimage<CL_MEM_OBJECT_IMAGE3D, cl_float>(device, context,
+                                                           queue, &format);
 }
diff --git a/test_conformance/basic/test_readimage3d.cpp b/test_conformance/basic/test_readimage3d.cpp
deleted file mode 100644
index 5fd7d109..00000000
--- a/test_conformance/basic/test_readimage3d.cpp
+++ /dev/null
@@ -1,213 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-#include "harness/imageHelpers.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static const char *bgra8888_kernel_code =
-"\n"
-"__kernel void test_bgra8888(read_only image3d_t srcimg, __global float4 *dst, sampler_t sampler)\n"
-"{\n"
-"    int    tid_x = get_global_id(0);\n"
-"    int    tid_y = get_global_id(1);\n"
-"    int    tid_z = get_global_id(2);\n"
-"    int    indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n"
-"    float4 color;\n"
-"\n"
-"    color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n"
-"    dst[indx].x = color.z;\n"
-"    dst[indx].y = color.y;\n"
-"    dst[indx].z = color.x;\n"
-"    dst[indx].w = color.w;\n"
-"\n"
-"}\n";
-
-static const char *rgba8888_kernel_code =
-"\n"
-"__kernel void test_rgba8888(read_only image3d_t srcimg, __global float4 *dst, sampler_t sampler)\n"
-"{\n"
-"    int    tid_x = get_global_id(0);\n"
-"    int    tid_y = get_global_id(1);\n"
-"    int    tid_z = get_global_id(2);\n"
-"    int    indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n"
-"    float4 color;\n"
-"\n"
-"    color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n"
-"    //indx *= 4;\n"
-"    dst[indx].x = color.x;\n"
-"    dst[indx].y = color.y;\n"
-"    dst[indx].z = color.z;\n"
-"    dst[indx].w = color.w;\n"
-"\n"
-"}\n";
-
-static unsigned char *
-generate_3d_image8(int w, int h, int d, MTdata data)
-{
-    unsigned char   *ptr = (unsigned char*)malloc(w * h * d * 4);
-    int             i;
-
-    for (i=0; i<w*h*d*4; i++)
-        ptr[i] = (unsigned char)genrand_int32(data);
-
-    return ptr;
-}
-
-static int
-verify_3d_image8(double *image, float *outptr, int w, int h, int d)
-{
-    int     i;
-
-    for (i=0; i<w*h*d*4; i++)
-    {
-        if (outptr[i] != (float)image[i])
-        {
-            float ulps = Ulp_Error( outptr[i], image[i]);
-
-            if(! (fabsf(ulps) < 1.5f) )
-            {
-                log_error( "ERROR: Data sample %d does not validate! Expected (%a), got (%a), ulp %f\n",
-                    (int)i, image[i], outptr[ i ],  ulps );
-                return -1;
-            }
-        }
-    }
-
-    return 0;
-}
-
-static double *
-prepare_reference(unsigned char * input_ptr, int w, int h, int d)
-{
-    double   *ptr = (double*)malloc(w * h * d * 4 * sizeof(double));
-    int         i;
-    for (i=0; i<w*h*d*4; i++)
-        ptr[i] = ((double)input_ptr[i]/255);
-
-    return ptr;
-}
-
-int test_readimage3d(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-	cl_mem streams[2];
-	cl_program program;
-	cl_kernel kernel;
-	cl_sampler sampler;
-	struct testFormat
-	{
-		const char* kernelName;
-		const char* kernelSourceString;
-		const cl_image_format img_format;
-	};
-
-	static testFormat formatsToTest[] =
-	{
-		{
-			"test_bgra8888",
-			bgra8888_kernel_code,
-			{CL_BGRA, CL_UNORM_INT8},
-		},
-		{
-			"test_rgba8888",
-			rgba8888_kernel_code,
-			{CL_RGBA, CL_UNORM_INT8},
-		},
-	};
-
-	unsigned char *input_ptr;
-	float *output_ptr;
-	double *ref_ptr;
-	size_t threads[3];
-	int img_width = 64;
-	int img_height = 64;
-	int img_depth = 64;
-	int err;
-	size_t origin[3] = {0, 0, 0};
-	size_t region[3] = {img_width, img_height, img_depth};
-	size_t length = img_width * img_height * img_depth * 4 * sizeof(float);
-
-	PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
-
-	for (uint32_t i = 0; i < ARRAY_SIZE(formatsToTest); i++)
-	{
-		if (!is_image_format_required(formatsToTest[i].img_format, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, device))
-			continue;
-
-		MTdata d = init_genrand( gRandomSeed );
-		input_ptr = generate_3d_image8(img_width, img_height, img_depth, d);
-		ref_ptr = prepare_reference(input_ptr, img_width, img_height, img_depth);
-		output_ptr = (float*)malloc(length);
-
-		streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &formatsToTest[i].img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
-		test_error(err, "create_image_3d failed");
-
-		streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
-		test_error(err, "clCreateBuffer failed");
-
-		sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
-		test_error(err, "clCreateSampler failed");
-
-		err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
-		test_error(err, "clEnqueueWriteImage failed");
-
-		err = create_single_kernel_helper(context, &program, &kernel, 1, &formatsToTest[i].kernelSourceString, formatsToTest[i].kernelName);
-		test_error(err, "create_single_kernel_helper failed");
-
-		err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
-		err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-		err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
-		test_error(err, "clSetKernelArg failed");
-
-		threads[0] = (unsigned int)img_width;
-		threads[1] = (unsigned int)img_height;
-		threads[2] = (unsigned int)img_depth;
-
-		err = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL);
-		test_error(err, "clEnqueueNDRangeKernel failed");
-
-		err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-		test_error(err, "clEnqueueReadBuffer failed");
-
-		err = verify_3d_image8(ref_ptr, output_ptr, img_width, img_height, img_depth);
-		if ( err == 0 )
-		{
-			log_info("READ_IMAGE3D_%s_%s test passed\n",
-			         GetChannelTypeName(formatsToTest[i].img_format.image_channel_data_type),
-			         GetChannelOrderName(formatsToTest[i].img_format.image_channel_order));
-		}
-
-		clReleaseSampler(sampler);
-		clReleaseMemObject(streams[0]);
-		clReleaseMemObject(streams[1]);
-		clReleaseKernel(kernel);
-		clReleaseProgram(program);
-		free_mtdata(d);
-		d = NULL;
-		free(input_ptr);
-		free(ref_ptr);
-		free(output_ptr);
-	}
-
-	return err;
-}
diff --git a/test_conformance/basic/test_readimage3d_fp32.cpp b/test_conformance/basic/test_readimage3d_fp32.cpp
deleted file mode 100644
index 2658d365..00000000
--- a/test_conformance/basic/test_readimage3d_fp32.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-
-static const char *rgbaFFFF_kernel_code =
-"__kernel void test_rgbaFFFF(read_only image3d_t srcimg, __global float *dst, sampler_t sampler)\n"
-"{\n"
-"    int    tid_x = get_global_id(0);\n"
-"    int    tid_y = get_global_id(1);\n"
-"    int    tid_z = get_global_id(2);\n"
-"    int    indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n"
-"    float4 color;\n"
-"\n"
-"    color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n"
-"    indx *= 4;\n"
-"    dst[indx+0] = color.x;\n"
-"    dst[indx+1] = color.y;\n"
-"    dst[indx+2] = color.z;\n"
-"    dst[indx+3] = color.w;\n"
-"\n"
-"}\n";
-
-
-static float *
-generate_float_image(int w, int h, int d, MTdata data)
-{
-    float   *ptr = (float*)malloc(w * h * d * 4 * sizeof(float));
-    int     i;
-
-    for (i=0; i<w*h*d*4; i++)
-        ptr[i] = get_random_float(-0x40000000, 0x40000000, data);
-
-    return ptr;
-}
-
-static int
-verify_float_image(float *image, float *outptr, int w, int h, int d)
-{
-    int     i;
-
-    for (i=0; i<w*h*d*4; i++)
-    {
-        if (outptr[i] != image[i])
-        {
-            log_error("READ_IMAGE3D_RGBA_FLOAT test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("READ_IMAGE3D_RGBA_FLOAT test passed\n");
-    return 0;
-}
-
-
-int test_readimage3d_fp32(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[2];
-    cl_program program;
-    cl_kernel kernel;
-    cl_image_format    img_format;
-    float *input_ptr, *output_ptr;
-    size_t threads[3];
-    int img_width = 64;
-    int img_height = 64;
-    int img_depth = 64;
-    int err;
-    size_t origin[3] = {0, 0, 0};
-    size_t region[3] = {img_width, img_height, img_depth};
-    size_t length = img_width * img_height * img_depth * 4 * sizeof(float);
-
-    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
-
-    MTdata d = init_genrand( gRandomSeed );
-    input_ptr = generate_float_image(img_width, img_height, img_depth, d);
-    free_mtdata(d); d = NULL;
-
-    output_ptr = (float*)malloc(length);
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_FLOAT;
-    streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
-  test_error(err, "create_image_3d failed");
-
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
-  test_error(err, "clCreateBuffer failed");
-
-    err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
-  test_error(err, "clEnqueueWriteImage failed");
-
-  err = create_single_kernel_helper(context, &program, &kernel, 1, &rgbaFFFF_kernel_code, "test_rgbaFFFF" );
-  if (err)
-    return -1;
-
-  cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
-  test_error(err, "clCreateSampler failed");
-
-  err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
-  test_error(err, "clSetKernelArg failed");
-
-    threads[0] = (unsigned int)img_width;
-    threads[1] = (unsigned int)img_height;
-    threads[2] = (unsigned int)img_depth;
-  err = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL);
-  test_error(err, "clEnqueueNDRangeKernel failed");
-
-  err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-  test_error(err, "clEnqueueReadBuffer failed");
-
-  err = verify_float_image(input_ptr, output_ptr, img_width, img_height, img_depth);
-
-    // cleanup
-  clReleaseSampler(sampler);
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(output_ptr);
-
-    return err;
-}
-
-
diff --git a/test_conformance/basic/test_readimage3d_int16.cpp b/test_conformance/basic/test_readimage3d_int16.cpp
deleted file mode 100644
index 690d72a4..00000000
--- a/test_conformance/basic/test_readimage3d_int16.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-static const char *rgba16_kernel_code =
-"__kernel void test_rgba16(read_only image3d_t srcimg, __global ushort4 *dst, sampler_t sampler)\n"
-"{\n"
-"    int    tid_x = get_global_id(0);\n"
-"    int    tid_y = get_global_id(1);\n"
-"    int    tid_z = get_global_id(2);\n"
-"    int    indx = (tid_z * get_image_height(srcimg) + tid_y) * get_image_width(srcimg) + tid_x;\n"
-"    float4 color;\n"
-"\n"
-"    color = read_imagef(srcimg, sampler, (int4)(tid_x, tid_y, tid_z, 0));\n"
-"    ushort4 dst_write;\n"
-"    dst_write.x = convert_ushort_rte(color.x * 65535.0f);\n"
-"    dst_write.y = convert_ushort_rte(color.y * 65535.0f);\n"
-"    dst_write.z = convert_ushort_rte(color.z * 65535.0f);\n"
-"    dst_write.w = convert_ushort_rte(color.w * 65535.0f);\n"
-"    dst[indx] = dst_write;\n"
-"\n"
-"}\n";
-
-
-static unsigned short *
-generate_16bit_image(int w, int h, int d, MTdata data)
-{
-    unsigned short    *ptr = (cl_ushort*)malloc(w * h * d * 4 * sizeof(cl_ushort));
-    int             i;
-
-    for (i=0; i<w*h*d*4; i++)
-        ptr[i] = (cl_ushort)genrand_int32(data);
-
-    return ptr;
-}
-
-static int
-verify_16bit_image(cl_ushort *image, cl_ushort *outptr, int w, int h, int d)
-{
-    int     i;
-
-    for (i=0; i<w*h*d*4; i++)
-    {
-        if (outptr[i] != image[i])
-        {
-            log_error("READ_IMAGE3D_RGBA_UNORM_INT16 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("READ_IMAGE3D_RGBA_UNORM_INT16 test passed\n");
-    return 0;
-}
-
-int test_readimage3d_int16(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[2];
-    cl_program program;
-    cl_kernel kernel;
-    cl_image_format    img_format;
-    cl_ushort *input_ptr, *output_ptr;
-    size_t threads[3];
-    int img_width = 64;
-    int img_height = 64;
-    int img_depth = 64;
-    int err;
-    size_t origin[3] = {0, 0, 0};
-    size_t region[3] = {img_width, img_height, img_depth};
-    size_t length = img_width * img_height * img_depth * 4 * sizeof(cl_ushort);
-
-    PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( device )
-
-    MTdata d = init_genrand( gRandomSeed );
-    input_ptr = generate_16bit_image(img_width, img_height, img_depth, d);
-    free_mtdata(d); d = NULL;
-
-    output_ptr = (cl_ushort*)malloc(length);
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &img_format, img_width, img_height, img_depth, 0, 0, NULL, &err);
-    test_error(err, "create_image_3d failed");
-
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
-  test_error(err, "clCreateBuffer failed");
-
-    err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
-  test_error(err, "clEnqueueWriteImage failed");
-
-  err = create_single_kernel_helper(context, &program, &kernel, 1, &rgba16_kernel_code, "test_rgba16" );
-  if (err)
-    return -1;
-
-  cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
-  test_error(err, "clCreateSampler failed");
-
-  err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
-  test_error(err, "clSetKernelArg failed");
-
-    threads[0] = (unsigned int)img_width;
-    threads[1] = (unsigned int)img_height;
-    threads[2] = (unsigned int)img_depth;
-  err = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, threads, NULL, 0, NULL, NULL);
-  test_error(err, "clEnqueueNDRangeKernel failed");
-
-  err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-  test_error(err, "clEnqueueReadBuffer failed");
-
-  err = verify_16bit_image(input_ptr, output_ptr, img_width, img_height, img_depth);
-
-    // cleanup
-  clReleaseSampler(sampler);
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(output_ptr);
-
-    return err;
-}
-
-
diff --git a/test_conformance/basic/test_readimage_fp32.cpp b/test_conformance/basic/test_readimage_fp32.cpp
deleted file mode 100644
index aa9a82f1..00000000
--- a/test_conformance/basic/test_readimage_fp32.cpp
+++ /dev/null
@@ -1,168 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-
-static const char *rgbaFFFF_kernel_code =
-"__kernel void test_rgbaFFFF(read_only image2d_t srcimg, __global float *dst, sampler_t smp)\n"
-"{\n"
-"    int    tid_x = get_global_id(0);\n"
-"    int    tid_y = get_global_id(1);\n"
-"    int    indx = tid_y * get_image_width(srcimg) + tid_x;\n"
-"    float4 color;\n"
-"\n"
-"    color = read_imagef(srcimg, smp, (int2)(tid_x, tid_y));\n"
-"    indx *= 4;\n"
-"    dst[indx+0] = color.x;\n"
-"    dst[indx+1] = color.y;\n"
-"    dst[indx+2] = color.z;\n"
-"    dst[indx+3] = color.w;\n"
-"\n"
-"}\n";
-
-
-static float *
-generate_float_image(int w, int h, MTdata d)
-{
-    float   *ptr = (float*)malloc(w * h * 4 * sizeof(float));
-    int     i;
-
-    for (i=0; i<w*h*4; i++)
-        ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
-
-    return ptr;
-}
-
-static int
-verify_float_image(float *image, float *outptr, int w, int h)
-{
-    int     i;
-
-    for (i=0; i<w*h*4; i++)
-    {
-        if (outptr[i] != image[i])
-        {
-            log_error("READ_IMAGE_RGBA_FLOAT test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("READ_IMAGE_RGBA_FLOAT test passed\n");
-    return 0;
-}
-
-int test_readimage_fp32(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[2];
-    cl_program program;
-    cl_kernel kernel;
-    cl_image_format    img_format;
-    float *input_ptr, *output_ptr;
-    size_t threads[2];
-    int img_width = 512;
-    int img_height = 512;
-    int err;
-    size_t origin[3] = {0, 0, 0};
-    size_t region[3] = {img_width, img_height, 1};
-    size_t length = img_width * img_height * 4 * sizeof(float);
-    MTdata d;
-
-    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
-
-  d = init_genrand( gRandomSeed );
-    input_ptr = generate_float_image(img_width, img_height, d);
-    free_mtdata(d); d = NULL;
-
-    output_ptr = (float*)malloc(length);
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_FLOAT;
-    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("create_image_2d failed\n");
-        return -1;
-    }
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateArray failed\n");
-        return -1;
-    }
-
-    err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteImage failed\n");
-        return -1;
-    }
-
-  err = create_single_kernel_helper(context, &program, &kernel, 1, &rgbaFFFF_kernel_code, "test_rgbaFFFF" );
-  if (err)
-    return -1;
-
-  cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
-  test_error(err, "clCreateSampler failed");
-
-  err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-    threads[0] = (unsigned int)img_width;
-    threads[1] = (unsigned int)img_height;
-  err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("%s clEnqueueNDRangeKernel failed\n", __FUNCTION__);
-    return -1;
-  }
-
-  err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clEnqueueReadBuffer failed\n");
-    return -1;
-  }
-
-  err = verify_float_image(input_ptr, output_ptr, img_width, img_height);
-
-    // cleanup
-  clReleaseSampler(sampler);
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(output_ptr);
-
-    return err;
-}
-
-
diff --git a/test_conformance/basic/test_readimage_int16.cpp b/test_conformance/basic/test_readimage_int16.cpp
deleted file mode 100644
index fe2912f5..00000000
--- a/test_conformance/basic/test_readimage_int16.cpp
+++ /dev/null
@@ -1,167 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-static const char *rgba16_kernel_code =
-"__kernel void test_rgba16(read_only image2d_t srcimg, __global ushort4 *dst, sampler_t smp)\n"
-"{\n"
-"    int    tid_x = get_global_id(0);\n"
-"    int    tid_y = get_global_id(1);\n"
-"    int    indx = tid_y * get_image_width(srcimg) + tid_x;\n"
-"    float4 color;\n"
-"\n"
-"    color = read_imagef(srcimg, smp, (int2)(tid_x, tid_y));\n"
-"    ushort4 dst_write;\n"
-"    dst_write.x = convert_ushort_rte(color.x * 65535.0f);\n"
-"    dst_write.y = convert_ushort_rte(color.y * 65535.0f);\n"
-"    dst_write.z = convert_ushort_rte(color.z * 65535.0f);\n"
-"    dst_write.w = convert_ushort_rte(color.w * 65535.0f);\n"
-"    dst[indx] = dst_write;\n"
-"\n"
-"}\n";
-
-
-static unsigned short *
-generate_16bit_image(int w, int h, MTdata d)
-{
-    cl_ushort    *ptr = (cl_ushort*)malloc(w * h * 4 * sizeof(cl_ushort));
-    int             i;
-
-    for (i=0; i<w*h*4; i++)
-        ptr[i] = (cl_ushort)genrand_int32(d);
-
-    return ptr;
-}
-
-static int
-verify_16bit_image(cl_ushort *image, cl_ushort *outptr, int w, int h)
-{
-    int     i;
-    for (i=0; i<w*h*4; i++)
-    {
-        if (outptr[i] != image[i])
-        {
-            log_error("READ_IMAGE_RGBA_UNORM_INT16 test failed\n");
-            return -1;
-        }
-    }
-
-    log_info("READ_IMAGE_RGBA_UNORM_INT16 test passed\n");
-    return 0;
-}
-
-int test_readimage_int16(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[2];
-    cl_program program;
-    cl_kernel kernel;
-    cl_image_format    img_format;
-    cl_ushort *input_ptr, *output_ptr;
-    size_t threads[2];
-    int img_width = 512;
-    int img_height = 512;
-    int err;
-    size_t origin[3] = {0, 0, 0};
-    size_t region[3] = {img_width, img_height, 1};
-    size_t length = img_width * img_height * 4 * sizeof(cl_ushort);
-    MTdata d;
-
-    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
-
-    d = init_genrand( gRandomSeed );
-    input_ptr = generate_16bit_image(img_width, img_height, d);
-    free_mtdata(d); d = NULL;
-
-    output_ptr = (cl_ushort*)malloc(length);
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("create_image_2d failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateArray failed\n");
-        return -1;
-    }
-
-    err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, input_ptr, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteImage failed\n");
-        return -1;
-    }
-
-  err = create_single_kernel_helper(context, &program, &kernel, 1, &rgba16_kernel_code, "test_rgba16" );
-  if (err)
-    return -1;
-
-  cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
-  test_error(err, "clCreateSampler failed");
-
-  err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
-  err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
-  err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-    threads[0] = (unsigned int)img_width;
-    threads[1] = (unsigned int)img_height;
-  err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("%s clEnqueueNDRangeKernel failed\n", __FUNCTION__);
-    return -1;
-  }
-
-  err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
-  if (err != CL_SUCCESS)
-  {
-    log_error("clEnqueueReadBuffer failed\n");
-    return -1;
-  }
-
-  err = verify_16bit_image(input_ptr, output_ptr, img_width, img_height);
-
-    // cleanup
-  clReleaseSampler(sampler);
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(output_ptr);
-
-    return err;
-}
-
-
diff --git a/test_conformance/basic/test_rw_image_access_qualifier.cpp b/test_conformance/basic/test_rw_image_access_qualifier.cpp
index 87e3f60b..b06c82a5 100644
--- a/test_conformance/basic/test_rw_image_access_qualifier.cpp
+++ b/test_conformance/basic/test_rw_image_access_qualifier.cpp
@@ -98,10 +98,13 @@ int test_rw_image_access_qualifier(cl_device_id device_id, cl_context context, c
     return -1;
     }
 
+    MTdata mtData = init_genrand(gRandomSeed);
     /* Fill input array with random values */
     for (i = 0; i < size; i++) {
-        input[i] = (unsigned int)(rand()/((double)RAND_MAX + 1)*255);
+        input[i] = genrand_int32(mtData);
     }
+    free_mtdata(mtData);
+    mtData = NULL;
 
     /* Zero out output array */
     for (i = 0; i < size; i++) {
diff --git a/test_conformance/basic/test_writeimage.cpp b/test_conformance/basic/test_writeimage.cpp
index 259b50d9..a2847e27 100644
--- a/test_conformance/basic/test_writeimage.cpp
+++ b/test_conformance/basic/test_writeimage.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -14,6 +14,7 @@
 // limitations under the License.
 //
 #include "harness/compat.h"
+#include "harness/imageHelpers.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -24,331 +25,237 @@
 
 #include "procs.h"
 
-static const char *bgra8888_write_kernel_code =
-"\n"
-"__kernel void test_bgra8888_write(__global unsigned char *src, write_only image2d_t dstimg)\n"
-"{\n"
-"    int            tid_x = get_global_id(0);\n"
-"    int            tid_y = get_global_id(1);\n"
-"    int            indx = tid_y * get_image_width(dstimg) + tid_x;\n"
-"    float4         color;\n"
-"\n"
-"    indx *= 4;\n"
-"    color = (float4)((float)src[indx+2], (float)src[indx+1], (float)src[indx+0], (float)src[indx+3]);\n"
-"    color /= (float4)(255.0f, 255.0f, 255.0f, 255.0f);\n"
-"    write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
-"\n"
-"}\n";
-
-
-static const char *rgba8888_write_kernel_code =
-"\n"
-"__kernel void test_rgba8888_write(__global unsigned char *src, write_only image2d_t dstimg)\n"
-"{\n"
-"    int            tid_x = get_global_id(0);\n"
-"    int            tid_y = get_global_id(1);\n"
-"    int            indx = tid_y * get_image_width(dstimg) + tid_x;\n"
-"    float4         color;\n"
-"\n"
-"    indx *= 4;\n"
-"    color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n"
-"    color /= (float4)(255.0f, 255.0f, 255.0f, 255.0f);\n"
-"    write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
-"\n"
-"}\n";
-
-
-static unsigned char *
-generate_8888_image(int w, int h, MTdata d)
+#include <algorithm>
+#include <string>
+#include <vector>
+
+#include "procs.h"
+
+namespace {
+const char *kernel_source = R"(
+__kernel void test_CL_BGRACL_UNORM_INT8(__global unsigned char *src, write_only image2d_t dstimg)
 {
-    cl_uchar   *ptr = (cl_uchar *)malloc(w * h * 4);
-    int             i;
+    int            tid_x = get_global_id(0);
+    int            tid_y = get_global_id(1);
+    int            indx = tid_y * get_image_width(dstimg) + tid_x;
+    float4         color;
+
+    indx *= 4;
+    color = (float4)((float)src[indx+2], (float)src[indx+1], (float)src[indx+0], (float)src[indx+3]);
+    color /= (float4)(255.0f, 255.0f, 255.0f, 255.0f);
+    write_imagef(dstimg, (int2)(tid_x, tid_y), color);
+}
 
-    for (i=0; i<w*h*4; i++)
-        ptr[i] = (cl_uchar)genrand_int32(d);
+__kernel void test_CL_RGBACL_UNORM_INT8(__global unsigned char *src, write_only image2d_t dstimg)
+{
+    int            tid_x = get_global_id(0);
+    int            tid_y = get_global_id(1);
+    int            indx = tid_y * get_image_width(dstimg) + tid_x;
+    float4         color;
+
+    indx *= 4;
+    color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);
+    color /= (float4)(255.0f, 255.0f, 255.0f, 255.0f);
+    write_imagef(dstimg, (int2)(tid_x, tid_y), color);
+}
 
-    return ptr;
+__kernel void test_CL_RGBACL_UNORM_INT16(__global unsigned short *src, write_only image2d_t dstimg)
+{
+    int            tid_x = get_global_id(0);
+    int            tid_y = get_global_id(1);
+    int            indx = tid_y * get_image_width(dstimg) + tid_x;
+    float4         color;
+
+    indx *= 4;
+    color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);
+    color /= 65535.0f;
+    write_imagef(dstimg, (int2)(tid_x, tid_y), color);
 }
 
-static int
-verify_bgra8888_image(unsigned char *image, unsigned char *outptr, int w, int h)
+__kernel void test_CL_RGBACL_FLOAT(__global float *src, write_only image2d_t dstimg)
 {
-    int     i;
+    int            tid_x = get_global_id(0);
+    int            tid_y = get_global_id(1);
+    int            indx = tid_y * get_image_width(dstimg) + tid_x;
+    float4         color;
+
+    indx *= 4;
+    color = (float4)(src[indx+0], src[indx+1], src[indx+2], src[indx+3]);
+    write_imagef(dstimg, (int2)(tid_x, tid_y), color);
+}
+)";
 
-    for (i=0; i<w*h*4; i++)
-    {
-        if (outptr[i] != image[i])
-        {
-            log_error("WRITE_IMAGE_BGRA_UNORM_INT8 test failed\n");
-            return -1;
-        }
-    }
 
-    log_info("WRITE_IMAGE_BGRA_UNORM_INT8 test passed\n");
-    return 0;
+template <typename T> void generate_random_inputs(std::vector<T> &v)
+{
+    RandomSeed seed(gRandomSeed);
+
+    auto random_generator = [&seed]() {
+        return static_cast<T>(genrand_int32(seed));
+    };
+
+    std::generate(v.begin(), v.end(), random_generator);
 }
 
-static int
-verify_rgba8888_image(unsigned char *image, unsigned char *outptr, int w, int h)
+template <> void generate_random_inputs<float>(std::vector<float> &v)
 {
-    int     i;
+    RandomSeed seed(gRandomSeed);
 
-    for (i=0; i<w*h*4; i++)
-    {
-        if (outptr[i] != image[i])
-        {
-            log_error("WRITE_IMAGE_RGBA_UNORM_INT8 test failed\n");
-            return -1;
-        }
-    }
+    auto random_generator = [&seed]() {
+        return get_random_float(-0x40000000, 0x40000000, seed);
+    };
 
-    log_info("WRITE_IMAGE_RGBA_UNORM_INT8 test passed\n");
-    return 0;
+    std::generate(v.begin(), v.end(), random_generator);
 }
 
 
-int test_writeimage(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+const char *get_mem_flag_name(cl_mem_flags flags)
 {
-    cl_mem streams[6];
-    cl_program program[2];
-    cl_kernel kernel[4];
-
-    unsigned char    *input_ptr[2], *output_ptr;
-    cl_image_format    img_format;
-    cl_image_format *supported_formats;
-    size_t threads[2];
-    int img_width = 512;
-    int img_height = 512;
-    int i, err, any_err = 0;
-    size_t origin[3] = {0, 0, 0};
-    size_t region[3] = {img_width, img_height, 1};
-    size_t length = img_width * img_height * 4 * sizeof(unsigned char);
-    int supportsBGRA = 0;
-    cl_uint numFormats = 0;
-
-    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
-
-    MTdata d = init_genrand( gRandomSeed );
-    input_ptr[0] = generate_8888_image(img_width, img_height, d);
-    input_ptr[1] = generate_8888_image(img_width, img_height, d);
-    free_mtdata(d); d = NULL;
-    output_ptr = (unsigned char*)malloc(length);
-
-    if(gIsEmbedded)
-    {
-        /* Get the supported image formats to see if BGRA is supported */
-        clGetSupportedImageFormats (context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, 0, NULL, &numFormats);
-        supported_formats = (cl_image_format *) malloc(sizeof(cl_image_format) * numFormats);
-        clGetSupportedImageFormats (context, CL_MEM_READ_WRITE, CL_MEM_OBJECT_IMAGE2D, numFormats, supported_formats, NULL);
-
-        for(i = 0; i < numFormats; i++)
-        {
-            if(supported_formats[i].image_channel_order == CL_BGRA)
-            {
-                    supportsBGRA = 1;
-                    break;
-            }
-        }
-    }
-    else
+    switch (flags)
     {
-        supportsBGRA = 1;
+        case CL_MEM_READ_WRITE: return "CL_MEM_READ_WRITE";
+        case CL_MEM_WRITE_ONLY: return "CL_MEM_WRITE_ONLY";
+        default: return "Unsupported cl_mem_flags value";
     }
+}
 
-    if(supportsBGRA)
-    {
-        img_format.image_channel_order = CL_BGRA;
-        img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[0] = clCreateImage2D(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
-        if (!streams[0])
-        {
-            log_error("clCreateImage2D failed\n");
-            return -1;
-        }
-    }
+template <typename T>
+int test_writeimage(cl_device_id device, cl_context context,
+                    cl_command_queue queue, const cl_image_format *img_format,
+                    cl_mem_flags img_flags)
+{
+    clMemWrapper streams[2];
+    clProgramWrapper program;
+    clKernelWrapper kernel;
 
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT8;
-    streams[1] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("create_image_2d failed\n");
-        return -1;
-    }
+    std::string kernel_name("test_");
 
-    if(supportsBGRA)
-    {
-        img_format.image_channel_order = CL_BGRA;
-        img_format.image_channel_data_type = CL_UNORM_INT8;
-        streams[2] = clCreateImage2D(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL);
-        if (!streams[2])
-        {
-            log_error("clCreateImage2D failed\n");
-            return -1;
-        }
-    }
+    size_t img_width = 512;
+    size_t img_height = 512;
 
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT8;
-    streams[3] = create_image_2d(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL);
-    if (!streams[3])
-    {
-        log_error("create_image_2d failed\n");
-        return -1;
-    }
+    int err;
 
-    streams[4] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[4])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[5] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[5])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
+    const size_t origin[3] = { 0, 0, 0 };
+    const size_t region[3] = { img_width, img_height, 1 };
 
-    err = clEnqueueWriteBuffer(queue, streams[4], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer(queue, streams[5], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
+    const size_t num_elements = img_width * img_height * 4;
+    const size_t length = num_elements * sizeof(T);
 
-    if(supportsBGRA)
-    {
-        err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &bgra8888_write_kernel_code, "test_bgra8888_write" );
-        if (err)
-                return -1;
+    PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
 
-        kernel[2] = clCreateKernel(program[0], "test_bgra8888_write", NULL);
-        if (!kernel[2])
-        {
-                log_error("clCreateKernel failed\n");
-                return -1;
-        }
-    }
+    std::vector<T> input(num_elements);
+    std::vector<T> output(num_elements);
 
-    err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &rgba8888_write_kernel_code, "test_rgba8888_write" );
-    if (err)
-    return -1;
-    kernel[3] = clCreateKernel(program[1], "test_rgba8888_write", NULL);
-    if (!kernel[3])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
+    generate_random_inputs(input);
 
-    if(supportsBGRA)
-    {
-        err  = clSetKernelArg(kernel[0], 0, sizeof streams[4], &streams[4]);
-        err |= clSetKernelArg(kernel[0], 1, sizeof streams[0], &streams[0]);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
+    streams[0] = create_image_2d(context, img_flags, img_format, img_width,
+                                 img_height, 0, nullptr, &err);
+    test_error(err, "create_image failed.");
 
-    err  = clSetKernelArg(kernel[1], 0, sizeof streams[5], &streams[5]);
-    err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
+    streams[1] =
+        clCreateBuffer(context, CL_MEM_READ_WRITE, length, nullptr, &err);
+    test_error(err, "clCreateBuffer failed.");
+
+    err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length,
+                               input.data(), 0, nullptr, nullptr);
+    test_error(err, "clEnqueueWriteImage failed.");
+
+    kernel_name += GetChannelOrderName(img_format->image_channel_order);
+    kernel_name += GetChannelTypeName(img_format->image_channel_data_type);
+
+    err = create_single_kernel_helper(context, &program, &kernel, 1,
+                                      &kernel_source, kernel_name.c_str());
+    test_error(err, "create_single_kernel_helper failed.");
+
+    err |= clSetKernelArg(kernel, 0, sizeof(streams[1]), &streams[1]);
+    err |= clSetKernelArg(kernel, 1, sizeof(streams[0]), &streams[0]);
+    test_error(err, "clSetKernelArgs failed\n");
 
-    if(supportsBGRA)
+    size_t threads[] = { img_width, img_height };
+    err = clEnqueueNDRangeKernel(queue, kernel, 2, nullptr, threads, nullptr, 0,
+                                 nullptr, nullptr);
+    test_error(err, "clEnqueueNDRangeKernel failed\n");
+
+    err = clEnqueueReadImage(queue, streams[0], CL_TRUE, origin, region, 0, 0,
+                             output.data(), 0, nullptr, nullptr);
+
+    if (0 != memcmp(input.data(), output.data(), length))
     {
-        err  = clSetKernelArg(kernel[2], 0, sizeof streams[4], &streams[4]);
-        err |= clSetKernelArg(kernel[2], 1, sizeof streams[2], &streams[2]);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
+        log_error("WRITE_IMAGE_%s_%s with %s test failed\n",
+                  GetChannelOrderName(img_format->image_channel_order),
+                  GetChannelTypeName(img_format->image_channel_data_type),
+                  get_mem_flag_name(img_flags));
+        err = -1;
     }
-
-    err  = clSetKernelArg(kernel[3], 0, sizeof streams[5], &streams[5]);
-    err |= clSetKernelArg(kernel[3], 1, sizeof streams[3], &streams[3]);
-    if (err != CL_SUCCESS)
+    else
     {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
+        log_info("WRITE_IMAGE_%s_%s with %s test passed\n",
+                 GetChannelOrderName(img_format->image_channel_order),
+                 GetChannelTypeName(img_format->image_channel_data_type),
+                 get_mem_flag_name(img_flags));
     }
 
-    threads[0] = (unsigned int)img_width;
-    threads[1] = (unsigned int)img_height;
-
-    for (i=0; i<4; i++)
-    {
-         if(!supportsBGRA && (i == 0 || i == 2))
-            continue;
+    return err;
+}
 
-        err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
+bool check_format(cl_device_id device, cl_context context,
+                  cl_mem_object_type image_type,
+                  const cl_image_format img_format, cl_mem_flags test_flags)
+{
+    return is_image_format_required(img_format, test_flags, image_type, device)
+        || is_image_format_supported(context, test_flags, image_type,
+                                     &img_format);
+}
+}
+int test_writeimage(cl_device_id device, cl_context context,
+                    cl_command_queue queue, int num_elements)
+{
+    int err = 0;
+    const cl_image_format format[] = { { CL_RGBA, CL_UNORM_INT8 },
+                                       { CL_BGRA, CL_UNORM_INT8 } };
+    const cl_mem_flags test_flags[] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE };
 
-        err = clEnqueueReadImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clReadImage failed\n");
-            return -1;
-        }
+    for (size_t i = 0; i < ARRAY_SIZE(test_flags) && !err; i++)
+    {
+        err = test_writeimage<cl_uchar>(device, context, queue, &format[0],
+                                        test_flags[i]);
 
-        switch (i)
+        if (check_format(device, context, CL_MEM_OBJECT_IMAGE2D, format[1],
+                         test_flags[i]))
         {
-            case 0:
-            case 2:
-                err = verify_bgra8888_image(input_ptr[i&0x01], output_ptr, img_width, img_height);
-                break;
-            case 1:
-            case 3:
-                err = verify_rgba8888_image(input_ptr[i&0x01], output_ptr, img_width, img_height);
-                break;
+            err |= test_writeimage<cl_uchar>(device, context, queue, &format[1],
+                                             test_flags[i]);
         }
-
-        //if (err)
-        //break;
-
-        any_err |= err;
     }
+    return err;
+}
 
-    // cleanup
-    if(supportsBGRA)
-        clReleaseMemObject(streams[0]);
+int test_writeimage_int16(cl_device_id device, cl_context context,
+                          cl_command_queue queue, int num_elements)
+{
+    int err = 0;
+    const cl_image_format format = { CL_RGBA, CL_UNORM_INT16 };
+    const cl_mem_flags test_flags[] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE };
 
-    clReleaseMemObject(streams[1]);
+    for (size_t i = 0; i < ARRAY_SIZE(test_flags) && !err; i++)
+    {
+        err = test_writeimage<cl_ushort>(device, context, queue, &format,
+                                         test_flags[i]);
+    }
+    return err;
+}
 
-    if(supportsBGRA)
-        clReleaseMemObject(streams[2]);
+int test_writeimage_fp32(cl_device_id device, cl_context context,
+                         cl_command_queue queue, int num_elements)
+{
+    int err = 0;
+    const cl_image_format format = { CL_RGBA, CL_FLOAT };
+    const cl_mem_flags test_flags[] = { CL_MEM_WRITE_ONLY, CL_MEM_READ_WRITE };
 
-    clReleaseMemObject(streams[3]);
-    clReleaseMemObject(streams[4]);
-    clReleaseMemObject(streams[5]);
-    for (i=0; i<2; i++)
+    for (size_t i = 0; i < ARRAY_SIZE(test_flags) && !err; i++)
     {
-        if(i == 0 && !supportsBGRA)
-            continue;
-
-        clReleaseKernel(kernel[i]);
-        clReleaseKernel(kernel[i+2]);
-        clReleaseProgram(program[i]);
+        err = test_writeimage<cl_float>(device, context, queue, &format,
+                                        test_flags[i]);
     }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-
-    return any_err;
+    return err;
 }
diff --git a/test_conformance/basic/test_writeimage_fp32.cpp b/test_conformance/basic/test_writeimage_fp32.cpp
deleted file mode 100644
index c68463ac..00000000
--- a/test_conformance/basic/test_writeimage_fp32.cpp
+++ /dev/null
@@ -1,190 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-
-static const char *rgbaFFFF_write_kernel_code =
-"__kernel void test_rgbaFFFF_write(__global float *src, write_only image2d_t dstimg)\n"
-"{\n"
-"    int            tid_x = get_global_id(0);\n"
-"    int            tid_y = get_global_id(1);\n"
-"    int            indx = tid_y * get_image_width(dstimg) + tid_x;\n"
-"    float4         color;\n"
-"\n"
-"    indx *= 4;\n"
-"    color = (float4)(src[indx+0], src[indx+1], src[indx+2], src[indx+3]);\n"
-"    write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
-"\n"
-"}\n";
-
-
-static float *
-generate_float_image(int w, int h, MTdata d)
-{
-    float   *ptr = (float*)malloc(w * h * 4 * sizeof(float));
-    int     i;
-
-    for (i=0; i<w*h*4; i++)
-        ptr[i] = get_random_float(-0x40000000, 0x40000000, d);
-
-    return ptr;
-}
-
-static int
-verify_float_image(const char *string, float *image, float *outptr, int w, int h)
-{
-    int     i;
-
-    for (i=0; i<w*h*4; i++)
-    {
-        if (outptr[i] != image[i])
-        {
-            log_error("%s failed\n", string);
-            return -1;
-        }
-    }
-
-    log_info("%s passed\n", string);
-    return 0;
-}
-
-int test_writeimage_fp32(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[3];
-    cl_program program;
-    cl_kernel kernel[2];
-    cl_image_format    img_format;
-    float *input_ptr, *output_ptr;
-    size_t threads[2];
-    int img_width = 512;
-    int img_height = 512;
-    int i, err, any_err = 0;
-    size_t origin[3] = {0, 0, 0};
-    size_t region[3] = {img_width, img_height, 1};
-    size_t length = img_width * img_height * 4 * sizeof(float);
-    MTdata d;
-
-    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
-
-    d = init_genrand( gRandomSeed );
-    input_ptr = generate_float_image(img_width, img_height, d);
-    free_mtdata(d); d = NULL;
-
-    output_ptr = (float*)malloc(length);
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_FLOAT;
-    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("create_image_2d failed\n");
-        return -1;
-    }
-    streams[1] = create_image_2d(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("create_image_2d failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateArray failed\n");
-        return -1;
-    }
-
-  err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper(context, &program, &kernel[0], 1,
-                                      &rgbaFFFF_write_kernel_code,
-                                      "test_rgbaFFFF_write");
-    if (err) return -1;
-    kernel[1] = clCreateKernel(program, "test_rgbaFFFF_write", NULL);
-    if (!kernel[1])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[0], 0, sizeof streams[2], &streams[2]);
-  err |= clSetKernelArg(kernel[0], 1, sizeof streams[0], &streams[0]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[1], 0, sizeof streams[2], &streams[2]);
-  err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  threads[0] = (unsigned int)img_width;
-  threads[1] = (unsigned int)img_height;
-
-    for (i=0; i<2; i++)
-    {
-    err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clExecuteKernel failed\n");
-            return -1;
-        }
-
-    err = clEnqueueReadImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clReadImage failed\n");
-            return -1;
-        }
-
-        err = verify_float_image((i == 0) ? "WRITE_IMAGE_RGBA_FLOAT test with memflags = CL_MEM_READ_WRITE" :
-                             "WRITE_IMAGE_RGBA_FLOAT test with memflags = CL_MEM_WRITE_ONLY",
-                             input_ptr, output_ptr, img_width, img_height);
-        any_err |= err;
-    }
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseKernel(kernel[0]);
-    clReleaseKernel(kernel[1]);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(output_ptr);
-
-    return any_err;
-}
-
-
diff --git a/test_conformance/basic/test_writeimage_int16.cpp b/test_conformance/basic/test_writeimage_int16.cpp
deleted file mode 100644
index d863a3a3..00000000
--- a/test_conformance/basic/test_writeimage_int16.cpp
+++ /dev/null
@@ -1,196 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-static const char *rgba16_write_kernel_code =
-"__kernel void test_rgba16_write(__global unsigned short *src, write_only image2d_t dstimg)\n"
-"{\n"
-"    int            tid_x = get_global_id(0);\n"
-"    int            tid_y = get_global_id(1);\n"
-"    int            indx = tid_y * get_image_width(dstimg) + tid_x;\n"
-"    float4         color;\n"
-"\n"
-"    indx *= 4;\n"
-"    color = (float4)((float)src[indx+0], (float)src[indx+1], (float)src[indx+2], (float)src[indx+3]);\n"
-"    color /= 65535.0f;\n"
-"    write_imagef(dstimg, (int2)(tid_x, tid_y), color);\n"
-"\n"
-"}\n";
-
-
-static unsigned short *
-generate_16bit_image(int w, int h, MTdata d)
-{
-    cl_ushort  *ptr = (cl_ushort*)malloc(w * h * 4 * sizeof(cl_ushort));
-    int             i;
-
-    for (i=0; i<w*h*4; i++)
-        ptr[i] = (cl_ushort)genrand_int32(d);
-
-    return ptr;
-}
-
-// normalized 16bit ints ... get dived by 64k then muled by 64k...
-// give the poor things some tolerance
-#define MAX_ERR 1
-
-static int
-verify_16bit_image(const char *string, cl_ushort *image, cl_ushort *outptr, int w, int h)
-{
-    int     i;
-
-    for (i=0; i<w*h*4; i++)
-    {
-        if (abs(outptr[i] - image[i]) > MAX_ERR)
-        {
-            log_error("%s failed\n", string);
-            return -1;
-        }
-    }
-
-    log_info("%s passed\n", string);
-    return 0;
-}
-
-int test_writeimage_int16(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
-    cl_mem streams[3];
-    cl_program program;
-    cl_kernel kernel[2];
-    cl_image_format    img_format;
-    cl_ushort *input_ptr, *output_ptr;
-    size_t threads[2];
-    int img_width = 512;
-    int img_height = 512;
-    int i, err, any_err = 0;
-    size_t origin[3] = {0, 0, 0};
-    size_t region[3] = {img_width, img_height, 1};
-    size_t length = img_width * img_height * 4 * sizeof(cl_ushort);
-
-    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
-
-    MTdata d = init_genrand( gRandomSeed );
-    input_ptr = generate_16bit_image(img_width, img_height, d);
-    free_mtdata(d); d = NULL;
-
-    output_ptr = (cl_ushort*)malloc(length);
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[0] = create_image_2d(context, CL_MEM_READ_WRITE, &img_format, img_width, img_height, 0, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("create_image_2d failed\n");
-        return -1;
-    }
-
-    img_format.image_channel_order = CL_RGBA;
-    img_format.image_channel_data_type = CL_UNORM_INT16;
-    streams[1] = create_image_2d(context, CL_MEM_WRITE_ONLY, &img_format, img_width, img_height, 0, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("create_image_2d failed\n");
-        return -1;
-    }
-  streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateArray failed\n");
-        return -1;
-    }
-
-  err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clEnqueueWriteBuffer failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper(context, &program, &kernel[0], 1,
-                                      &rgba16_write_kernel_code,
-                                      "test_rgba16_write");
-    if (err) return -1;
-    kernel[1] = clCreateKernel(program, "test_rgba16_write", NULL);
-    if (!kernel[1])
-    {
-        log_error("clCreateKernel failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[0], 0, sizeof streams[2], &streams[2]);
-  err |= clSetKernelArg(kernel[0], 1, sizeof streams[0], &streams[0]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  err  = clSetKernelArg(kernel[1], 0, sizeof streams[2], &streams[2]);
-  err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
-    if (err != CL_SUCCESS)
-    {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
-    }
-
-  threads[0] = (unsigned int)img_width;
-  threads[1] = (unsigned int)img_height;
-
-    for (i=0; i<2; i++)
-    {
-    err = clEnqueueNDRangeKernel(queue, kernel[i], 2, NULL, threads, NULL, 0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clExecuteKernel failed\n");
-            return -1;
-        }
-
-    err = clEnqueueReadImage(queue, streams[i], CL_TRUE, origin, region, 0, 0, output_ptr, 0, NULL, NULL);
-        if (err != CL_SUCCESS)
-        {
-            log_error("clReadImage failed\n");
-            return -1;
-        }
-
-        err = verify_16bit_image((i == 0) ? "WRITE_IMAGE_RGBA_UNORM_INT16 test with memflags = CL_MEM_READ_WRITE" :
-                             "WRITE_IMAGE_RGBA_UNORM_INT16 test with memflags = CL_MEM_WRITE_ONLY",
-                             input_ptr, output_ptr, img_width, img_height);
-        any_err |= err;
-    }
-
-    // cleanup
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseKernel(kernel[0]);
-    clReleaseKernel(kernel[1]);
-    clReleaseProgram(program);
-    free(input_ptr);
-    free(output_ptr);
-
-    return any_err;
-}
-
-
diff --git a/test_conformance/buffers/test_buffer_read.cpp b/test_conformance/buffers/test_buffer_read.cpp
index 49a57f92..28317880 100644
--- a/test_conformance/buffers/test_buffer_read.cpp
+++ b/test_conformance/buffers/test_buffer_read.cpp
@@ -768,7 +768,6 @@ int test_buffer_read_async( cl_device_id deviceID, cl_context context, cl_comman
     size_t      global_work_size[3];
     cl_int      err;
     int         i;
-    size_t      lastIndex;
     size_t      ptrSizes[5];
     int         src_flag_id;
     int         total_errors = 0;
@@ -849,11 +848,11 @@ int test_buffer_read_async( cl_device_id deviceID, cl_context context, cl_comman
                 return -1;
             }
 
-            lastIndex = ( num_elements * ( 1 << i ) - 1 ) * ptrSizes[0];
             err = clEnqueueReadBuffer(queue, buffer, false, 0,
                                       ptrSizes[i] * num_elements, outptr[i], 0,
                                       NULL, &event);
 #ifdef CHECK_FOR_NON_WAIT
+            size_t lastIndex = (num_elements * (1 << i) - 1) * ptrSizes[0];
             if ( ((uchar *)outptr[i])[lastIndex] ){
                 log_error( "    clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
                 log_error( "    Function was run asynchornously, but last value in array was set in code line following clEnqueueReadBuffer()\n" );
@@ -904,7 +903,6 @@ int test_buffer_read_array_barrier( cl_device_id deviceID, cl_context context, c
     size_t      global_work_size[3];
     cl_int      err;
     int         i;
-    size_t      lastIndex;
     size_t      ptrSizes[5];
     int         src_flag_id;
     int         total_errors = 0;
@@ -984,11 +982,11 @@ int test_buffer_read_array_barrier( cl_device_id deviceID, cl_context context, c
                 return -1;
             }
 
-            lastIndex = ( num_elements * ( 1 << i ) - 1 ) * ptrSizes[0];
             err = clEnqueueReadBuffer(queue, buffer, false, 0,
                                       ptrSizes[i] * num_elements,
                                       (void *)(outptr[i]), 0, NULL, &event);
 #ifdef CHECK_FOR_NON_WAIT
+            size_t lastIndex = (num_elements * (1 << i) - 1) * ptrSizes[0];
             if ( ((uchar *)outptr[i])[lastIndex] ){
                 log_error( "    clEnqueueReadBuffer() possibly returned only after inappropriately waiting for execution to be finished\n" );
                 log_error( "    Function was run asynchornously, but last value in array was set in code line following clEnqueueReadBuffer()\n" );
diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp
index 09c14ed1..d905b2ca 100644
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp
@@ -2914,7 +2914,8 @@ public:
                 + "-1);\n"
                   "  if(hisAtomicValue != hisValue)\n"
                   "  { // fail\n"
-                  "    atomic_store(&destMemory[myId], myValue-1);\n";
+                  "    atomic_store_explicit(&destMemory[myId], myValue-1,"
+                  " memory_order_relaxed, memory_scope_work_group);\n";
             if (LocalMemory())
                 program += "    hisId = "
                            "(hisId+get_local_size(0)-1)%get_local_size(0);\n";
@@ -3133,7 +3134,7 @@ public:
                                   NumNonAtomicVariablesPerThread() - 1);
                         log_error("ERROR: Thread #%u observed invalid values "
                                   "in other thread's variables\n",
-                                  workOffset + i, myValue);
+                                  workOffset + i);
                         correct = false;
                         return true;
                     }
diff --git a/test_conformance/common/vulkan_wrapper/CMakeLists.txt b/test_conformance/common/vulkan_wrapper/CMakeLists.txt
new file mode 100644
index 00000000..c647b4b6
--- /dev/null
+++ b/test_conformance/common/vulkan_wrapper/CMakeLists.txt
@@ -0,0 +1,69 @@
+set(VULKAN_WRAPPER_SOURCES
+    vulkan_wrapper.cpp
+    opencl_vulkan_wrapper.cpp
+    vulkan_utility.cpp
+    vulkan_list_map.cpp
+)
+
+# needed by Vulkan wrapper to compile
+add_cxx_flag_if_supported(-Wmisleading-indentation)
+add_cxx_flag_if_supported(-Wno-narrowing)
+add_cxx_flag_if_supported(-Wno-format)
+add_cxx_flag_if_supported(-Wno-error)
+add_cxx_flag_if_supported(-Wno-error=cpp) # Allow #warning directive
+add_cxx_flag_if_supported(-Wno-error=unknown-pragmas) # Issue #785
+add_cxx_flag_if_supported(-Wno-error=asm-operand-widths) # Issue #784
+add_cxx_flag_if_supported(-Wno-unused-variable)
+add_cxx_flag_if_supported(-Wno-error=terminate)
+add_cxx_flag_if_supported(-Wno-error=unused-function)
+add_cxx_flag_if_supported(-Wno-error=return-type)
+
+link_directories(${CLConform_VULKAN_LIBRARIES_DIR})
+
+list(APPEND CLConform_INCLUDE_DIR ${VULKAN_INCLUDE_DIR})
+
+add_library(vulkan_wrapper STATIC ${VULKAN_WRAPPER_SOURCES})
+
+if(ANDROID)
+    target_compile_definitions(vulkan_wrapper PUBLIC VK_USE_PLATFORM_ANDROID_KHR)
+elseif(WIN32)
+    target_compile_definitions(vulkan_wrapper PUBLIC VK_USE_PLATFORM_WIN32_KHR)
+elseif(APPLE)
+    target_compile_definitions(vulkan_wrapper PUBLIC VK_USE_PLATFORM_METAL_EXT)
+elseif(UNIX)
+    # Variable taken from Vulkan samples, commented out due to lack of WSI
+    # Choose WSI based on VKB_WSI_SELECTION
+    #if (VKB_WSI_SELECTION STREQUAL XCB OR VKB_WSI_SELECTION STREQUAL XLIB OR VKB_WSI_SELECTION STREQUAL WAYLAND)
+    #    find_package(PkgConfig REQUIRED)
+    #endif()
+    #if (VKB_WSI_SELECTION STREQUAL XCB)
+    #    pkg_check_modules(XCB xcb REQUIRED)
+    #    if (XCB_FOUND)
+    #        target_compile_definitions(vulkan_wrapper PUBLIC VK_USE_PLATFORM_XCB_KHR)
+    #    endif()
+    #elseif (VKB_WSI_SELECTION STREQUAL XLIB)
+    #    pkg_check_modules(X11 x11 REQUIRED)
+    #    if (X11_FOUND)
+    #        target_compile_definitions(vulkan_wrapper PUBLIC VK_USE_PLATFORM_XLIB_KHR)
+    #    endif()
+    #elseif (VKB_WSI_SELECTION STREQUAL WAYLAND)
+    #    pkg_check_modules(WAYLAND wayland-client REQUIRED)
+    #    if (WAYLAND_FOUND)
+    #        target_compile_definitions(vulkan_wrapper PUBLIC VK_USE_PLATFORM_WAYLAND_KHR)
+    #    endif()
+    #elseif (VKB_WSI_SELECTION STREQUAL D2D)
+    #    set(DIRECT_TO_DISPLAY TRUE)
+    #    set(DIRECT_TO_DISPLAY TRUE PARENT_SCOPE)
+    #    target_compile_definitions(vulkan_wrapper PUBLIC VK_USE_PLATFORM_DISPLAY_KHR)
+    #else()
+    #    message(FATAL_ERROR "Unknown WSI")
+    #endif()
+endif()
+
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+
+include_directories (${CLConform_INCLUDE_DIR})
+
+if (NOT WIN32)
+target_link_libraries(vulkan_wrapper dl)
+endif()
diff --git a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp
index 9d9a6601..0a459e97 100644
--- a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp
+++ b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp
@@ -831,7 +831,7 @@ clExternalSemaphore::clExternalSemaphore(
     }
 }
 
-clExternalSemaphore::~clExternalSemaphore()
+clExternalSemaphore::~clExternalSemaphore() noexcept(false)
 {
     cl_int err = clReleaseSemaphoreKHRptr(m_externalSemaphore);
     if (err != CL_SUCCESS)
@@ -851,3 +851,8 @@ void clExternalSemaphore::wait(cl_command_queue cmd_queue)
     clEnqueueWaitSemaphoresKHRptr(cmd_queue, 1, &m_externalSemaphore, NULL, 0,
                                   NULL, NULL);
 }
+
+cl_semaphore_khr &clExternalSemaphore::getCLSemaphore()
+{
+    return m_externalSemaphore;
+}
diff --git a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp
index d9f8dccb..5143332d 100644
--- a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp
+++ b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp
@@ -120,9 +120,10 @@ public:
         const VulkanSemaphore &deviceSemaphore, cl_context context,
         VulkanExternalSemaphoreHandleType externalSemaphoreHandleType,
         cl_device_id deviceId);
-    virtual ~clExternalSemaphore();
+    virtual ~clExternalSemaphore() noexcept(false);
     void signal(cl_command_queue command_queue);
     void wait(cl_command_queue command_queue);
+    cl_semaphore_khr &getCLSemaphore();
     // operator openclExternalSemaphore_t() const;
 };
 
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_api_list.hpp b/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp
index 017aefd2..c62a71e1 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_api_list.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp
@@ -161,7 +161,7 @@
 #define vkCreateImage _vkCreateImage
 #define vkGetImageMemoryRequirements _vkGetImageMemoryRequirements
 #define vkDestroyImage _vkDestroyImage
-#define vkDestroyBuffe _vkDestroyBuffer
+#define vkDestroyBuffer _vkDestroyBuffer
 #define vkDestroyPipeline _vkDestroyPipeline
 #define vkDestroyShaderModule _vkDestroyShaderModule
 #define vkGetPhysicalDeviceMemoryProperties _vkGetPhysicalDeviceMemoryProperties
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.cpp b/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp
index bdae5d22..4e276519 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.cpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp
@@ -14,9 +14,8 @@
 // limitations under the License.
 //
 
-#ifdef _WIN32
-#define NOMINMAX
-#endif
+
+#include <algorithm>
 #include "vulkan_list_map.hpp"
 #include "vulkan_utility.hpp"
 #include "vulkan_wrapper.hpp"
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp b/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp
index 52206779..52206779 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp b/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp
index 1a313cce..1a313cce 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp b/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp
index 04f5a594..04f5a594 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp
index 6209a747..3ce4af6b 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp
@@ -15,10 +15,10 @@
 //
 
 #ifdef _WIN32
-#define NOMINMAX
 #include <Windows.h>
 #include <dxgi1_2.h>
 #include <aclapi.h>
+#include <algorithm>
 #endif
 #include <vulkan/vulkan.h>
 #include "vulkan_wrapper.hpp"
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp
index 37925ee4..37925ee4 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper_types.hpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp
index 359bcae4..2473a1d7 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper_types.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp
@@ -1,463 +1,463 @@
-//
-// Copyright (c) 2022 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#ifndef _vulkan_wrapper_types_hpp_
-#define _vulkan_wrapper_types_hpp_
-
-#include <vulkan/vulkan.h>
-
-#define VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT 4
-#define VULKAN_REMAINING_MIP_LEVELS VK_REMAINING_MIP_LEVELS
-#define VULKAN_REMAINING_ARRAY_LAYERS VK_REMAINING_ARRAY_LAYERS
-
-class VulkanInstance;
-class VulkanPhysicalDevice;
-class VulkanMemoryHeap;
-class VulkanMemoryType;
-class VulkanQueueFamily;
-class VulkanDevice;
-class VulkanQueue;
-class VulkanDescriptorSetLayoutBinding;
-class VulkanDescriptorSetLayout;
-class VulkanPipelineLayout;
-class VulkanShaderModule;
-class VulkanPipeline;
-class VulkanComputePipeline;
-class VulkanDescriptorPool;
-class VulkanDescriptorSet;
-class VulkanCommandPool;
-class VulkanCommandBuffer;
-class VulkanBuffer;
-class VulkanOffset3D;
-class VulkanExtent3D;
-class VulkanImage;
-class VulkanImage2D;
-class VulkanImageView;
-class VulkanDeviceMemory;
-class VulkanSemaphore;
-
-class VulkanPhysicalDeviceList;
-class VulkanMemoryHeapList;
-class VulkanMemoryTypeList;
-class VulkanQueueFamilyList;
-class VulkanQueueFamilyToQueueCountMap;
-class VulkanQueueFamilyToQueueListMap;
-class VulkanQueueList;
-class VulkanCommandBufferList;
-class VulkanDescriptorSetLayoutList;
-class VulkanBufferList;
-class VulkanImage2DList;
-class VulkanImageViewList;
-class VulkanDeviceMemoryList;
-class VulkanSemaphoreList;
-
-enum VulkanQueueFlag
-{
-    VULKAN_QUEUE_FLAG_GRAPHICS = VK_QUEUE_GRAPHICS_BIT,
-    VULKAN_QUEUE_FLAG_COMPUTE = VK_QUEUE_COMPUTE_BIT,
-    VULKAN_QUEUE_FLAG_TRANSFER = VK_QUEUE_TRANSFER_BIT,
-    VULKAN_QUEUE_FLAG_MASK_ALL = VULKAN_QUEUE_FLAG_GRAPHICS
-        | VULKAN_QUEUE_FLAG_COMPUTE | VULKAN_QUEUE_FLAG_TRANSFER
-};
-
-enum VulkanDescriptorType
-{
-    VULKAN_DESCRIPTOR_TYPE_SAMPLER = VK_DESCRIPTOR_TYPE_SAMPLER,
-    VULKAN_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER =
-        VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
-    VULKAN_DESCRIPTOR_TYPE_SAMPLED_IMAGE = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
-    VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
-    VULKAN_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER =
-        VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
-    VULKAN_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER =
-        VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
-    VULKAN_DESCRIPTOR_TYPE_UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
-    VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
-    VULKAN_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC =
-        VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC,
-    VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC =
-        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC,
-    VULKAN_DESCRIPTOR_TYPE_INPUT_ATTACHMENT =
-        VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT,
-};
-
-enum VulkanShaderStage
-{
-    VULKAN_SHADER_STAGE_VERTEX = VK_SHADER_STAGE_VERTEX_BIT,
-    VULKAN_SHADER_STAGE_FRAGMENT = VK_SHADER_STAGE_FRAGMENT_BIT,
-    VULKAN_SHADER_STAGE_COMPUTE = VK_SHADER_STAGE_COMPUTE_BIT,
-    VULKAN_SHADER_STAGE_ALL_GRAPHICS = VK_SHADER_STAGE_ALL_GRAPHICS,
-    VULKAN_SHADER_STAGE_ALL = VK_SHADER_STAGE_ALL
-};
-
-enum VulkanPipelineBindPoint
-{
-    VULKAN_PIPELINE_BIND_POINT_GRAPHICS = VK_PIPELINE_BIND_POINT_GRAPHICS,
-    VULKAN_PIPELINE_BIND_POINT_COMPUTE = VK_PIPELINE_BIND_POINT_COMPUTE
-};
-
-enum VulkanMemoryTypeProperty
-{
-    VULKAN_MEMORY_TYPE_PROPERTY_NONE = 0,
-    VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL =
-        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
-    VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT =
-        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
-        | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-    VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_CACHED =
-        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
-        | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
-    VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_CACHED_COHERENT =
-        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT
-        | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-    VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_COHERENT =
-        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
-        | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
-        | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
-    VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_CACHED =
-        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
-        | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
-        | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
-    VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_CACHED_COHERENT =
-        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
-        | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
-        | VK_MEMORY_PROPERTY_HOST_CACHED_BIT
-        | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
-};
-
-enum VulkanMemoryHeapFlag
-{
-    VULKAN_MEMORY_HEAP_FLAG_NONE = 0,
-    VULKAN_MEMORY_HEAP_FLAG_DEVICE_LOCAL = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
-};
-
-enum VulkanExternalMemoryHandleType
-{
-    VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE = 0,
-    VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD =
-        VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
-    VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT =
-        VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR,
-    VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT =
-        VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR,
-    VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT =
-        VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
-        | VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR
-};
-
-enum VulkanExternalSemaphoreHandleType
-{
-    VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NONE = 0,
-    VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD =
-        VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
-    VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT =
-        VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR,
-    VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT =
-        VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR,
-    VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT =
-        VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
-        | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR
-};
-
-enum VulkanBufferUsage
-{
-    VULKAN_BUFFER_USAGE_TRANSFER_SRC = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
-    VULKAN_BUFFER_USAGE_TRANSFER_DST = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
-    VULKAN_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER =
-        VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
-    VULKAN_BUFFER_USAGE_STORAGE_TEXEL_BUFFER =
-        VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
-    VULKAN_BUFFER_USAGE_UNIFORM_BUFFER = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
-    VULKAN_BUFFER_USAGE_STORAGE_BUFFER = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
-    VULKAN_BUFFER_USAGE_INDEX_BUFFER = VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
-    VULKAN_BUFFER_USAGE_VERTEX_BUFFER = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
-    VULKAN_BUFFER_USAGE_INDIRECT_BUFFER = VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT,
-    VULKAN_BUFFER_USAGE_STORAGE_BUFFER_TRANSFER_SRC_DST =
-        VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT
-        | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
-    VULKAN_BUFFER_USAGE_UNIFORM_BUFFER_TRANSFER_SRC_DST =
-        VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT
-        | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
-};
-
-enum VulkanSharingMode
-{
-    VULKAN_SHARING_MODE_EXCLUSIVE = VK_SHARING_MODE_EXCLUSIVE,
-    VULKAN_SHARING_MODE_CONCURRENT = VK_SHARING_MODE_CONCURRENT
-};
-
-enum VulkanImageType
-{
-    VULKAN_IMAGE_TYPE_1D = VK_IMAGE_TYPE_1D,
-    VULKAN_IMAGE_TYPE_2D = VK_IMAGE_TYPE_2D,
-    VULKAN_IMAGE_TYPE_3D = VK_IMAGE_TYPE_3D
-};
-
-enum VulkanFormat
-{
-    VULKAN_FORMAT_UNDEFINED = VK_FORMAT_UNDEFINED,
-    VULKAN_FORMAT_R4G4_UNORM_PACK8 = VK_FORMAT_R4G4_UNORM_PACK8,
-    VULKAN_FORMAT_R4G4B4A4_UNORM_PACK16 = VK_FORMAT_R4G4B4A4_UNORM_PACK16,
-    VULKAN_FORMAT_B4G4R4A4_UNORM_PACK16 = VK_FORMAT_B4G4R4A4_UNORM_PACK16,
-    VULKAN_FORMAT_R5G6B5_UNORM_PACK16 = VK_FORMAT_R5G6B5_UNORM_PACK16,
-    VULKAN_FORMAT_B5G6R5_UNORM_PACK16 = VK_FORMAT_B5G6R5_UNORM_PACK16,
-    VULKAN_FORMAT_R5G5B5A1_UNORM_PACK16 = VK_FORMAT_R5G5B5A1_UNORM_PACK16,
-    VULKAN_FORMAT_B5G5R5A1_UNORM_PACK16 = VK_FORMAT_B5G5R5A1_UNORM_PACK16,
-    VULKAN_FORMAT_A1R5G5B5_UNORM_PACK16 = VK_FORMAT_A1R5G5B5_UNORM_PACK16,
-    VULKAN_FORMAT_R8_UNORM = VK_FORMAT_R8_UNORM,
-    VULKAN_FORMAT_R8_SNORM = VK_FORMAT_R8_SNORM,
-    VULKAN_FORMAT_R8_USCALED = VK_FORMAT_R8_USCALED,
-    VULKAN_FORMAT_R8_SSCALED = VK_FORMAT_R8_SSCALED,
-    VULKAN_FORMAT_R8_UINT = VK_FORMAT_R8_UINT,
-    VULKAN_FORMAT_R8_SINT = VK_FORMAT_R8_SINT,
-    VULKAN_FORMAT_R8_SRGB = VK_FORMAT_R8_SRGB,
-    VULKAN_FORMAT_R8G8_SNORM = VK_FORMAT_R8G8_SNORM,
-    VULKAN_FORMAT_R8G8_UNORM = VK_FORMAT_R8G8_UNORM,
-    VULKAN_FORMAT_R8G8_USCALED = VK_FORMAT_R8G8_USCALED,
-    VULKAN_FORMAT_R8G8_SSCALED = VK_FORMAT_R8G8_SSCALED,
-    VULKAN_FORMAT_R8G8_UINT = VK_FORMAT_R8G8_UINT,
-    VULKAN_FORMAT_R8G8_SINT = VK_FORMAT_R8G8_SINT,
-    VULKAN_FORMAT_R8G8_SRGB = VK_FORMAT_R8G8_SRGB,
-    VULKAN_FORMAT_R8G8B8_UNORM = VK_FORMAT_R8G8B8_UNORM,
-    VULKAN_FORMAT_R8G8B8_SNORM = VK_FORMAT_R8G8B8_SNORM,
-    VULKAN_FORMAT_R8G8B8_USCALED = VK_FORMAT_R8G8B8_USCALED,
-    VULKAN_FORMAT_R8G8B8_SSCALED = VK_FORMAT_R8G8B8_SSCALED,
-    VULKAN_FORMAT_R8G8B8_UINT = VK_FORMAT_R8G8B8_UINT,
-    VULKAN_FORMAT_R8G8B8_SINT = VK_FORMAT_R8G8B8_SINT,
-    VULKAN_FORMAT_R8G8B8_SRGB = VK_FORMAT_R8G8B8_SRGB,
-    VULKAN_FORMAT_B8G8R8_UNORM = VK_FORMAT_B8G8R8_UNORM,
-    VULKAN_FORMAT_B8G8R8_SNORM = VK_FORMAT_B8G8R8_SNORM,
-    VULKAN_FORMAT_B8G8R8_USCALED = VK_FORMAT_B8G8R8_USCALED,
-    VULKAN_FORMAT_B8G8R8_SSCALED = VK_FORMAT_B8G8R8_SSCALED,
-    VULKAN_FORMAT_B8G8R8_UINT = VK_FORMAT_B8G8R8_UINT,
-    VULKAN_FORMAT_B8G8R8_SINT = VK_FORMAT_B8G8R8_SINT,
-    VULKAN_FORMAT_B8G8R8_SRGB = VK_FORMAT_B8G8R8_SRGB,
-    VULKAN_FORMAT_R8G8B8A8_UNORM = VK_FORMAT_R8G8B8A8_UNORM,
-    VULKAN_FORMAT_R8G8B8A8_SNORM = VK_FORMAT_R8G8B8A8_SNORM,
-    VULKAN_FORMAT_R8G8B8A8_USCALED = VK_FORMAT_R8G8B8A8_USCALED,
-    VULKAN_FORMAT_R8G8B8A8_SSCALED = VK_FORMAT_R8G8B8A8_SSCALED,
-    VULKAN_FORMAT_R8G8B8A8_UINT = VK_FORMAT_R8G8B8A8_UINT,
-    VULKAN_FORMAT_R8G8B8A8_SINT = VK_FORMAT_R8G8B8A8_SINT,
-    VULKAN_FORMAT_R8G8B8A8_SRGB = VK_FORMAT_R8G8B8A8_SRGB,
-    VULKAN_FORMAT_B8G8R8A8_UNORM = VK_FORMAT_B8G8R8A8_UNORM,
-    VULKAN_FORMAT_B8G8R8A8_SNORM = VK_FORMAT_B8G8R8A8_SNORM,
-    VULKAN_FORMAT_B8G8R8A8_USCALED = VK_FORMAT_B8G8R8A8_USCALED,
-    VULKAN_FORMAT_B8G8R8A8_SSCALED = VK_FORMAT_B8G8R8A8_SSCALED,
-    VULKAN_FORMAT_B8G8R8A8_UINT = VK_FORMAT_B8G8R8A8_UINT,
-    VULKAN_FORMAT_B8G8R8A8_SINT = VK_FORMAT_B8G8R8A8_SINT,
-    VULKAN_FORMAT_B8G8R8A8_SRGB = VK_FORMAT_B8G8R8A8_SRGB,
-    VULKAN_FORMAT_A8B8G8R8_UNORM_PACK32 = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
-    VULKAN_FORMAT_A8B8G8R8_SNORM_PACK32 = VK_FORMAT_A8B8G8R8_SNORM_PACK32,
-    VULKAN_FORMAT_A8B8G8R8_USCALED_PACK32 = VK_FORMAT_A8B8G8R8_USCALED_PACK32,
-    VULKAN_FORMAT_A8B8G8R8_SSCALED_PACK32 = VK_FORMAT_A8B8G8R8_SSCALED_PACK32,
-    VULKAN_FORMAT_A8B8G8R8_UINT_PACK32 = VK_FORMAT_A8B8G8R8_UINT_PACK32,
-    VULKAN_FORMAT_A8B8G8R8_SINT_PACK32 = VK_FORMAT_A8B8G8R8_SINT_PACK32,
-    VULKAN_FORMAT_A8B8G8R8_SRGB_PACK32 = VK_FORMAT_A8B8G8R8_SRGB_PACK32,
-    VULKAN_FORMAT_A2R10G10B10_UNORM_PACK32 = VK_FORMAT_A2R10G10B10_UNORM_PACK32,
-    VULKAN_FORMAT_A2R10G10B10_SNORM_PACK32 = VK_FORMAT_A2R10G10B10_SNORM_PACK32,
-    VULKAN_FORMAT_A2R10G10B10_USCALED_PACK32 =
-        VK_FORMAT_A2R10G10B10_USCALED_PACK32,
-    VULKAN_FORMAT_A2R10G10B10_SSCALED_PACK32 =
-        VK_FORMAT_A2R10G10B10_SSCALED_PACK32,
-    VULKAN_FORMAT_A2R10G10B10_UINT_PACK32 = VK_FORMAT_A2R10G10B10_UINT_PACK32,
-    VULKAN_FORMAT_A2R10G10B10_SINT_PACK32 = VK_FORMAT_A2R10G10B10_SINT_PACK32,
-    VULKAN_FORMAT_A2B10G10R10_UNORM_PACK32 = VK_FORMAT_A2B10G10R10_UNORM_PACK32,
-    VULKAN_FORMAT_A2B10G10R10_SNORM_PACK32 = VK_FORMAT_A2B10G10R10_SNORM_PACK32,
-    VULKAN_FORMAT_A2B10G10R10_USCALED_PACK32 =
-        VK_FORMAT_A2B10G10R10_USCALED_PACK32,
-    VULKAN_FORMAT_A2B10G10R10_SSCALED_PACK32 =
-        VK_FORMAT_A2B10G10R10_SSCALED_PACK32,
-    VULKAN_FORMAT_A2B10G10R10_UINT_PACK32 = VK_FORMAT_A2B10G10R10_UINT_PACK32,
-    VULKAN_FORMAT_A2B10G10R10_SINT_PACK32 = VK_FORMAT_A2B10G10R10_SINT_PACK32,
-    VULKAN_FORMAT_R16_UNORM = VK_FORMAT_R16_UNORM,
-    VULKAN_FORMAT_R16_SNORM = VK_FORMAT_R16_SNORM,
-    VULKAN_FORMAT_R16_USCALED = VK_FORMAT_R16_USCALED,
-    VULKAN_FORMAT_R16_SSCALED = VK_FORMAT_R16_SSCALED,
-    VULKAN_FORMAT_R16_UINT = VK_FORMAT_R16_UINT,
-    VULKAN_FORMAT_R16_SINT = VK_FORMAT_R16_SINT,
-    VULKAN_FORMAT_R16_SFLOAT = VK_FORMAT_R16_SFLOAT,
-    VULKAN_FORMAT_R16G16_UNORM = VK_FORMAT_R16G16_UNORM,
-    VULKAN_FORMAT_R16G16_SNORM = VK_FORMAT_R16G16_SNORM,
-    VULKAN_FORMAT_R16G16_USCALED = VK_FORMAT_R16G16_USCALED,
-    VULKAN_FORMAT_R16G16_SSCALED = VK_FORMAT_R16G16_SSCALED,
-    VULKAN_FORMAT_R16G16_UINT = VK_FORMAT_R16G16_UINT,
-    VULKAN_FORMAT_R16G16_SINT = VK_FORMAT_R16G16_SINT,
-    VULKAN_FORMAT_R16G16_SFLOAT = VK_FORMAT_R16G16_SFLOAT,
-    VULKAN_FORMAT_R16G16B16_UNORM = VK_FORMAT_R16G16B16_UNORM,
-    VULKAN_FORMAT_R16G16B16_SNORM = VK_FORMAT_R16G16B16_SNORM,
-    VULKAN_FORMAT_R16G16B16_USCALED = VK_FORMAT_R16G16B16_USCALED,
-    VULKAN_FORMAT_R16G16B16_SSCALED = VK_FORMAT_R16G16B16_SSCALED,
-    VULKAN_FORMAT_R16G16B16_UINT = VK_FORMAT_R16G16B16_UINT,
-    VULKAN_FORMAT_R16G16B16_SINT = VK_FORMAT_R16G16B16_SINT,
-    VULKAN_FORMAT_R16G16B16_SFLOAT = VK_FORMAT_R16G16B16_SFLOAT,
-    VULKAN_FORMAT_R16G16B16A16_UNORM = VK_FORMAT_R16G16B16A16_UNORM,
-    VULKAN_FORMAT_R16G16B16A16_SNORM = VK_FORMAT_R16G16B16A16_SNORM,
-    VULKAN_FORMAT_R16G16B16A16_USCALED = VK_FORMAT_R16G16B16A16_USCALED,
-    VULKAN_FORMAT_R16G16B16A16_SSCALED = VK_FORMAT_R16G16B16A16_SSCALED,
-    VULKAN_FORMAT_R16G16B16A16_UINT = VK_FORMAT_R16G16B16A16_UINT,
-    VULKAN_FORMAT_R16G16B16A16_SINT = VK_FORMAT_R16G16B16A16_SINT,
-    VULKAN_FORMAT_R16G16B16A16_SFLOAT = VK_FORMAT_R16G16B16A16_SFLOAT,
-    VULKAN_FORMAT_R32_UINT = VK_FORMAT_R32_UINT,
-    VULKAN_FORMAT_R32_SINT = VK_FORMAT_R32_SINT,
-    VULKAN_FORMAT_R32_SFLOAT = VK_FORMAT_R32_SFLOAT,
-    VULKAN_FORMAT_R32G32_UINT = VK_FORMAT_R32G32_UINT,
-    VULKAN_FORMAT_R32G32_SINT = VK_FORMAT_R32G32_SINT,
-    VULKAN_FORMAT_R32G32_SFLOAT = VK_FORMAT_R32G32_SFLOAT,
-    VULKAN_FORMAT_R32G32B32_UINT = VK_FORMAT_R32G32B32_UINT,
-    VULKAN_FORMAT_R32G32B32_SINT = VK_FORMAT_R32G32B32_SINT,
-    VULKAN_FORMAT_R32G32B32_SFLOAT = VK_FORMAT_R32G32B32_SFLOAT,
-    VULKAN_FORMAT_R32G32B32A32_UINT = VK_FORMAT_R32G32B32A32_UINT,
-    VULKAN_FORMAT_R32G32B32A32_SINT = VK_FORMAT_R32G32B32A32_SINT,
-    VULKAN_FORMAT_R32G32B32A32_SFLOAT = VK_FORMAT_R32G32B32A32_SFLOAT,
-    VULKAN_FORMAT_R64_UINT = VK_FORMAT_R64_UINT,
-    VULKAN_FORMAT_R64_SINT = VK_FORMAT_R64_SINT,
-    VULKAN_FORMAT_R64_SFLOAT = VK_FORMAT_R64_SFLOAT,
-    VULKAN_FORMAT_R64G64_UINT = VK_FORMAT_R64G64_UINT,
-    VULKAN_FORMAT_R64G64_SINT = VK_FORMAT_R64G64_SINT,
-    VULKAN_FORMAT_R64G64_SFLOAT = VK_FORMAT_R64G64_SFLOAT,
-    VULKAN_FORMAT_R64G64B64_UINT = VK_FORMAT_R64G64B64_UINT,
-    VULKAN_FORMAT_R64G64B64_SINT = VK_FORMAT_R64G64B64_SINT,
-    VULKAN_FORMAT_R64G64B64_SFLOAT = VK_FORMAT_R64G64B64_SFLOAT,
-    VULKAN_FORMAT_R64G64B64A64_UINT = VK_FORMAT_R64G64B64A64_UINT,
-    VULKAN_FORMAT_R64G64B64A64_SINT = VK_FORMAT_R64G64B64A64_SINT,
-    VULKAN_FORMAT_R64G64B64A64_SFLOAT = VK_FORMAT_R64G64B64A64_SFLOAT,
-    VULKAN_FORMAT_B10G11R11_UFLOAT_PACK32 = VK_FORMAT_B10G11R11_UFLOAT_PACK32,
-    VULKAN_FORMAT_E5B9G9R9_UFLOAT_PACK32 = VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
-    VULKAN_FORMAT_D16_UNORM = VK_FORMAT_D16_UNORM,
-    VULKAN_FORMAT_X8_D24_UNORM_PACK32 = VK_FORMAT_X8_D24_UNORM_PACK32,
-    VULKAN_FORMAT_D32_SFLOAT = VK_FORMAT_D32_SFLOAT,
-    VULKAN_FORMAT_S8_UINT = VK_FORMAT_S8_UINT,
-    VULKAN_FORMAT_D16_UNORM_S8_UINT = VK_FORMAT_D16_UNORM_S8_UINT,
-    VULKAN_FORMAT_D24_UNORM_S8_UINT = VK_FORMAT_D24_UNORM_S8_UINT,
-    VULKAN_FORMAT_D32_SFLOAT_S8_UINT = VK_FORMAT_D32_SFLOAT_S8_UINT,
-    VULKAN_FORMAT_BC1_RGB_UNORM_BLOCK = VK_FORMAT_BC1_RGB_UNORM_BLOCK,
-    VULKAN_FORMAT_BC1_RGB_SRGB_BLOCK = VK_FORMAT_BC1_RGB_SRGB_BLOCK,
-    VULKAN_FORMAT_BC1_RGBA_UNORM_BLOCK = VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
-    VULKAN_FORMAT_BC1_RGBA_SRGB_BLOCK = VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
-    VULKAN_FORMAT_BC2_UNORM_BLOCK = VK_FORMAT_BC2_UNORM_BLOCK,
-    VULKAN_FORMAT_BC2_SRGB_BLOCK = VK_FORMAT_BC2_SRGB_BLOCK,
-    VULKAN_FORMAT_BC3_UNORM_BLOCK = VK_FORMAT_BC3_UNORM_BLOCK,
-    VULKAN_FORMAT_BC3_SRGB_BLOCK = VK_FORMAT_BC3_SRGB_BLOCK,
-    VULKAN_FORMAT_BC4_UNORM_BLOCK = VK_FORMAT_BC4_UNORM_BLOCK,
-    VULKAN_FORMAT_BC4_SNORM_BLOCK = VK_FORMAT_BC4_SNORM_BLOCK,
-    VULKAN_FORMAT_BC5_UNORM_BLOCK = VK_FORMAT_BC5_UNORM_BLOCK,
-    VULKAN_FORMAT_BC5_SNORM_BLOCK = VK_FORMAT_BC5_SNORM_BLOCK,
-    VULKAN_FORMAT_BC6H_UFLOAT_BLOCK = VK_FORMAT_BC6H_UFLOAT_BLOCK,
-    VULKAN_FORMAT_BC6H_SFLOAT_BLOCK = VK_FORMAT_BC6H_SFLOAT_BLOCK,
-    VULKAN_FORMAT_BC7_UNORM_BLOCK = VK_FORMAT_BC7_UNORM_BLOCK,
-    VULKAN_FORMAT_BC7_SRGB_BLOCK = VK_FORMAT_BC7_SRGB_BLOCK,
-    VULKAN_FORMAT_ETC2_R8G8B8_UNORM_BLOCK = VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK,
-    VULKAN_FORMAT_ETC2_R8G8B8_SRGB_BLOCK = VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK,
-    VULKAN_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK =
-        VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK,
-    VULKAN_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK = VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK,
-    VULKAN_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK =
-        VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK,
-    VULKAN_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK = VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK,
-    VULKAN_FORMAT_EAC_R11_UNORM_BLOCK = VK_FORMAT_EAC_R11_UNORM_BLOCK,
-    VULKAN_FORMAT_EAC_R11_SNORM_BLOCK = VK_FORMAT_EAC_R11_SNORM_BLOCK,
-    VULKAN_FORMAT_EAC_R11G11_UNORM_BLOCK = VK_FORMAT_EAC_R11G11_UNORM_BLOCK,
-    VULKAN_FORMAT_EAC_R11G11_SNORM_BLOCK = VK_FORMAT_EAC_R11G11_SNORM_BLOCK,
-    VULKAN_FORMAT_ASTC_4x4_UNORM_BLOCK = VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
-    VULKAN_FORMAT_ASTC_4x4_SRGB_BLOCK = VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
-    VULKAN_FORMAT_ASTC_5x4_UNORM_BLOCK = VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
-    VULKAN_FORMAT_ASTC_5x4_SRGB_BLOCK = VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
-    VULKAN_FORMAT_ASTC_5x5_UNORM_BLOCK = VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
-    VULKAN_FORMAT_ASTC_5x5_SRGB_BLOCK = VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
-    VULKAN_FORMAT_ASTC_6x5_UNORM_BLOCK = VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
-    VULKAN_FORMAT_ASTC_6x5_SRGB_BLOCK = VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
-    VULKAN_FORMAT_ASTC_6x6_UNORM_BLOCK = VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
-    VULKAN_FORMAT_ASTC_6x6_SRGB_BLOCK = VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
-    VULKAN_FORMAT_ASTC_8x5_UNORM_BLOCK = VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
-    VULKAN_FORMAT_ASTC_8x5_SRGB_BLOCK = VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
-    VULKAN_FORMAT_ASTC_8x6_UNORM_BLOCK = VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
-    VULKAN_FORMAT_ASTC_8x6_SRGB_BLOCK = VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
-    VULKAN_FORMAT_ASTC_8x8_UNORM_BLOCK = VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
-    VULKAN_FORMAT_ASTC_8x8_SRGB_BLOCK = VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
-    VULKAN_FORMAT_ASTC_10x5_UNORM_BLOCK = VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
-    VULKAN_FORMAT_ASTC_10x5_SRGB_BLOCK = VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
-    VULKAN_FORMAT_ASTC_10x6_UNORM_BLOCK = VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
-    VULKAN_FORMAT_ASTC_10x6_SRGB_BLOCK = VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
-    VULKAN_FORMAT_ASTC_10x8_UNORM_BLOCK = VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
-    VULKAN_FORMAT_ASTC_10x8_SRGB_BLOCK = VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
-    VULKAN_FORMAT_ASTC_10x10_UNORM_BLOCK = VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
-    VULKAN_FORMAT_ASTC_10x10_SRGB_BLOCK = VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
-    VULKAN_FORMAT_ASTC_12x10_UNORM_BLOCK = VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
-    VULKAN_FORMAT_ASTC_12x10_SRGB_BLOCK = VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
-    VULKAN_FORMAT_ASTC_12x12_UNORM_BLOCK = VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
-    VULKAN_FORMAT_ASTC_12x12_SRGB_BLOCK = VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
-};
-
-enum VulkanImageLayout
-{
-    VULKAN_IMAGE_LAYOUT_UNDEFINED = VK_IMAGE_LAYOUT_UNDEFINED,
-    VULKAN_IMAGE_LAYOUT_GENERAL = VK_IMAGE_LAYOUT_GENERAL,
-    VULKAN_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL =
-        VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
-    VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL =
-        VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
-};
-
-enum VulkanImageUsage
-{
-    VULKAN_IMAGE_USAGE_TRANSFER_SRC = VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
-    VULKAN_IMAGE_USAGE_TRANSFER_DST = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
-    VULKAN_IMAGE_USAGE_SAMPLED = VK_IMAGE_USAGE_SAMPLED_BIT,
-    VULKAN_IMAGE_USAGE_STORAGE = VK_IMAGE_USAGE_STORAGE_BIT,
-    VULKAN_IMAGE_USAGE_COLOR_ATTACHMENT = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
-    VULKAN_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT =
-        VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
-    VULKAN_IMAGE_USAGE_TRANSIENT_ATTACHMENT =
-        VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT,
-    VULKAN_IMAGE_USAGE_INPUT_ATTACHMENT = VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT,
-    VULKAN_IMAGE_USAGE_TRANSFER_SRC_DST =
-        VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
-    VULKAN_IMAGE_USAGE_STORAGE_TRANSFER_SRC_DST = VULKAN_IMAGE_USAGE_STORAGE
-        | VULKAN_IMAGE_USAGE_TRANSFER_SRC | VULKAN_IMAGE_USAGE_TRANSFER_DST,
-    VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST =
-        VK_IMAGE_USAGE_SAMPLED_BIT | VULKAN_IMAGE_USAGE_STORAGE
-        | VULKAN_IMAGE_USAGE_TRANSFER_SRC | VULKAN_IMAGE_USAGE_TRANSFER_DST
-};
-
-enum VulkanImageTiling
-{
-    VULKAN_IMAGE_TILING_OPTIMAL = VK_IMAGE_TILING_OPTIMAL,
-    VULKAN_IMAGE_TILING_LINEAR = VK_IMAGE_TILING_LINEAR
-};
-
-enum VulkanImageCreateFlag
-{
-    VULKAN_IMAGE_CREATE_FLAG_NONE = 0,
-    VULKAN_IMAGE_CREATE_FLAG_MUTABLE_FORMAT =
-        VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
-    VULKAN_IMAGE_CREATE_FLAG_CUBE_COMPATIBLE =
-        VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT,
-    VULKAN_IMAGE_CREATE_FLAG_CUBE_COMPATIBLE_MUTABLE_FORMAT =
-        VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT | VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT
-};
-
-enum VulkanImageViewType
-{
-    VULKAN_IMAGE_VIEW_TYPE_1D = VK_IMAGE_VIEW_TYPE_1D,
-    VULKAN_IMAGE_VIEW_TYPE_2D = VK_IMAGE_VIEW_TYPE_2D,
-    VULKAN_IMAGE_VIEW_TYPE_3D = VK_IMAGE_VIEW_TYPE_3D,
-    VULKAN_IMAGE_VIEW_TYPE_CUBE = VK_IMAGE_VIEW_TYPE_CUBE,
-    VULKAN_IMAGE_VIEW_TYPE_1D_ARRAY = VK_IMAGE_VIEW_TYPE_1D_ARRAY,
-    VULKAN_IMAGE_VIEW_TYPE_2D_ARRAY = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
-    VULKAN_IMAGE_VIEW_TYPE_CUBE_ARRAY = VK_IMAGE_VIEW_TYPE_CUBE_ARRAY,
-};
-
-#endif // _vulkan_wrapper_types_hpp_
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef _vulkan_wrapper_types_hpp_
+#define _vulkan_wrapper_types_hpp_
+
+#include <vulkan/vulkan.h>
+
+#define VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT 4
+#define VULKAN_REMAINING_MIP_LEVELS VK_REMAINING_MIP_LEVELS
+#define VULKAN_REMAINING_ARRAY_LAYERS VK_REMAINING_ARRAY_LAYERS
+
+class VulkanInstance;
+class VulkanPhysicalDevice;
+class VulkanMemoryHeap;
+class VulkanMemoryType;
+class VulkanQueueFamily;
+class VulkanDevice;
+class VulkanQueue;
+class VulkanDescriptorSetLayoutBinding;
+class VulkanDescriptorSetLayout;
+class VulkanPipelineLayout;
+class VulkanShaderModule;
+class VulkanPipeline;
+class VulkanComputePipeline;
+class VulkanDescriptorPool;
+class VulkanDescriptorSet;
+class VulkanCommandPool;
+class VulkanCommandBuffer;
+class VulkanBuffer;
+class VulkanOffset3D;
+class VulkanExtent3D;
+class VulkanImage;
+class VulkanImage2D;
+class VulkanImageView;
+class VulkanDeviceMemory;
+class VulkanSemaphore;
+
+class VulkanPhysicalDeviceList;
+class VulkanMemoryHeapList;
+class VulkanMemoryTypeList;
+class VulkanQueueFamilyList;
+class VulkanQueueFamilyToQueueCountMap;
+class VulkanQueueFamilyToQueueListMap;
+class VulkanQueueList;
+class VulkanCommandBufferList;
+class VulkanDescriptorSetLayoutList;
+class VulkanBufferList;
+class VulkanImage2DList;
+class VulkanImageViewList;
+class VulkanDeviceMemoryList;
+class VulkanSemaphoreList;
+
+enum VulkanQueueFlag
+{
+    VULKAN_QUEUE_FLAG_GRAPHICS = VK_QUEUE_GRAPHICS_BIT,
+    VULKAN_QUEUE_FLAG_COMPUTE = VK_QUEUE_COMPUTE_BIT,
+    VULKAN_QUEUE_FLAG_TRANSFER = VK_QUEUE_TRANSFER_BIT,
+    VULKAN_QUEUE_FLAG_MASK_ALL = VULKAN_QUEUE_FLAG_GRAPHICS
+        | VULKAN_QUEUE_FLAG_COMPUTE | VULKAN_QUEUE_FLAG_TRANSFER
+};
+
+enum VulkanDescriptorType
+{
+    VULKAN_DESCRIPTOR_TYPE_SAMPLER = VK_DESCRIPTOR_TYPE_SAMPLER,
+    VULKAN_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER =
+        VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+    VULKAN_DESCRIPTOR_TYPE_SAMPLED_IMAGE = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+    VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+    VULKAN_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER =
+        VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+    VULKAN_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER =
+        VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+    VULKAN_DESCRIPTOR_TYPE_UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+    VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+    VULKAN_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC =
+        VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC,
+    VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC =
+        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC,
+    VULKAN_DESCRIPTOR_TYPE_INPUT_ATTACHMENT =
+        VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT,
+};
+
+enum VulkanShaderStage
+{
+    VULKAN_SHADER_STAGE_VERTEX = VK_SHADER_STAGE_VERTEX_BIT,
+    VULKAN_SHADER_STAGE_FRAGMENT = VK_SHADER_STAGE_FRAGMENT_BIT,
+    VULKAN_SHADER_STAGE_COMPUTE = VK_SHADER_STAGE_COMPUTE_BIT,
+    VULKAN_SHADER_STAGE_ALL_GRAPHICS = VK_SHADER_STAGE_ALL_GRAPHICS,
+    VULKAN_SHADER_STAGE_ALL = VK_SHADER_STAGE_ALL
+};
+
+enum VulkanPipelineBindPoint
+{
+    VULKAN_PIPELINE_BIND_POINT_GRAPHICS = VK_PIPELINE_BIND_POINT_GRAPHICS,
+    VULKAN_PIPELINE_BIND_POINT_COMPUTE = VK_PIPELINE_BIND_POINT_COMPUTE
+};
+
+enum VulkanMemoryTypeProperty
+{
+    VULKAN_MEMORY_TYPE_PROPERTY_NONE = 0,
+    VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL =
+        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+    VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT =
+        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+        | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+    VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_CACHED =
+        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+        | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+    VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_CACHED_COHERENT =
+        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT
+        | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+    VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_COHERENT =
+        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
+        | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+        | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+    VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_CACHED =
+        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
+        | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+        | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+    VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_CACHED_COHERENT =
+        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT
+        | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+        | VK_MEMORY_PROPERTY_HOST_CACHED_BIT
+        | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
+};
+
+enum VulkanMemoryHeapFlag
+{
+    VULKAN_MEMORY_HEAP_FLAG_NONE = 0,
+    VULKAN_MEMORY_HEAP_FLAG_DEVICE_LOCAL = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
+};
+
+enum VulkanExternalMemoryHandleType
+{
+    VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE = 0,
+    VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD =
+        VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
+    VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT =
+        VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR,
+    VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT =
+        VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR,
+    VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT =
+        VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
+        | VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR
+};
+
+enum VulkanExternalSemaphoreHandleType
+{
+    VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NONE = 0,
+    VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD =
+        VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
+    VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT =
+        VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR,
+    VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT =
+        VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR,
+    VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT =
+        VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
+        | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR
+};
+
+enum VulkanBufferUsage
+{
+    VULKAN_BUFFER_USAGE_TRANSFER_SRC = VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+    VULKAN_BUFFER_USAGE_TRANSFER_DST = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+    VULKAN_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER =
+        VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
+    VULKAN_BUFFER_USAGE_STORAGE_TEXEL_BUFFER =
+        VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
+    VULKAN_BUFFER_USAGE_UNIFORM_BUFFER = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+    VULKAN_BUFFER_USAGE_STORAGE_BUFFER = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
+    VULKAN_BUFFER_USAGE_INDEX_BUFFER = VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
+    VULKAN_BUFFER_USAGE_VERTEX_BUFFER = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
+    VULKAN_BUFFER_USAGE_INDIRECT_BUFFER = VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT,
+    VULKAN_BUFFER_USAGE_STORAGE_BUFFER_TRANSFER_SRC_DST =
+        VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT
+        | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+    VULKAN_BUFFER_USAGE_UNIFORM_BUFFER_TRANSFER_SRC_DST =
+        VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT
+        | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+};
+
+enum VulkanSharingMode
+{
+    VULKAN_SHARING_MODE_EXCLUSIVE = VK_SHARING_MODE_EXCLUSIVE,
+    VULKAN_SHARING_MODE_CONCURRENT = VK_SHARING_MODE_CONCURRENT
+};
+
+enum VulkanImageType
+{
+    VULKAN_IMAGE_TYPE_1D = VK_IMAGE_TYPE_1D,
+    VULKAN_IMAGE_TYPE_2D = VK_IMAGE_TYPE_2D,
+    VULKAN_IMAGE_TYPE_3D = VK_IMAGE_TYPE_3D
+};
+
+enum VulkanFormat
+{
+    VULKAN_FORMAT_UNDEFINED = VK_FORMAT_UNDEFINED,
+    VULKAN_FORMAT_R4G4_UNORM_PACK8 = VK_FORMAT_R4G4_UNORM_PACK8,
+    VULKAN_FORMAT_R4G4B4A4_UNORM_PACK16 = VK_FORMAT_R4G4B4A4_UNORM_PACK16,
+    VULKAN_FORMAT_B4G4R4A4_UNORM_PACK16 = VK_FORMAT_B4G4R4A4_UNORM_PACK16,
+    VULKAN_FORMAT_R5G6B5_UNORM_PACK16 = VK_FORMAT_R5G6B5_UNORM_PACK16,
+    VULKAN_FORMAT_B5G6R5_UNORM_PACK16 = VK_FORMAT_B5G6R5_UNORM_PACK16,
+    VULKAN_FORMAT_R5G5B5A1_UNORM_PACK16 = VK_FORMAT_R5G5B5A1_UNORM_PACK16,
+    VULKAN_FORMAT_B5G5R5A1_UNORM_PACK16 = VK_FORMAT_B5G5R5A1_UNORM_PACK16,
+    VULKAN_FORMAT_A1R5G5B5_UNORM_PACK16 = VK_FORMAT_A1R5G5B5_UNORM_PACK16,
+    VULKAN_FORMAT_R8_UNORM = VK_FORMAT_R8_UNORM,
+    VULKAN_FORMAT_R8_SNORM = VK_FORMAT_R8_SNORM,
+    VULKAN_FORMAT_R8_USCALED = VK_FORMAT_R8_USCALED,
+    VULKAN_FORMAT_R8_SSCALED = VK_FORMAT_R8_SSCALED,
+    VULKAN_FORMAT_R8_UINT = VK_FORMAT_R8_UINT,
+    VULKAN_FORMAT_R8_SINT = VK_FORMAT_R8_SINT,
+    VULKAN_FORMAT_R8_SRGB = VK_FORMAT_R8_SRGB,
+    VULKAN_FORMAT_R8G8_SNORM = VK_FORMAT_R8G8_SNORM,
+    VULKAN_FORMAT_R8G8_UNORM = VK_FORMAT_R8G8_UNORM,
+    VULKAN_FORMAT_R8G8_USCALED = VK_FORMAT_R8G8_USCALED,
+    VULKAN_FORMAT_R8G8_SSCALED = VK_FORMAT_R8G8_SSCALED,
+    VULKAN_FORMAT_R8G8_UINT = VK_FORMAT_R8G8_UINT,
+    VULKAN_FORMAT_R8G8_SINT = VK_FORMAT_R8G8_SINT,
+    VULKAN_FORMAT_R8G8_SRGB = VK_FORMAT_R8G8_SRGB,
+    VULKAN_FORMAT_R8G8B8_UNORM = VK_FORMAT_R8G8B8_UNORM,
+    VULKAN_FORMAT_R8G8B8_SNORM = VK_FORMAT_R8G8B8_SNORM,
+    VULKAN_FORMAT_R8G8B8_USCALED = VK_FORMAT_R8G8B8_USCALED,
+    VULKAN_FORMAT_R8G8B8_SSCALED = VK_FORMAT_R8G8B8_SSCALED,
+    VULKAN_FORMAT_R8G8B8_UINT = VK_FORMAT_R8G8B8_UINT,
+    VULKAN_FORMAT_R8G8B8_SINT = VK_FORMAT_R8G8B8_SINT,
+    VULKAN_FORMAT_R8G8B8_SRGB = VK_FORMAT_R8G8B8_SRGB,
+    VULKAN_FORMAT_B8G8R8_UNORM = VK_FORMAT_B8G8R8_UNORM,
+    VULKAN_FORMAT_B8G8R8_SNORM = VK_FORMAT_B8G8R8_SNORM,
+    VULKAN_FORMAT_B8G8R8_USCALED = VK_FORMAT_B8G8R8_USCALED,
+    VULKAN_FORMAT_B8G8R8_SSCALED = VK_FORMAT_B8G8R8_SSCALED,
+    VULKAN_FORMAT_B8G8R8_UINT = VK_FORMAT_B8G8R8_UINT,
+    VULKAN_FORMAT_B8G8R8_SINT = VK_FORMAT_B8G8R8_SINT,
+    VULKAN_FORMAT_B8G8R8_SRGB = VK_FORMAT_B8G8R8_SRGB,
+    VULKAN_FORMAT_R8G8B8A8_UNORM = VK_FORMAT_R8G8B8A8_UNORM,
+    VULKAN_FORMAT_R8G8B8A8_SNORM = VK_FORMAT_R8G8B8A8_SNORM,
+    VULKAN_FORMAT_R8G8B8A8_USCALED = VK_FORMAT_R8G8B8A8_USCALED,
+    VULKAN_FORMAT_R8G8B8A8_SSCALED = VK_FORMAT_R8G8B8A8_SSCALED,
+    VULKAN_FORMAT_R8G8B8A8_UINT = VK_FORMAT_R8G8B8A8_UINT,
+    VULKAN_FORMAT_R8G8B8A8_SINT = VK_FORMAT_R8G8B8A8_SINT,
+    VULKAN_FORMAT_R8G8B8A8_SRGB = VK_FORMAT_R8G8B8A8_SRGB,
+    VULKAN_FORMAT_B8G8R8A8_UNORM = VK_FORMAT_B8G8R8A8_UNORM,
+    VULKAN_FORMAT_B8G8R8A8_SNORM = VK_FORMAT_B8G8R8A8_SNORM,
+    VULKAN_FORMAT_B8G8R8A8_USCALED = VK_FORMAT_B8G8R8A8_USCALED,
+    VULKAN_FORMAT_B8G8R8A8_SSCALED = VK_FORMAT_B8G8R8A8_SSCALED,
+    VULKAN_FORMAT_B8G8R8A8_UINT = VK_FORMAT_B8G8R8A8_UINT,
+    VULKAN_FORMAT_B8G8R8A8_SINT = VK_FORMAT_B8G8R8A8_SINT,
+    VULKAN_FORMAT_B8G8R8A8_SRGB = VK_FORMAT_B8G8R8A8_SRGB,
+    VULKAN_FORMAT_A8B8G8R8_UNORM_PACK32 = VK_FORMAT_A8B8G8R8_UNORM_PACK32,
+    VULKAN_FORMAT_A8B8G8R8_SNORM_PACK32 = VK_FORMAT_A8B8G8R8_SNORM_PACK32,
+    VULKAN_FORMAT_A8B8G8R8_USCALED_PACK32 = VK_FORMAT_A8B8G8R8_USCALED_PACK32,
+    VULKAN_FORMAT_A8B8G8R8_SSCALED_PACK32 = VK_FORMAT_A8B8G8R8_SSCALED_PACK32,
+    VULKAN_FORMAT_A8B8G8R8_UINT_PACK32 = VK_FORMAT_A8B8G8R8_UINT_PACK32,
+    VULKAN_FORMAT_A8B8G8R8_SINT_PACK32 = VK_FORMAT_A8B8G8R8_SINT_PACK32,
+    VULKAN_FORMAT_A8B8G8R8_SRGB_PACK32 = VK_FORMAT_A8B8G8R8_SRGB_PACK32,
+    VULKAN_FORMAT_A2R10G10B10_UNORM_PACK32 = VK_FORMAT_A2R10G10B10_UNORM_PACK32,
+    VULKAN_FORMAT_A2R10G10B10_SNORM_PACK32 = VK_FORMAT_A2R10G10B10_SNORM_PACK32,
+    VULKAN_FORMAT_A2R10G10B10_USCALED_PACK32 =
+        VK_FORMAT_A2R10G10B10_USCALED_PACK32,
+    VULKAN_FORMAT_A2R10G10B10_SSCALED_PACK32 =
+        VK_FORMAT_A2R10G10B10_SSCALED_PACK32,
+    VULKAN_FORMAT_A2R10G10B10_UINT_PACK32 = VK_FORMAT_A2R10G10B10_UINT_PACK32,
+    VULKAN_FORMAT_A2R10G10B10_SINT_PACK32 = VK_FORMAT_A2R10G10B10_SINT_PACK32,
+    VULKAN_FORMAT_A2B10G10R10_UNORM_PACK32 = VK_FORMAT_A2B10G10R10_UNORM_PACK32,
+    VULKAN_FORMAT_A2B10G10R10_SNORM_PACK32 = VK_FORMAT_A2B10G10R10_SNORM_PACK32,
+    VULKAN_FORMAT_A2B10G10R10_USCALED_PACK32 =
+        VK_FORMAT_A2B10G10R10_USCALED_PACK32,
+    VULKAN_FORMAT_A2B10G10R10_SSCALED_PACK32 =
+        VK_FORMAT_A2B10G10R10_SSCALED_PACK32,
+    VULKAN_FORMAT_A2B10G10R10_UINT_PACK32 = VK_FORMAT_A2B10G10R10_UINT_PACK32,
+    VULKAN_FORMAT_A2B10G10R10_SINT_PACK32 = VK_FORMAT_A2B10G10R10_SINT_PACK32,
+    VULKAN_FORMAT_R16_UNORM = VK_FORMAT_R16_UNORM,
+    VULKAN_FORMAT_R16_SNORM = VK_FORMAT_R16_SNORM,
+    VULKAN_FORMAT_R16_USCALED = VK_FORMAT_R16_USCALED,
+    VULKAN_FORMAT_R16_SSCALED = VK_FORMAT_R16_SSCALED,
+    VULKAN_FORMAT_R16_UINT = VK_FORMAT_R16_UINT,
+    VULKAN_FORMAT_R16_SINT = VK_FORMAT_R16_SINT,
+    VULKAN_FORMAT_R16_SFLOAT = VK_FORMAT_R16_SFLOAT,
+    VULKAN_FORMAT_R16G16_UNORM = VK_FORMAT_R16G16_UNORM,
+    VULKAN_FORMAT_R16G16_SNORM = VK_FORMAT_R16G16_SNORM,
+    VULKAN_FORMAT_R16G16_USCALED = VK_FORMAT_R16G16_USCALED,
+    VULKAN_FORMAT_R16G16_SSCALED = VK_FORMAT_R16G16_SSCALED,
+    VULKAN_FORMAT_R16G16_UINT = VK_FORMAT_R16G16_UINT,
+    VULKAN_FORMAT_R16G16_SINT = VK_FORMAT_R16G16_SINT,
+    VULKAN_FORMAT_R16G16_SFLOAT = VK_FORMAT_R16G16_SFLOAT,
+    VULKAN_FORMAT_R16G16B16_UNORM = VK_FORMAT_R16G16B16_UNORM,
+    VULKAN_FORMAT_R16G16B16_SNORM = VK_FORMAT_R16G16B16_SNORM,
+    VULKAN_FORMAT_R16G16B16_USCALED = VK_FORMAT_R16G16B16_USCALED,
+    VULKAN_FORMAT_R16G16B16_SSCALED = VK_FORMAT_R16G16B16_SSCALED,
+    VULKAN_FORMAT_R16G16B16_UINT = VK_FORMAT_R16G16B16_UINT,
+    VULKAN_FORMAT_R16G16B16_SINT = VK_FORMAT_R16G16B16_SINT,
+    VULKAN_FORMAT_R16G16B16_SFLOAT = VK_FORMAT_R16G16B16_SFLOAT,
+    VULKAN_FORMAT_R16G16B16A16_UNORM = VK_FORMAT_R16G16B16A16_UNORM,
+    VULKAN_FORMAT_R16G16B16A16_SNORM = VK_FORMAT_R16G16B16A16_SNORM,
+    VULKAN_FORMAT_R16G16B16A16_USCALED = VK_FORMAT_R16G16B16A16_USCALED,
+    VULKAN_FORMAT_R16G16B16A16_SSCALED = VK_FORMAT_R16G16B16A16_SSCALED,
+    VULKAN_FORMAT_R16G16B16A16_UINT = VK_FORMAT_R16G16B16A16_UINT,
+    VULKAN_FORMAT_R16G16B16A16_SINT = VK_FORMAT_R16G16B16A16_SINT,
+    VULKAN_FORMAT_R16G16B16A16_SFLOAT = VK_FORMAT_R16G16B16A16_SFLOAT,
+    VULKAN_FORMAT_R32_UINT = VK_FORMAT_R32_UINT,
+    VULKAN_FORMAT_R32_SINT = VK_FORMAT_R32_SINT,
+    VULKAN_FORMAT_R32_SFLOAT = VK_FORMAT_R32_SFLOAT,
+    VULKAN_FORMAT_R32G32_UINT = VK_FORMAT_R32G32_UINT,
+    VULKAN_FORMAT_R32G32_SINT = VK_FORMAT_R32G32_SINT,
+    VULKAN_FORMAT_R32G32_SFLOAT = VK_FORMAT_R32G32_SFLOAT,
+    VULKAN_FORMAT_R32G32B32_UINT = VK_FORMAT_R32G32B32_UINT,
+    VULKAN_FORMAT_R32G32B32_SINT = VK_FORMAT_R32G32B32_SINT,
+    VULKAN_FORMAT_R32G32B32_SFLOAT = VK_FORMAT_R32G32B32_SFLOAT,
+    VULKAN_FORMAT_R32G32B32A32_UINT = VK_FORMAT_R32G32B32A32_UINT,
+    VULKAN_FORMAT_R32G32B32A32_SINT = VK_FORMAT_R32G32B32A32_SINT,
+    VULKAN_FORMAT_R32G32B32A32_SFLOAT = VK_FORMAT_R32G32B32A32_SFLOAT,
+    VULKAN_FORMAT_R64_UINT = VK_FORMAT_R64_UINT,
+    VULKAN_FORMAT_R64_SINT = VK_FORMAT_R64_SINT,
+    VULKAN_FORMAT_R64_SFLOAT = VK_FORMAT_R64_SFLOAT,
+    VULKAN_FORMAT_R64G64_UINT = VK_FORMAT_R64G64_UINT,
+    VULKAN_FORMAT_R64G64_SINT = VK_FORMAT_R64G64_SINT,
+    VULKAN_FORMAT_R64G64_SFLOAT = VK_FORMAT_R64G64_SFLOAT,
+    VULKAN_FORMAT_R64G64B64_UINT = VK_FORMAT_R64G64B64_UINT,
+    VULKAN_FORMAT_R64G64B64_SINT = VK_FORMAT_R64G64B64_SINT,
+    VULKAN_FORMAT_R64G64B64_SFLOAT = VK_FORMAT_R64G64B64_SFLOAT,
+    VULKAN_FORMAT_R64G64B64A64_UINT = VK_FORMAT_R64G64B64A64_UINT,
+    VULKAN_FORMAT_R64G64B64A64_SINT = VK_FORMAT_R64G64B64A64_SINT,
+    VULKAN_FORMAT_R64G64B64A64_SFLOAT = VK_FORMAT_R64G64B64A64_SFLOAT,
+    VULKAN_FORMAT_B10G11R11_UFLOAT_PACK32 = VK_FORMAT_B10G11R11_UFLOAT_PACK32,
+    VULKAN_FORMAT_E5B9G9R9_UFLOAT_PACK32 = VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
+    VULKAN_FORMAT_D16_UNORM = VK_FORMAT_D16_UNORM,
+    VULKAN_FORMAT_X8_D24_UNORM_PACK32 = VK_FORMAT_X8_D24_UNORM_PACK32,
+    VULKAN_FORMAT_D32_SFLOAT = VK_FORMAT_D32_SFLOAT,
+    VULKAN_FORMAT_S8_UINT = VK_FORMAT_S8_UINT,
+    VULKAN_FORMAT_D16_UNORM_S8_UINT = VK_FORMAT_D16_UNORM_S8_UINT,
+    VULKAN_FORMAT_D24_UNORM_S8_UINT = VK_FORMAT_D24_UNORM_S8_UINT,
+    VULKAN_FORMAT_D32_SFLOAT_S8_UINT = VK_FORMAT_D32_SFLOAT_S8_UINT,
+    VULKAN_FORMAT_BC1_RGB_UNORM_BLOCK = VK_FORMAT_BC1_RGB_UNORM_BLOCK,
+    VULKAN_FORMAT_BC1_RGB_SRGB_BLOCK = VK_FORMAT_BC1_RGB_SRGB_BLOCK,
+    VULKAN_FORMAT_BC1_RGBA_UNORM_BLOCK = VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
+    VULKAN_FORMAT_BC1_RGBA_SRGB_BLOCK = VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
+    VULKAN_FORMAT_BC2_UNORM_BLOCK = VK_FORMAT_BC2_UNORM_BLOCK,
+    VULKAN_FORMAT_BC2_SRGB_BLOCK = VK_FORMAT_BC2_SRGB_BLOCK,
+    VULKAN_FORMAT_BC3_UNORM_BLOCK = VK_FORMAT_BC3_UNORM_BLOCK,
+    VULKAN_FORMAT_BC3_SRGB_BLOCK = VK_FORMAT_BC3_SRGB_BLOCK,
+    VULKAN_FORMAT_BC4_UNORM_BLOCK = VK_FORMAT_BC4_UNORM_BLOCK,
+    VULKAN_FORMAT_BC4_SNORM_BLOCK = VK_FORMAT_BC4_SNORM_BLOCK,
+    VULKAN_FORMAT_BC5_UNORM_BLOCK = VK_FORMAT_BC5_UNORM_BLOCK,
+    VULKAN_FORMAT_BC5_SNORM_BLOCK = VK_FORMAT_BC5_SNORM_BLOCK,
+    VULKAN_FORMAT_BC6H_UFLOAT_BLOCK = VK_FORMAT_BC6H_UFLOAT_BLOCK,
+    VULKAN_FORMAT_BC6H_SFLOAT_BLOCK = VK_FORMAT_BC6H_SFLOAT_BLOCK,
+    VULKAN_FORMAT_BC7_UNORM_BLOCK = VK_FORMAT_BC7_UNORM_BLOCK,
+    VULKAN_FORMAT_BC7_SRGB_BLOCK = VK_FORMAT_BC7_SRGB_BLOCK,
+    VULKAN_FORMAT_ETC2_R8G8B8_UNORM_BLOCK = VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK,
+    VULKAN_FORMAT_ETC2_R8G8B8_SRGB_BLOCK = VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK,
+    VULKAN_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK =
+        VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK,
+    VULKAN_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK = VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK,
+    VULKAN_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK =
+        VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK,
+    VULKAN_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK = VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK,
+    VULKAN_FORMAT_EAC_R11_UNORM_BLOCK = VK_FORMAT_EAC_R11_UNORM_BLOCK,
+    VULKAN_FORMAT_EAC_R11_SNORM_BLOCK = VK_FORMAT_EAC_R11_SNORM_BLOCK,
+    VULKAN_FORMAT_EAC_R11G11_UNORM_BLOCK = VK_FORMAT_EAC_R11G11_UNORM_BLOCK,
+    VULKAN_FORMAT_EAC_R11G11_SNORM_BLOCK = VK_FORMAT_EAC_R11G11_SNORM_BLOCK,
+    VULKAN_FORMAT_ASTC_4x4_UNORM_BLOCK = VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
+    VULKAN_FORMAT_ASTC_4x4_SRGB_BLOCK = VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
+    VULKAN_FORMAT_ASTC_5x4_UNORM_BLOCK = VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
+    VULKAN_FORMAT_ASTC_5x4_SRGB_BLOCK = VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
+    VULKAN_FORMAT_ASTC_5x5_UNORM_BLOCK = VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
+    VULKAN_FORMAT_ASTC_5x5_SRGB_BLOCK = VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
+    VULKAN_FORMAT_ASTC_6x5_UNORM_BLOCK = VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
+    VULKAN_FORMAT_ASTC_6x5_SRGB_BLOCK = VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
+    VULKAN_FORMAT_ASTC_6x6_UNORM_BLOCK = VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
+    VULKAN_FORMAT_ASTC_6x6_SRGB_BLOCK = VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
+    VULKAN_FORMAT_ASTC_8x5_UNORM_BLOCK = VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
+    VULKAN_FORMAT_ASTC_8x5_SRGB_BLOCK = VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
+    VULKAN_FORMAT_ASTC_8x6_UNORM_BLOCK = VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
+    VULKAN_FORMAT_ASTC_8x6_SRGB_BLOCK = VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
+    VULKAN_FORMAT_ASTC_8x8_UNORM_BLOCK = VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
+    VULKAN_FORMAT_ASTC_8x8_SRGB_BLOCK = VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
+    VULKAN_FORMAT_ASTC_10x5_UNORM_BLOCK = VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
+    VULKAN_FORMAT_ASTC_10x5_SRGB_BLOCK = VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
+    VULKAN_FORMAT_ASTC_10x6_UNORM_BLOCK = VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
+    VULKAN_FORMAT_ASTC_10x6_SRGB_BLOCK = VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
+    VULKAN_FORMAT_ASTC_10x8_UNORM_BLOCK = VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
+    VULKAN_FORMAT_ASTC_10x8_SRGB_BLOCK = VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
+    VULKAN_FORMAT_ASTC_10x10_UNORM_BLOCK = VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
+    VULKAN_FORMAT_ASTC_10x10_SRGB_BLOCK = VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
+    VULKAN_FORMAT_ASTC_12x10_UNORM_BLOCK = VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
+    VULKAN_FORMAT_ASTC_12x10_SRGB_BLOCK = VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
+    VULKAN_FORMAT_ASTC_12x12_UNORM_BLOCK = VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
+    VULKAN_FORMAT_ASTC_12x12_SRGB_BLOCK = VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
+};
+
+enum VulkanImageLayout
+{
+    VULKAN_IMAGE_LAYOUT_UNDEFINED = VK_IMAGE_LAYOUT_UNDEFINED,
+    VULKAN_IMAGE_LAYOUT_GENERAL = VK_IMAGE_LAYOUT_GENERAL,
+    VULKAN_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL =
+        VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+    VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL =
+        VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+};
+
+enum VulkanImageUsage
+{
+    VULKAN_IMAGE_USAGE_TRANSFER_SRC = VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
+    VULKAN_IMAGE_USAGE_TRANSFER_DST = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
+    VULKAN_IMAGE_USAGE_SAMPLED = VK_IMAGE_USAGE_SAMPLED_BIT,
+    VULKAN_IMAGE_USAGE_STORAGE = VK_IMAGE_USAGE_STORAGE_BIT,
+    VULKAN_IMAGE_USAGE_COLOR_ATTACHMENT = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+    VULKAN_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT =
+        VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+    VULKAN_IMAGE_USAGE_TRANSIENT_ATTACHMENT =
+        VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT,
+    VULKAN_IMAGE_USAGE_INPUT_ATTACHMENT = VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT,
+    VULKAN_IMAGE_USAGE_TRANSFER_SRC_DST =
+        VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
+    VULKAN_IMAGE_USAGE_STORAGE_TRANSFER_SRC_DST = VULKAN_IMAGE_USAGE_STORAGE
+        | VULKAN_IMAGE_USAGE_TRANSFER_SRC | VULKAN_IMAGE_USAGE_TRANSFER_DST,
+    VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST =
+        VK_IMAGE_USAGE_SAMPLED_BIT | VULKAN_IMAGE_USAGE_STORAGE
+        | VULKAN_IMAGE_USAGE_TRANSFER_SRC | VULKAN_IMAGE_USAGE_TRANSFER_DST
+};
+
+enum VulkanImageTiling
+{
+    VULKAN_IMAGE_TILING_OPTIMAL = VK_IMAGE_TILING_OPTIMAL,
+    VULKAN_IMAGE_TILING_LINEAR = VK_IMAGE_TILING_LINEAR
+};
+
+enum VulkanImageCreateFlag
+{
+    VULKAN_IMAGE_CREATE_FLAG_NONE = 0,
+    VULKAN_IMAGE_CREATE_FLAG_MUTABLE_FORMAT =
+        VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
+    VULKAN_IMAGE_CREATE_FLAG_CUBE_COMPATIBLE =
+        VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT,
+    VULKAN_IMAGE_CREATE_FLAG_CUBE_COMPATIBLE_MUTABLE_FORMAT =
+        VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT | VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT
+};
+
+enum VulkanImageViewType
+{
+    VULKAN_IMAGE_VIEW_TYPE_1D = VK_IMAGE_VIEW_TYPE_1D,
+    VULKAN_IMAGE_VIEW_TYPE_2D = VK_IMAGE_VIEW_TYPE_2D,
+    VULKAN_IMAGE_VIEW_TYPE_3D = VK_IMAGE_VIEW_TYPE_3D,
+    VULKAN_IMAGE_VIEW_TYPE_CUBE = VK_IMAGE_VIEW_TYPE_CUBE,
+    VULKAN_IMAGE_VIEW_TYPE_1D_ARRAY = VK_IMAGE_VIEW_TYPE_1D_ARRAY,
+    VULKAN_IMAGE_VIEW_TYPE_2D_ARRAY = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
+    VULKAN_IMAGE_VIEW_TYPE_CUBE_ARRAY = VK_IMAGE_VIEW_TYPE_CUBE_ARRAY,
+};
+
+#endif // _vulkan_wrapper_types_hpp_
diff --git a/test_conformance/commonfns/CMakeLists.txt b/test_conformance/commonfns/CMakeLists.txt
index 5aa29250..bea20cf5 100644
--- a/test_conformance/commonfns/CMakeLists.txt
+++ b/test_conformance/commonfns/CMakeLists.txt
@@ -3,22 +3,10 @@ set(MODULE_NAME COMMONFNS)
 set(${MODULE_NAME}_SOURCES
     main.cpp
     test_clamp.cpp
-    test_degrees.cpp
-    test_max.cpp
-    test_maxf.cpp
-    test_min.cpp
-    test_minf.cpp
+    test_unary_fn.cpp
     test_mix.cpp
-    test_radians.cpp
     test_step.cpp
-    test_stepf.cpp
     test_smoothstep.cpp
-    test_smoothstepf.cpp
-    test_sign.cpp
-    test_fmax.cpp
-    test_fmin.cpp
-    test_fmaxf.cpp
-    test_fminf.cpp
     test_binary_fn.cpp
 )
 
diff --git a/test_conformance/commonfns/main.cpp b/test_conformance/commonfns/main.cpp
index b8364d5a..3e4b0b8e 100644
--- a/test_conformance/commonfns/main.cpp
+++ b/test_conformance/commonfns/main.cpp
@@ -13,11 +13,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/compat.h"
 
 #include <stdio.h>
 #include <string.h>
 #include "procs.h"
+#include "test_base.h"
+
+std::map<size_t, std::string> BaseFunctionTest::type2name;
 
 int g_arrVecSizes[kVectorSizeCount + kStrangeVectorSizeCount];
 int g_arrStrangeVectorSizes[kStrangeVectorSizeCount] = {3};
@@ -32,25 +34,13 @@ static void initVecSizes() {
     }
 }
 
-
 test_definition test_list[] = {
-    ADD_TEST( clamp ),
-    ADD_TEST( degrees ),
-    ADD_TEST( fmax ),
-    ADD_TEST( fmaxf ),
-    ADD_TEST( fmin ),
-    ADD_TEST( fminf ),
-    ADD_TEST( max ),
-    ADD_TEST( maxf ),
-    ADD_TEST( min ),
-    ADD_TEST( minf ),
-    ADD_TEST( mix ),
-    ADD_TEST( radians ),
-    ADD_TEST( step ),
-    ADD_TEST( stepf ),
-    ADD_TEST( smoothstep ),
-    ADD_TEST( smoothstepf ),
-    ADD_TEST( sign ),
+    ADD_TEST(clamp),      ADD_TEST(degrees),     ADD_TEST(fmax),
+    ADD_TEST(fmaxf),      ADD_TEST(fmin),        ADD_TEST(fminf),
+    ADD_TEST(max),        ADD_TEST(maxf),        ADD_TEST(min),
+    ADD_TEST(minf),       ADD_TEST(mix),         ADD_TEST(mixf),
+    ADD_TEST(radians),    ADD_TEST(step),        ADD_TEST(stepf),
+    ADD_TEST(smoothstep), ADD_TEST(smoothstepf), ADD_TEST(sign),
 };
 
 const int test_num = ARRAY_SIZE( test_list );
@@ -58,6 +48,14 @@ const int test_num = ARRAY_SIZE( test_list );
 int main(int argc, const char *argv[])
 {
     initVecSizes();
+
+    if (BaseFunctionTest::type2name.empty())
+    {
+        BaseFunctionTest::type2name[sizeof(half)] = "half";
+        BaseFunctionTest::type2name[sizeof(float)] = "float";
+        BaseFunctionTest::type2name[sizeof(double)] = "double";
+    }
+
     return runTestHarness(argc, argv, test_num, test_list, false, 0);
 }
 
diff --git a/test_conformance/commonfns/procs.h b/test_conformance/commonfns/procs.h
index dada94f9..c1115ee7 100644
--- a/test_conformance/commonfns/procs.h
+++ b/test_conformance/commonfns/procs.h
@@ -37,6 +37,8 @@ extern int        test_maxf(cl_device_id device, cl_context context, cl_command_
 extern int        test_min(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
 extern int        test_minf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
 extern int        test_mix(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_mixf(cl_device_id device, cl_context context,
+                     cl_command_queue queue, int num_elements);
 extern int        test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
 extern int        test_step(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
 extern int        test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
@@ -44,11 +46,4 @@ extern int        test_smoothstep(cl_device_id device, cl_context context, cl_co
 extern int        test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
 extern int        test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements);
 
-typedef int     (*binary_verify_float_fn)( float *x, float *y, float *out, int numElements, int vecSize );
-typedef int     (*binary_verify_double_fn)( double *x, double *y, double *out, int numElements, int vecSize );
-
-extern int      test_binary_fn( cl_device_id device, cl_context context, cl_command_queue queue, int n_elems,
-                           const char *fnName, bool vectorSecondParam,
-                           binary_verify_float_fn floatVerifyFn, binary_verify_double_fn doubleVerifyFn );
-
 
diff --git a/test_conformance/commonfns/test_base.h b/test_conformance/commonfns/test_base.h
new file mode 100644
index 00000000..44291042
--- /dev/null
+++ b/test_conformance/commonfns/test_base.h
@@ -0,0 +1,193 @@
+// Copyright (c) 2023 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef TEST_COMMONFNS_BASE_H
+#define TEST_COMMONFNS_BASE_H
+
+#include <vector>
+#include <map>
+#include <memory>
+
+#include <CL/cl_half.h>
+#include <CL/cl_ext.h>
+
+#include "harness/deviceInfo.h"
+#include "harness/testHarness.h"
+#include "harness/typeWrappers.h"
+
+
+template <typename T>
+using VerifyFuncBinary = int (*)(const T *const, const T *const, const T *const,
+                                 const int num, const int vs, const int vp);
+
+
+template <typename T>
+using VerifyFuncUnary = int (*)(const T *const, const T *const, const int num);
+
+
+using half = cl_half;
+
+
+struct BaseFunctionTest
+{
+    BaseFunctionTest(cl_device_id device, cl_context context,
+                     cl_command_queue queue, int num_elems, const char *fn,
+                     bool vsp)
+        : device(device), context(context), queue(queue), num_elems(num_elems),
+          fnName(fn), vecParam(vsp)
+    {}
+
+    // Test body returning an OpenCL error code
+    virtual cl_int Run() = 0;
+
+    cl_device_id device;
+    cl_context context;
+    cl_command_queue queue;
+
+    int num_elems;
+    std::string fnName;
+    bool vecParam;
+
+    static std::map<size_t, std::string> type2name;
+};
+
+
+struct MinTest : BaseFunctionTest
+{
+    MinTest(cl_device_id device, cl_context context, cl_command_queue queue,
+            int num_elems, const char *fn, bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+struct MaxTest : BaseFunctionTest
+{
+    MaxTest(cl_device_id device, cl_context context, cl_command_queue queue,
+            int num_elems, const char *fn, bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+struct ClampTest : BaseFunctionTest
+{
+    ClampTest(cl_device_id device, cl_context context, cl_command_queue queue,
+              int num_elems, const char *fn, bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+struct DegreesTest : BaseFunctionTest
+{
+    DegreesTest(cl_device_id device, cl_context context, cl_command_queue queue,
+                int num_elems, const char *fn, bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+struct RadiansTest : BaseFunctionTest
+{
+    RadiansTest(cl_device_id device, cl_context context, cl_command_queue queue,
+                int num_elems, const char *fn, bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+struct SignTest : BaseFunctionTest
+{
+    SignTest(cl_device_id device, cl_context context, cl_command_queue queue,
+             int num_elems, const char *fn, bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+struct SmoothstepTest : BaseFunctionTest
+{
+    SmoothstepTest(cl_device_id device, cl_context context,
+                   cl_command_queue queue, int num_elems, const char *fn,
+                   bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+struct StepTest : BaseFunctionTest
+{
+    StepTest(cl_device_id device, cl_context context, cl_command_queue queue,
+             int num_elems, const char *fn, bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+struct MixTest : BaseFunctionTest
+{
+    MixTest(cl_device_id device, cl_context context, cl_command_queue queue,
+            int num_elems, const char *fn, bool vsp)
+        : BaseFunctionTest(device, context, queue, num_elems, fn, vsp)
+    {}
+
+    cl_int Run() override;
+};
+
+
+template <typename... Args>
+std::string string_format(const std::string &format, Args... args)
+{
+    int sformat = std::snprintf(nullptr, 0, format.c_str(), args...) + 1;
+    if (sformat <= 0)
+        throw std::runtime_error("string_format: string processing error.");
+    auto format_size = static_cast<size_t>(sformat);
+    std::unique_ptr<char[]> buffer(new char[format_size]);
+    std::snprintf(buffer.get(), format_size, format.c_str(), args...);
+    return std::string(buffer.get(), buffer.get() + format_size - 1);
+}
+
+
+template <class T>
+int MakeAndRunTest(cl_device_id device, cl_context context,
+                   cl_command_queue queue, int num_elements,
+                   const char *fn = "", bool vsp = false)
+{
+    auto test_fixture = T(device, context, queue, num_elements, fn, vsp);
+
+    cl_int error = test_fixture.Run();
+    test_error_ret(error, "Test Failed", TEST_FAIL);
+
+    return TEST_PASS;
+}
+
+#endif // TEST_COMMONFNS_BASE_H
diff --git a/test_conformance/commonfns/test_binary_fn.cpp b/test_conformance/commonfns/test_binary_fn.cpp
index b40bf1f6..1eb12f73 100644
--- a/test_conformance/commonfns/test_binary_fn.cpp
+++ b/test_conformance/commonfns/test_binary_fn.cpp
@@ -13,14 +13,18 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/compat.h"
 
 #include <stdio.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <vector>
+
+#include "harness/deviceInfo.h"
+#include "harness/typeWrappers.h"
 
 #include "procs.h"
+#include "test_base.h"
 
 const char *binary_fn_code_pattern =
 "%s\n" /* optional pragma */
@@ -49,216 +53,286 @@ const char *binary_fn_code_pattern_v3_scalar =
 "    vstore3(%s(vload3(tid,x), y[tid] ), tid, dst);\n"
 "}\n";
 
-int test_binary_fn( cl_device_id device, cl_context context, cl_command_queue queue, int n_elems,
-                    const char *fnName, bool vectorSecondParam,
-                    binary_verify_float_fn floatVerifyFn, binary_verify_double_fn doubleVerifyFn )
+
+template <typename T>
+int test_binary_fn(cl_device_id device, cl_context context,
+                   cl_command_queue queue, int n_elems,
+                   const std::string& fnName, bool vecSecParam,
+                   VerifyFuncBinary<T> verifyFn)
 {
-    cl_mem      streams[6];
-    cl_float      *input_ptr[2], *output_ptr;
-    cl_double     *input_ptr_double[2], *output_ptr_double=NULL;
-    cl_program  *program;
-    cl_kernel   *kernel;
-    size_t threads[1];
-    int num_elements;
-    int err;
-    int i, j;
-    MTdata d;
-
-      program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount*2);
-      kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount*2);
-
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-
-    int test_double = 0;
-    if(is_extension_available( device, "cl_khr_fp64" ))
-    {
-        log_info("Testing doubles.\n");
-        test_double = 1;
-    }
+    clMemWrapper streams[3];
+    std::vector<T> input_ptr[2], output_ptr;
 
-    for( i = 0; i < 2; i++ )
-    {
-        input_ptr[i] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-        if (test_double) input_ptr_double[i] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    }
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    if (test_double) output_ptr_double = (cl_double*)malloc(sizeof(cl_double) * num_elements);
+    std::vector<clProgramWrapper> programs;
+    std::vector<clKernelWrapper> kernels;
+    int err, i, j;
+    MTdataHolder d = MTdataHolder(gRandomSeed);
+
+    assert(BaseFunctionTest::type2name.find(sizeof(T))
+           != BaseFunctionTest::type2name.end());
+    auto tname = BaseFunctionTest::type2name[sizeof(T)];
+
+    programs.resize(kTotalVecCount);
+    kernels.resize(kTotalVecCount);
+
+    int num_elements = n_elems * (1 << (kTotalVecCount - 1));
+
+    for (i = 0; i < 2; i++) input_ptr[i].resize(num_elements);
+    output_ptr.resize(num_elements);
 
     for( i = 0; i < 3; i++ )
     {
-        streams[i] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * num_elements, NULL, &err);
+        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                    sizeof(T) * num_elements, NULL, &err);
         test_error( err, "clCreateBuffer failed");
     }
 
-    if (test_double)
-        for( i = 3; i < 6; i++ )
-        {
-            streams[i] =
-                clCreateBuffer(context, CL_MEM_READ_WRITE,
-                               sizeof(cl_double) * num_elements, NULL, &err);
-            test_error(err, "clCreateBuffer failed");
-        }
-
-    d = init_genrand( gRandomSeed );
-    for( j = 0; j < num_elements; j++ )
+    std::string pragma_str;
+    if (std::is_same<T, float>::value)
     {
-        input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d);
-        input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d);
-        if (test_double)
+        for (j = 0; j < num_elements; j++)
         {
-            input_ptr_double[0][j] = get_random_double(-0x20000000, 0x20000000, d);
-            input_ptr_double[1][j] = get_random_double(-0x20000000, 0x20000000, d);
+            input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d);
+            input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d);
         }
     }
-    free_mtdata(d);     d = NULL;
-
-    for( i = 0; i < 2; i++ )
+    else if (std::is_same<T, double>::value)
     {
-        err = clEnqueueWriteBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( cl_float ) * num_elements, input_ptr[ i ], 0, NULL, NULL );
-        test_error( err, "Unable to write input buffer" );
-
-        if (test_double)
+        pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+        for (j = 0; j < num_elements; j++)
         {
-          err = clEnqueueWriteBuffer( queue, streams[ 3 + i ], CL_TRUE, 0, sizeof( cl_double ) * num_elements, input_ptr_double[ i ], 0, NULL, NULL );
-          test_error( err, "Unable to write input buffer" );
+            input_ptr[0][j] = get_random_double(-0x20000000, 0x20000000, d);
+            input_ptr[1][j] = get_random_double(-0x20000000, 0x20000000, d);
         }
     }
 
-    for( i = 0; i < kTotalVecCount; i++ )
+    for (i = 0; i < 2; i++)
     {
-        char programSrc[ 10240 ];
-        char vecSizeNames[][ 3 ] = { "", "2", "4", "8", "16", "3" };
+        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
+                                   sizeof(T) * num_elements,
+                                   &input_ptr[i].front(), 0, NULL, NULL);
+        test_error(err, "Unable to write input buffer");
+    }
 
-        if(i >= kVectorSizeCount) {
-            // do vec3 print
+    char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
 
-            if(vectorSecondParam) {
-            sprintf( programSrc,binary_fn_code_pattern_v3, "", "float", "float", "float", fnName );
-        } else  {
-            sprintf( programSrc,binary_fn_code_pattern_v3_scalar, "", "float", "float", "float", fnName );
+    for (i = 0; i < kTotalVecCount; i++)
+    {
+        std::string kernelSource;
+        if (i >= kVectorSizeCount)
+        {
+            if (vecSecParam)
+            {
+                std::string str = binary_fn_code_pattern_v3;
+                kernelSource =
+                    string_format(str, pragma_str.c_str(), tname.c_str(),
+                                  tname.c_str(), tname.c_str(), fnName.c_str());
+            }
+            else
+            {
+                std::string str = binary_fn_code_pattern_v3_scalar;
+                kernelSource =
+                    string_format(str, pragma_str.c_str(), tname.c_str(),
+                                  tname.c_str(), tname.c_str(), fnName.c_str());
             }
-        } else  {
-            // do regular
-            sprintf( programSrc, binary_fn_code_pattern, "", "float", vecSizeNames[ i ], "float", vectorSecondParam ? vecSizeNames[ i ] : "", "float", vecSizeNames[ i ], fnName );
         }
-        const char *ptr = programSrc;
-        err = create_single_kernel_helper( context, &program[ i ], &kernel[ i ], 1, &ptr, "test_fn" );
-        test_error( err, "Unable to create kernel" );
-
-        if (test_double)
+        else
         {
-        if(i >= kVectorSizeCount) {
-        if(vectorSecondParam) {
-            sprintf( programSrc, binary_fn_code_pattern_v3, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable",
-            "double",  "double",  "double",  fnName );
-        } else {
-
-        sprintf( programSrc, binary_fn_code_pattern_v3_scalar, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable",
-                 "double",  "double",  "double",  fnName );
-        }
-        } else {
-        sprintf( programSrc, binary_fn_code_pattern, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable",
-            "double", vecSizeNames[ i ], "double", vectorSecondParam ? vecSizeNames[ i ] : "", "double", vecSizeNames[ i ], fnName );
-        }
-            ptr = programSrc;
-            err = create_single_kernel_helper( context, &program[ kTotalVecCount + i ], &kernel[ kTotalVecCount + i ], 1, &ptr, "test_fn" );
-            test_error( err, "Unable to create kernel" );
+            // do regular
+            std::string str = binary_fn_code_pattern;
+            kernelSource = string_format(
+                str, pragma_str.c_str(), tname.c_str(), vecSizeNames[i],
+                tname.c_str(), vecSecParam ? vecSizeNames[i] : "",
+                tname.c_str(), vecSizeNames[i], fnName.c_str());
         }
-    }
+        const char* programPtr = kernelSource.c_str();
+        err = create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
+                                          (const char**)&programPtr, "test_fn");
+        test_error(err, "Unable to create kernel");
 
-    for( i = 0; i < kTotalVecCount; i++ )
-    {
         for( j = 0; j < 3; j++ )
         {
-            err = clSetKernelArg( kernel[ i ], j, sizeof( streams[ j ] ), &streams[ j ] );
+            err =
+                clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
             test_error( err, "Unable to set kernel argument" );
         }
 
-        threads[0] = (size_t)n_elems;
+        size_t threads = (size_t)n_elems;
 
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
+        err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
+                                     0, NULL, NULL);
         test_error( err, "Unable to execute kernel" );
 
-        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
+        err = clEnqueueReadBuffer(queue, streams[2], true, 0,
+                                  sizeof(T) * num_elements, &output_ptr[0], 0,
+                                  NULL, NULL);
         test_error( err, "Unable to read results" );
 
-
-
-        if( floatVerifyFn( input_ptr[0], input_ptr[1], output_ptr, n_elems, ((g_arrVecSizes[i])) ) )
+        if (verifyFn((T*)&input_ptr[0].front(), (T*)&input_ptr[1].front(),
+                     &output_ptr[0], n_elems, g_arrVecSizes[i],
+                     vecSecParam ? 1 : 0))
         {
-            log_error(" float%d%s test failed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", float");
+            log_error("%s %s%d%s test failed\n", fnName.c_str(), tname.c_str(),
+                      ((g_arrVecSizes[i])),
+                      vecSecParam ? "" : std::string(", " + tname).c_str());
             err = -1;
         }
         else
         {
-            log_info(" float%d%s test passed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", float");
+            log_info("%s %s%d%s test passed\n", fnName.c_str(), tname.c_str(),
+                     ((g_arrVecSizes[i])),
+                     vecSecParam ? "" : std::string(", " + tname).c_str());
             err = 0;
         }
 
         if (err)
             break;
     }
+    return err;
+}
+
+namespace {
 
-    if (test_double)
+template <typename T>
+int max_verify(const T* const x, const T* const y, const T* const out,
+               int numElements, int vecSize, int vecParam)
+{
+    for (int i = 0; i < numElements; i++)
     {
-        for( i = 0; i < kTotalVecCount; i++ )
+        for (int j = 0; j < vecSize; j++)
         {
-            for( j = 0; j < 3; j++ )
+            int k = i * vecSize + j;
+            int l = (k * vecParam + i * (1 - vecParam));
+            T v = (x[k] < y[l]) ? y[l] : x[k];
+            if (v != out[k])
             {
-                err = clSetKernelArg( kernel[ kTotalVecCount + i ], j, sizeof( streams[ 3 + j ] ), &streams[ 3 + j ] );
-                test_error( err, "Unable to set kernel argument" );
+                log_error(
+                    "x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is "
+                    "vector %d, element %d, for vector size %d)\n",
+                    k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize);
+                return -1;
             }
+        }
+    }
+    return 0;
+}
 
-            threads[0] = (size_t)n_elems;
-
-            err = clEnqueueNDRangeKernel( queue, kernel[kTotalVecCount + i], 1, NULL, threads, NULL, 0, NULL, NULL );
-            test_error( err, "Unable to execute kernel" );
-
-            err = clEnqueueReadBuffer( queue, streams[5], CL_TRUE, 0, sizeof(cl_double)*num_elements, (void *)output_ptr_double, 0, NULL, NULL );
-            test_error( err, "Unable to read results" );
-
-            if( doubleVerifyFn( input_ptr_double[0], input_ptr_double[1], output_ptr_double, n_elems, ((g_arrVecSizes[i]))))
-            {
-                log_error(" double%d%s test failed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", double");
-                err = -1;
-            }
-            else
+template <typename T>
+int min_verify(const T* const x, const T* const y, const T* const out,
+               int numElements, int vecSize, int vecParam)
+{
+    for (int i = 0; i < numElements; i++)
+    {
+        for (int j = 0; j < vecSize; j++)
+        {
+            int k = i * vecSize + j;
+            int l = (k * vecParam + i * (1 - vecParam));
+            T v = (x[k] > y[l]) ? y[l] : x[k];
+            if (v != out[k])
             {
-                log_info(" double%d%s test passed\n", ((g_arrVecSizes[i])), vectorSecondParam ? "" : ", double");
-                err = 0;
+                log_error(
+                    "x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is "
+                    "vector %d, element %d, for vector size %d)\n",
+                    k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize);
+                return -1;
             }
-
-            if (err)
-            break;
         }
     }
+    return 0;
+}
 
+}
 
-    for( i = 0; i < ((test_double) ? 6 : 3); i++ )
-    {
-        clReleaseMemObject(streams[i]);
-    }
-    for (i=0; i < ((test_double) ? kTotalVecCount * 2 : kTotalVecCount) ; i++)
+cl_int MaxTest::Run()
+{
+    cl_int error = CL_SUCCESS;
+
+    error = test_binary_fn<float>(device, context, queue, num_elems,
+                                  fnName.c_str(), vecParam, max_verify<float>);
+    test_error(error, "MaxTest::Run<float> failed");
+
+    if (is_extension_available(device, "cl_khr_fp64"))
     {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
+        error = test_binary_fn<double>(device, context, queue, num_elems,
+                                       fnName.c_str(), vecParam,
+                                       max_verify<double>);
+        test_error(error, "MaxTest::Run<double> failed");
     }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-      free(program);
-      free(kernel);
 
-    if (test_double)
+    return error;
+}
+
+cl_int MinTest::Run()
+{
+    cl_int error = CL_SUCCESS;
+
+    error = test_binary_fn<float>(device, context, queue, num_elems,
+                                  fnName.c_str(), vecParam, min_verify<float>);
+    test_error(error, "MinTest::Run<float> failed");
+
+    if (is_extension_available(device, "cl_khr_fp64"))
     {
-        free(input_ptr_double[0]);
-        free(input_ptr_double[1]);
-        free(output_ptr_double);
+        error = test_binary_fn<double>(device, context, queue, num_elems,
+                                       fnName.c_str(), vecParam,
+                                       min_verify<double>);
+        test_error(error, "MinTest::Run<double> failed");
     }
 
-    return err;
+    return error;
+}
+
+int test_min(cl_device_id device, cl_context context, cl_command_queue queue,
+             int n_elems)
+{
+    return MakeAndRunTest<MinTest>(device, context, queue, n_elems, "min",
+                                   true);
 }
 
+int test_minf(cl_device_id device, cl_context context, cl_command_queue queue,
+              int n_elems)
+{
+    return MakeAndRunTest<MinTest>(device, context, queue, n_elems, "min",
+                                   false);
+}
 
+int test_fmin(cl_device_id device, cl_context context, cl_command_queue queue,
+              int n_elems)
+{
+    return MakeAndRunTest<MinTest>(device, context, queue, n_elems, "fmin",
+                                   true);
+}
+
+int test_fminf(cl_device_id device, cl_context context, cl_command_queue queue,
+               int n_elems)
+{
+    return MakeAndRunTest<MinTest>(device, context, queue, n_elems, "fmin",
+                                   false);
+}
+
+int test_max(cl_device_id device, cl_context context, cl_command_queue queue,
+             int n_elems)
+{
+    return MakeAndRunTest<MaxTest>(device, context, queue, n_elems, "max",
+                                   true);
+}
+
+int test_maxf(cl_device_id device, cl_context context, cl_command_queue queue,
+              int n_elems)
+{
+    return MakeAndRunTest<MaxTest>(device, context, queue, n_elems, "max",
+                                   false);
+}
+
+int test_fmax(cl_device_id device, cl_context context, cl_command_queue queue,
+              int n_elems)
+{
+    return MakeAndRunTest<MaxTest>(device, context, queue, n_elems, "fmax",
+                                   true);
+}
+
+int test_fmaxf(cl_device_id device, cl_context context, cl_command_queue queue,
+               int n_elems)
+{
+    return MakeAndRunTest<MaxTest>(device, context, queue, n_elems, "fmax",
+                                   false);
+}
diff --git a/test_conformance/commonfns/test_clamp.cpp b/test_conformance/commonfns/test_clamp.cpp
index bbb83645..0e96fb60 100644
--- a/test_conformance/commonfns/test_clamp.cpp
+++ b/test_conformance/commonfns/test_clamp.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,303 +13,252 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/compat.h"
 
 #include <stdio.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <vector>
+
+#include "harness/deviceInfo.h"
+#include "harness/typeWrappers.h"
 
 #include "procs.h"
+#include "test_base.h"
+
 
 #ifndef M_PI
-#define M_PI    3.14159265358979323846264338327950288
+#define M_PI 3.14159265358979323846264338327950288
 #endif
 
-#define CLAMP_KERNEL( type )                        \
-    const char *clamp_##type##_kernel_code =                \
-    EMIT_PRAGMA_DIRECTIVE                        \
-    "__kernel void test_clamp(__global " #type " *x, __global " #type " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \
-    "{\n"                                \
-    "    int  tid = get_global_id(0);\n"                \
-    "\n"                                \
-    "    dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n"    \
-    "}\n";
-
-#define CLAMP_KERNEL_V( type, size)                    \
-    const char *clamp_##type##size##_kernel_code =            \
-    EMIT_PRAGMA_DIRECTIVE                        \
-    "__kernel void test_clamp(__global " #type #size " *x, __global " #type #size " *minval, __global " #type #size " *maxval, __global " #type #size " *dst)\n" \
-    "{\n"                                \
-    "    int  tid = get_global_id(0);\n"                \
-    "\n"                                \
-    "    dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n"    \
-    "}\n";
-
-#define CLAMP_KERNEL_V3( type, size)                    \
-    const char *clamp_##type##size##_kernel_code =            \
-    EMIT_PRAGMA_DIRECTIVE                        \
-    "__kernel void test_clamp(__global " #type " *x, __global " #type " *minval, __global " #type " *maxval, __global " #type " *dst)\n" \
-    "{\n"                                \
-    "    int  tid = get_global_id(0);\n"                \
-    "\n"                                \
-    "    vstore3(clamp(vload3(tid, x), vload3(tid,minval), vload3(tid,maxval)), tid, dst);\n"    \
-    "}\n";
+
+#define CLAMP_KERNEL(type)                                                     \
+    const char *clamp_##type##_kernel_code = EMIT_PRAGMA_DIRECTIVE             \
+        "__kernel void test_clamp(__global " #type " *x, __global " #type      \
+        " *minval, __global " #type " *maxval, __global " #type " *dst)\n"     \
+        "{\n"                                                                  \
+        "    int  tid = get_global_id(0);\n"                                   \
+        "\n"                                                                   \
+        "    dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n"            \
+        "}\n";
+
+#define CLAMP_KERNEL_V(type, size)                                             \
+    const char *clamp_##type##size##_kernel_code = EMIT_PRAGMA_DIRECTIVE       \
+        "__kernel void test_clamp(__global " #type #size                       \
+        " *x, __global " #type #size " *minval, __global " #type #size         \
+        " *maxval, __global " #type #size " *dst)\n"                           \
+        "{\n"                                                                  \
+        "    int  tid = get_global_id(0);\n"                                   \
+        "\n"                                                                   \
+        "    dst[tid] = clamp(x[tid], minval[tid], maxval[tid]);\n"            \
+        "}\n";
+
+#define CLAMP_KERNEL_V3(type, size)                                            \
+    const char *clamp_##type##size##_kernel_code = EMIT_PRAGMA_DIRECTIVE       \
+        "__kernel void test_clamp(__global " #type " *x, __global " #type      \
+        " *minval, __global " #type " *maxval, __global " #type " *dst)\n"     \
+        "{\n"                                                                  \
+        "    int  tid = get_global_id(0);\n"                                   \
+        "\n"                                                                   \
+        "    vstore3(clamp(vload3(tid, x), vload3(tid,minval), "               \
+        "vload3(tid,maxval)), tid, dst);\n"                                    \
+        "}\n";
+
 
 #define EMIT_PRAGMA_DIRECTIVE " "
-CLAMP_KERNEL( float )
-CLAMP_KERNEL_V( float, 2 )
-CLAMP_KERNEL_V( float, 4 )
-CLAMP_KERNEL_V( float, 8 )
-CLAMP_KERNEL_V( float, 16 )
-CLAMP_KERNEL_V3( float, 3)
+CLAMP_KERNEL(float)
+CLAMP_KERNEL_V(float, 2)
+CLAMP_KERNEL_V(float, 4)
+CLAMP_KERNEL_V(float, 8)
+CLAMP_KERNEL_V(float, 16)
+CLAMP_KERNEL_V3(float, 3)
 #undef EMIT_PRAGMA_DIRECTIVE
 
 #define EMIT_PRAGMA_DIRECTIVE "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-CLAMP_KERNEL( double )
-CLAMP_KERNEL_V( double, 2 )
-CLAMP_KERNEL_V( double, 4 )
-CLAMP_KERNEL_V( double, 8 )
-CLAMP_KERNEL_V( double, 16 )
-CLAMP_KERNEL_V3( double, 3 )
+CLAMP_KERNEL(double)
+CLAMP_KERNEL_V(double, 2)
+CLAMP_KERNEL_V(double, 4)
+CLAMP_KERNEL_V(double, 8)
+CLAMP_KERNEL_V(double, 16)
+CLAMP_KERNEL_V3(double, 3)
 #undef EMIT_PRAGMA_DIRECTIVE
 
-const char *clamp_float_codes[] = { clamp_float_kernel_code, clamp_float2_kernel_code, clamp_float4_kernel_code, clamp_float8_kernel_code, clamp_float16_kernel_code, clamp_float3_kernel_code };
-const char *clamp_double_codes[] = { clamp_double_kernel_code, clamp_double2_kernel_code, clamp_double4_kernel_code, clamp_double8_kernel_code, clamp_double16_kernel_code, clamp_double3_kernel_code };
+const char *clamp_float_codes[] = {
+    clamp_float_kernel_code,   clamp_float2_kernel_code,
+    clamp_float4_kernel_code,  clamp_float8_kernel_code,
+    clamp_float16_kernel_code, clamp_float3_kernel_code
+};
+const char *clamp_double_codes[] = {
+    clamp_double_kernel_code,   clamp_double2_kernel_code,
+    clamp_double4_kernel_code,  clamp_double8_kernel_code,
+    clamp_double16_kernel_code, clamp_double3_kernel_code
+};
+
+namespace {
 
-static int verify_clamp(float *x, float *minval, float *maxval, float *outptr, int n)
-{
-    float       t;
-    int         i;
 
-    for (i=0; i<n; i++)
+template <typename T>
+int verify_clamp(const T *const x, const T *const minval, const T *const maxval,
+                 const T *const outptr, int n)
+{
+    T t;
+    for (int i = 0; i < n; i++)
     {
-        t = fminf( fmaxf( x[ i ], minval[ i ] ), maxval[ i ] );
+        t = std::min(std::max(x[i], minval[i]), maxval[i]);
         if (t != outptr[i])
         {
-            log_error( "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", i, x[i], minval[i], maxval[i], t, outptr[i] );
+            log_error(
+                "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", i,
+                x[i], minval[i], maxval[i], t, outptr[i]);
             return -1;
         }
     }
 
     return 0;
 }
+}
 
-static int verify_clamp_double(double *x, double *minval, double *maxval, double *outptr, int n)
+
+template <typename T>
+int test_clamp_fn(cl_device_id device, cl_context context,
+                  cl_command_queue queue, int n_elems)
 {
-    double       t;
-    int         i;
+    clMemWrapper streams[4];
+    std::vector<T> input_ptr[3], output_ptr;
 
-    for (i=0; i<n; i++)
-    {
-        t = fmin( fmax( x[ i ], minval[ i ] ), maxval[ i ] );
-        if (t != outptr[i])
-        {
-            log_error( "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", i, x[i], minval[i], maxval[i], t, outptr[i] );
-            return -1;
-        }
-    }
+    std::vector<clProgramWrapper> programs;
+    std::vector<clKernelWrapper> kernels;
 
-    return 0;
-}
+    int err, i, j;
+    MTdataHolder d = MTdataHolder(gRandomSeed);
 
-int
-test_clamp(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem      streams[8];
-    cl_float      *input_ptr[3], *output_ptr;
-    cl_double     *input_ptr_double[3], *output_ptr_double = NULL;
-    cl_program  *program;
-    cl_kernel   *kernel;
-    size_t threads[1];
-    int num_elements;
-    int err;
-    int i, j;
-    MTdata d;
-
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount*2);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount*2);
-
-    num_elements = n_elems * (1 << (kVectorSizeCount-1));
-
-    int test_double = 0;
-    if(is_extension_available( device, "cl_khr_fp64" )) {
-    log_info("Testing doubles.\n");
-      test_double = 1;
-    }
+    assert(BaseFunctionTest::type2name.find(sizeof(T))
+           != BaseFunctionTest::type2name.end());
+    auto tname = BaseFunctionTest::type2name[sizeof(T)];
+
+    programs.resize(kTotalVecCount);
+    kernels.resize(kTotalVecCount);
 
+    int num_elements = n_elems * (1 << (kVectorSizeCount - 1));
 
-    // why does this go from 0 to 2?? -- Oh, I see, there are four function
-    // arguments to the function, and 3 of them are inputs?
-    for( i = 0; i < 3; i++ )
+    for (i = 0; i < 3; i++) input_ptr[i].resize(num_elements);
+    output_ptr.resize(num_elements);
+
+    for (i = 0; i < 4; i++)
     {
-        input_ptr[i] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-        if (test_double) input_ptr_double[i] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
+        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                    sizeof(T) * num_elements, NULL, &err);
+        test_error(err, "clCreateBuffer failed");
     }
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    if (test_double) output_ptr_double = (cl_double*)malloc(sizeof(cl_double) * num_elements);
 
-    // why does this go from 0 to 3?
-    for( i = 0; i < 4; i++ )
+    if (std::is_same<T, float>::value)
     {
-        streams[i] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * num_elements, NULL, NULL);
-        if (!streams[0])
+        for (j = 0; j < num_elements; j++)
         {
-            log_error("clCreateBuffer failed\n");
-            return -1;
+            input_ptr[0][j] = get_random_float(-0x200000, 0x200000, d);
+            input_ptr[1][j] = get_random_float(-0x200000, 0x200000, d);
+            input_ptr[2][j] = get_random_float(input_ptr[1][j], 0x200000, d);
         }
     }
-    if (test_double)
-    for( i = 4; i < 8; i++ )
-        {
-            streams[i] =
-                clCreateBuffer(context, CL_MEM_READ_WRITE,
-                               sizeof(cl_double) * num_elements, NULL, NULL);
-            if (!streams[0])
-            {
-            log_error("clCreateBuffer failed\n");
-            return -1;
-            }
-        }
-
-    d = init_genrand( gRandomSeed );
-    for( j = 0; j < num_elements; j++ )
+    else if (std::is_same<T, double>::value)
     {
-        input_ptr[0][j] = get_random_float(-0x20000000, 0x20000000, d);
-        input_ptr[1][j] = get_random_float(-0x20000000, 0x20000000, d);
-        input_ptr[2][j] = get_random_float(input_ptr[1][j], 0x20000000, d);
-
-        if (test_double) {
-        input_ptr_double[0][j] = get_random_double(-0x20000000, 0x20000000, d);
-        input_ptr_double[1][j] = get_random_double(-0x20000000, 0x20000000, d);
-        input_ptr_double[2][j] = get_random_double(input_ptr_double[1][j], 0x20000000, d);
+        for (j = 0; j < num_elements; j++)
+        {
+            input_ptr[0][j] = get_random_double(-0x20000000, 0x20000000, d);
+            input_ptr[1][j] = get_random_double(-0x20000000, 0x20000000, d);
+            input_ptr[2][j] = get_random_double(input_ptr[1][j], 0x20000000, d);
         }
     }
-    free_mtdata(d); d = NULL;
 
-    for( i = 0; i < 3; i++ )
+    for (i = 0; i < 3; i++)
     {
-        err = clEnqueueWriteBuffer( queue, streams[ i ], CL_TRUE, 0, sizeof( cl_float ) * num_elements, input_ptr[ i ], 0, NULL, NULL );
-        test_error( err, "Unable to write input buffer" );
-
-        if (test_double) {
-        err = clEnqueueWriteBuffer( queue, streams[ 4 + i ], CL_TRUE, 0, sizeof( cl_double ) * num_elements, input_ptr_double[ i ], 0, NULL, NULL );
-        test_error( err, "Unable to write input buffer" );
-        }
+        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
+                                   sizeof(T) * num_elements,
+                                   &input_ptr[i].front(), 0, NULL, NULL);
+        test_error(err, "Unable to write input buffer");
     }
 
-    for( i = 0; i < kTotalVecCount; i++ )
+    for (i = 0; i < kTotalVecCount; i++)
     {
-        err = create_single_kernel_helper( context, &program[ i ], &kernel[ i ], 1, &clamp_float_codes[ i ], "test_clamp" );
-        test_error( err, "Unable to create kernel" );
-
-        log_info("Just made a program for float, i=%d, size=%d, in slot %d\n", i, g_arrVecSizes[i], i);
-        fflush(stdout);
+        if (std::is_same<T, float>::value)
+        {
+            err = create_single_kernel_helper(
+                context, &programs[i], &kernels[i], 1, &clamp_float_codes[i],
+                "test_clamp");
+            test_error(err, "Unable to create kernel");
+        }
+        else if (std::is_same<T, double>::value)
+        {
+            err = create_single_kernel_helper(
+                context, &programs[i], &kernels[i], 1, &clamp_double_codes[i],
+                "test_clamp");
+            test_error(err, "Unable to create kernel");
+        }
 
-        if (test_double) {
-        err = create_single_kernel_helper( context, &program[ kTotalVecCount + i ], &kernel[ kTotalVecCount + i ], 1, &clamp_double_codes[ i ], "test_clamp" );
-        log_info("Just made a program for double, i=%d, size=%d, in slot %d\n", i, g_arrVecSizes[i], kTotalVecCount+i);
+        log_info("Just made a program for float, i=%d, size=%d, in slot %d\n",
+                 i, g_arrVecSizes[i], i);
         fflush(stdout);
-        test_error( err, "Unable to create kernel" );
-        }
-    }
 
-    for( i = 0; i < kTotalVecCount; i++ )
-    {
-        for( j = 0; j < 4; j++ )
+        for (j = 0; j < 4; j++)
         {
-            err = clSetKernelArg( kernel[ i ], j, sizeof( streams[ j ] ), &streams[ j ] );
-            test_error( err, "Unable to set kernel argument" );
+            err =
+                clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
+            test_error(err, "Unable to set kernel argument");
         }
 
-        threads[0] = (size_t)n_elems;
+        size_t threads = (size_t)n_elems;
 
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        test_error( err, "Unable to execute kernel" );
+        err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
+                                     0, NULL, NULL);
+        test_error(err, "Unable to execute kernel");
 
-        err = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        test_error( err, "Unable to read results" );
+        err = clEnqueueReadBuffer(queue, streams[3], true, 0,
+                                  sizeof(T) * num_elements, &output_ptr[0], 0,
+                                  NULL, NULL);
+        test_error(err, "Unable to read results");
 
-        if (verify_clamp(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems*((g_arrVecSizes[i]))))
+        if (verify_clamp<T>((T *)&input_ptr[0].front(),
+                            (T *)&input_ptr[1].front(),
+                            (T *)&input_ptr[2].front(), (T *)&output_ptr[0],
+                            n_elems * ((g_arrVecSizes[i]))))
         {
-            log_error("CLAMP float%d test failed\n", ((g_arrVecSizes[i])));
+            log_error("CLAMP %s%d test failed\n", tname.c_str(),
+                      ((g_arrVecSizes[i])));
             err = -1;
         }
         else
         {
-            log_info("CLAMP float%d test passed\n", ((g_arrVecSizes[i])));
+            log_info("CLAMP %s%d test passed\n", tname.c_str(),
+                     ((g_arrVecSizes[i])));
             err = 0;
         }
 
-
-
-        if (err)
-        break;
+        if (err) break;
     }
 
-    // If the device supports double precision then test that
-    if (test_double)
-    {
-        for( ; i < 2*kTotalVecCount; i++ )
-        {
+    return err;
+}
 
-            log_info("Start of test_double loop, i is %d\n", i);
-            for( j = 0; j < 4; j++ )
-            {
-                err = clSetKernelArg( kernel[i], j, sizeof( streams[j+4] ), &streams[j+4] );
-                test_error( err, "Unable to set kernel argument" );
-            }
-
-            threads[0] = (size_t)n_elems;
-
-            err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-            test_error( err, "Unable to execute kernel" );
-
-            err = clEnqueueReadBuffer( queue, streams[7], CL_TRUE, 0, sizeof(cl_double)*num_elements, (void *)output_ptr_double, 0, NULL, NULL );
-            test_error( err, "Unable to read results" );
-
-            if (verify_clamp_double(input_ptr_double[0], input_ptr_double[1], input_ptr_double[2], output_ptr_double, n_elems*g_arrVecSizes[(i-kTotalVecCount)]))
-            {
-                log_error("CLAMP double%d test failed\n", g_arrVecSizes[(i-kTotalVecCount)]);
-                err = -1;
-            }
-            else
-            {
-                log_info("CLAMP double%d test passed\n", g_arrVecSizes[(i-kTotalVecCount)]);
-                err = 0;
-            }
-
-            if (err)
-            break;
-        }
-    }
 
+cl_int ClampTest::Run()
+{
+    cl_int error = CL_SUCCESS;
 
-    for( i = 0; i < ((test_double) ? 8 : 4); i++ )
-    {
-        clReleaseMemObject(streams[i]);
-    }
-    for (i=0; i < ((test_double) ? kTotalVecCount * 2-1 : kTotalVecCount); i++)
+    error = test_clamp_fn<float>(device, context, queue, num_elems);
+    test_error(error, "ClampTest::Run<float> failed");
+
+    if (is_extension_available(device, "cl_khr_fp64"))
     {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(input_ptr[2]);
-    free(output_ptr);
-    free(program);
-    free(kernel);
-    if (test_double) {
-        free(input_ptr_double[0]);
-        free(input_ptr_double[1]);
-        free(input_ptr_double[2]);
-        free(output_ptr_double);
+        error = test_clamp_fn<double>(device, context, queue, num_elems);
+        test_error(error, "ClampTest::Run<double> failed");
     }
 
-    return err;
+    return error;
 }
 
 
+int test_clamp(cl_device_id device, cl_context context, cl_command_queue queue,
+               int n_elems)
+{
+    return MakeAndRunTest<ClampTest>(device, context, queue, n_elems);
+}
diff --git a/test_conformance/commonfns/test_degrees.cpp b/test_conformance/commonfns/test_degrees.cpp
deleted file mode 100644
index 7360c034..00000000
--- a/test_conformance/commonfns/test_degrees.cpp
+++ /dev/null
@@ -1,476 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-#ifndef M_PI
-#define M_PI    3.14159265358979323846264338327950288
-#endif
-
-static int test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
-
-
-const char *degrees_kernel_code =
-"__kernel void test_degrees(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees2_kernel_code =
-"__kernel void test_degrees2(__global float2 *src, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees4_kernel_code =
-"__kernel void test_degrees4(__global float4 *src, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees8_kernel_code =
-"__kernel void test_degrees8(__global float8 *src, __global float8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees16_kernel_code =
-"__kernel void test_degrees16(__global float16 *src, __global float16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees3_kernel_code =
-"__kernel void test_degrees3(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(degrees(vload3(tid,src)),tid,dst);\n"
-"}\n";
-
-
-#define MAX_ERR  2.0f
-
-static int
-verify_degrees(float *inptr, float *outptr, int n)
-{
-    float error, max_error = 0.0f;
-    double   r, max_val = NAN;
-    int     i, j, max_index = 0;
-
-    for (i=0,j=0; i<n; i++,j++)
-    {
-        r = (180.0 / M_PI) * inptr[i];
-        error = Ulp_Error( outptr[i], r );
-        if( fabsf(error) > max_error)
-        {
-            max_error = error;
-            max_index = i;
-            max_val = r;
-            if( fabsf(error) > MAX_ERR)
-            {
-                log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
-                return 1;
-            }
-        }
-    }
-
-    log_info( "degrees: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
-
-    return 0;
-}
-
-int
-test_degrees(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[2];
-    cl_float     *input_ptr[1], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    void        *values[2];
-    size_t threads[1];
-    int          num_elements;
-    int          err;
-    int          i;
-    MTdata        d;
-
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
-    }
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &degrees_kernel_code, "test_degrees" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &degrees2_kernel_code, "test_degrees2" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &degrees4_kernel_code, "test_degrees4" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &degrees8_kernel_code, "test_degrees8" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &degrees16_kernel_code, "test_degrees16" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &degrees3_kernel_code, "test_degrees3" );
-    if (err)
-        return -1;
-
-    values[0] = streams[0];
-    values[1] = streams[1];
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    for (i=0; i < kTotalVecCount; i++)
-    {
-
-        // Line below is troublesome...
-        threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        cl_uint dead = 0xdeaddead;
-        memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements);
-        err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        if (verify_degrees(input_ptr[0], output_ptr, n_elems*(i+1)))
-        {
-            log_error("DEGREES float%d test failed\n",((g_arrVecSizes[i])));
-            err = -1;
-        }
-        else
-        {
-            log_info("DEGREES float%d test passed\n", ((g_arrVecSizes[i])));
-        }
-
-        if (err)
-            break;
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    for (i=0; i < kTotalVecCount; i++) {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(output_ptr);
-
-    if( err )
-        return err;
-
-    if( ! is_extension_available( device, "cl_khr_fp64" ) )
-    {
-        log_info( "Skipping double -- cl_khr_fp64 is not supported by this device.\n" );
-        return 0;
-    }
-
-    return test_degrees_double( device, context, queue, n_elems);
-}
-
-#pragma mark -
-
-const char *degrees_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees2_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees2_double(__global double2 *src, __global double2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees4_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees4_double(__global double4 *src, __global double4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees8_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees8_double(__global double8 *src, __global double8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees16_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees16_double(__global double16 *src, __global double16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = degrees(src[tid]);\n"
-"}\n";
-
-const char *degrees3_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_degrees3_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(degrees(vload3(tid,src)),tid,dst);\n"
-"}\n";
-
-
-#define MAX_ERR  2.0f
-
-static int
-verify_degrees_double(double *inptr, double *outptr, int n)
-{
-    float error, max_error = 0.0f;
-    double   r, max_val = NAN;
-    int     i, j, max_index = 0;
-
-    for (i=0,j=0; i<n; i++,j++)
-    {
-        r = (180.0L / 3.14159265358979323846264338327950288L) * inptr[i];
-        error = Ulp_Error_Double( outptr[i], r );
-        if( fabsf(error) > max_error)
-        {
-            max_error = error;
-            max_index = i;
-            max_val = r;
-            if( fabsf(error) > MAX_ERR)
-            {
-                log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
-                return 1;
-            }
-        }
-    }
-
-    log_info( "degreesd: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
-
-    return 0;
-}
-
-static int
-test_degrees_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[2];
-    cl_double    *input_ptr[1], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    void        *values[2];
-    size_t threads[1];
-    int          num_elements;
-    int          err;
-    int          i;
-    MTdata        d;
-
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-
-    // TODO: line below is clearly wrong
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-
-    input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-        p[i] = get_random_double((-100000. * M_PI), (100000. * M_PI) ,d);
-
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &degrees_kernel_code_double, "test_degrees_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &degrees2_kernel_code_double, "test_degrees2_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &degrees4_kernel_code_double, "test_degrees4_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &degrees8_kernel_code_double, "test_degrees8_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &degrees16_kernel_code_double, "test_degrees16_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &degrees3_kernel_code_double, "test_degrees3_double" );
-    if (err)
-        return -1;
-
-    values[0] = streams[0];
-    values[1] = streams[1];
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    for (i=0; i < kTotalVecCount; i++)
-    {
-
-        // Line below is troublesome...
-        threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        cl_uint dead = 0xdeaddead;
-        memset_pattern4(output_ptr, &dead, sizeof(cl_double)*num_elements);
-        err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        if (verify_degrees_double(input_ptr[0], output_ptr, n_elems*(i+1)))
-        {
-            log_error("DEGREES double%d test failed\n",((g_arrVecSizes[i])));
-            err = -1;
-        }
-        else
-        {
-            log_info("DEGREES double%d test passed\n", ((g_arrVecSizes[i])));
-        }
-
-        if (err)
-            break;
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    for (i=0; i < kTotalVecCount; i++) {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(output_ptr);
-
-    return err;
-}
-
-
-
diff --git a/test_conformance/commonfns/test_fmax.cpp b/test_conformance/commonfns/test_fmax.cpp
deleted file mode 100644
index 2441e695..00000000
--- a/test_conformance/commonfns/test_fmax.cpp
+++ /dev/null
@@ -1,238 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static const char *fmax_kernel_code =
-    "__kernel void test_fmax(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax2_kernel_code =
-    "__kernel void test_fmax2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax4_kernel_code =
-    "__kernel void test_fmax4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax8_kernel_code =
-    "__kernel void test_fmax8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax16_kernel_code =
-    "__kernel void test_fmax16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-
-static const char *fmax3_kernel_code =
-    "__kernel void test_fmax3(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    vstore3(fmax(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
-    "}\n";
-
-static int
-verify_fmax(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = (inptrA[i] >= inptrB[i]) ? inptrA[i] : inptrB[i];
-        if (r != outptr[i])
-        return -1;
-    }
-
-    return 0;
-}
-
-int
-test_fmax(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[3];
-    cl_float     *input_ptr[2], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    void        *values[3];
-    size_t  threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata d;
-
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    d = init_genrand( gRandomSeed );
-    p = input_ptr[0];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x20000000, 0x20000000, d);
-    }
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x20000000, 0x20000000,d );
-    }
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &fmax_kernel_code, "test_fmax" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &fmax2_kernel_code, "test_fmax2" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &fmax4_kernel_code, "test_fmax4" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &fmax8_kernel_code, "test_fmax8" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &fmax16_kernel_code, "test_fmax16" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &fmax3_kernel_code, "test_fmax3" );
-    if (err)
-    return -1;
-
-
-    values[0] = streams[0];
-    values[1] = streams[1];
-    values[2] = streams[2];
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    threads[0] = (size_t)n_elems;
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        if (verify_fmax(input_ptr[0], input_ptr[1], output_ptr, n_elems*((g_arrVecSizes[i]))))
-        {
-            log_error("FMAX float%d test failed\n", (g_arrVecSizes[i]));
-            err = -1;
-        }
-        else
-        {
-            log_info("FMAX float%d test passed\n", (g_arrVecSizes[i]));
-            err = 0;
-        }
-
-        if (err)
-        break;
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-
-    return err;
-}
-
-
diff --git a/test_conformance/commonfns/test_fmaxf.cpp b/test_conformance/commonfns/test_fmaxf.cpp
deleted file mode 100644
index 1aed5390..00000000
--- a/test_conformance/commonfns/test_fmaxf.cpp
+++ /dev/null
@@ -1,248 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static const char *fmax_kernel_code =
-    "__kernel void test_fmax(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax2_kernel_code =
-    "__kernel void test_fmax2(__global float2 *srcA, __global float *srcB, __global float2 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax4_kernel_code =
-    "__kernel void test_fmax4(__global float4 *srcA, __global float *srcB, __global float4 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax8_kernel_code =
-    "__kernel void test_fmax8(__global float8 *srcA, __global float *srcB, __global float8 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax16_kernel_code =
-    "__kernel void test_fmax16(__global float16 *srcA, __global float *srcB, __global float16 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmax(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmax3_kernel_code =
-    "__kernel void test_fmax3(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    vstore3(fmax(vload3(tid,srcA), srcB[tid]),tid,dst);\n"
-    "}\n";
-
-static int
-verify_fmax(float *inptrA, float *inptrB, float *outptr, int n, int veclen)
-{
-    float       r;
-    int         i, j;
-
-    for (i=0; i<n; ) {
-        int ii = i/veclen;
-        for (j=0; j<veclen && i<n; ++j, ++i) {
-            r = (inptrA[i] >= inptrB[ii]) ? inptrA[i] : inptrB[ii];
-            if (r != outptr[i]) {
-                log_info("Verify noted discrepancy at %d (of %d) (vec %d, pos %d)\n",
-                         i,n,ii,j);
-                log_info("SHould be %f, is %f\n", r, outptr[i]);
-                log_info("Taking max of (%f,%f)\n", inptrA[i], inptrB[i]);
-                return -1;
-            }
-        }
-    }
-
-    return 0;
-}
-
-int
-test_fmaxf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[3];
-    cl_float    *input_ptr[2], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    void        *values[3];
-    size_t  threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata d;
-
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-        {
-            log_error("clCreateBuffer failed\n");
-            return -1;
-        }
-        streams[1] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * num_elements, NULL, NULL);
-        if (!streams[1])
-        {
-            log_error("clCreateBuffer failed\n");
-            return -1;
-        }
-        streams[2] =
-            clCreateBuffer(context, CL_MEM_READ_WRITE,
-                           sizeof(cl_float) * num_elements, NULL, NULL);
-        if (!streams[2])
-        {
-            log_error("clCreateBuffer failed\n");
-            return -1;
-        }
-
-    d = init_genrand( gRandomSeed );
-    p = input_ptr[0];
-    for (i=0; i<num_elements; i++)
-        {
-            p[i] = get_random_float(-0x20000000, 0x20000000, d);
-        }
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-        {
-            p[i] = get_random_float(-0x20000000, 0x20000000, d);
-        }
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements,
-                                (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-        {
-            log_error("clWriteArray failed\n");
-            return -1;
-        }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements,
-                                (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-        {
-            log_error("clWriteArray failed\n");
-            return -1;
-        }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &fmax_kernel_code, "test_fmax" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &fmax2_kernel_code, "test_fmax2" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &fmax4_kernel_code, "test_fmax4" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &fmax8_kernel_code, "test_fmax8" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &fmax16_kernel_code, "test_fmax16" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &fmax3_kernel_code, "test_fmax3" );
-    if (err)
-        return -1;
-
-    values[0] = streams[0];
-    values[1] = streams[1];
-    values[2] = streams[2];
-    for (i=0; i < kTotalVecCount; i++)
-        {
-            err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-            err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-            err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-            if (err != CL_SUCCESS)
-                {
-                    log_error("clSetKernelArgs failed\n");
-                    return -1;
-                }
-        }
-
-    threads[0] = (size_t)n_elems;
-    for (i=0; i < kTotalVecCount; i++)
-        {
-            err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-            if (err != CL_SUCCESS)
-                {
-                    log_error("clEnqueueNDRangeKernel failed\n");
-                    return -1;
-                }
-
-            err = clEnqueueReadBuffer(queue, streams[2], true, 0, sizeof(cl_float)*num_elements,
-                                      output_ptr, 0, NULL, NULL);
-            if (err != CL_SUCCESS)
-                {
-                    log_error("clEnqueueReadBuffer failed\n");
-                    return -1;
-                }
-
-            if (verify_fmax(input_ptr[0], input_ptr[1], output_ptr, n_elems*((g_arrVecSizes[i])), (g_arrVecSizes[i])))
-                {
-                    log_error("FMAX float%d,float test failed\n", (g_arrVecSizes[i]));
-                    err = -1;
-                }
-            else
-                {
-                    log_info("FMAX float%d,float test passed\n", (g_arrVecSizes[i]));
-                    err = 0;
-                }
-
-            if (err)
-                break;
-        }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i < kTotalVecCount; i++)
-        {
-            clReleaseKernel(kernel[i]);
-            clReleaseProgram(program[i]);
-        }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-
-    return err;
-}
-
-
diff --git a/test_conformance/commonfns/test_fmin.cpp b/test_conformance/commonfns/test_fmin.cpp
deleted file mode 100644
index 19bc7b65..00000000
--- a/test_conformance/commonfns/test_fmin.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static const char *fmin_kernel_code =
-    "__kernel void test_fmin(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin2_kernel_code =
-    "__kernel void test_fmin2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin4_kernel_code =
-    "__kernel void test_fmin4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin8_kernel_code =
-    "__kernel void test_fmin8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin16_kernel_code =
-    "__kernel void test_fmin16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-
-static const char *fmin3_kernel_code =
-    "__kernel void test_fmin3(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    vstore3(fmin(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
-    "}\n";
-
-int
-verify_fmin(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = (inptrA[i] > inptrB[i]) ? inptrB[i] : inptrA[i];
-        if (r != outptr[i])
-        return -1;
-    }
-
-    return 0;
-}
-
-int
-test_fmin(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[3];
-    cl_float    *input_ptr[2], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    void        *values[3];
-    size_t threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata d;
-
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-
-    num_elements = n_elems * (1 << (kTotalVecCount-1));;
-
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    d = init_genrand( gRandomSeed );
-    p = input_ptr[0];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x20000000, 0x20000000, d);
-    }
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x20000000, 0x20000000, d);
-    }
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements,
-                (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements,
-                (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &fmin_kernel_code, "test_fmin" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &fmin2_kernel_code, "test_fmin2" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &fmin4_kernel_code, "test_fmin4" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &fmin8_kernel_code, "test_fmin8" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &fmin16_kernel_code, "test_fmin16" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &fmin3_kernel_code, "test_fmin3" );
-    if (err)
-    return -1;
-
-    values[0] = streams[0];
-    values[1] = streams[1];
-    values[2] = streams[2];
-    for (i=0; i<kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    threads[0] = (size_t)n_elems;
-    for (i=0; i<kTotalVecCount; i++)
-    {
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        if (verify_fmin(input_ptr[0], input_ptr[1], output_ptr, n_elems*((g_arrVecSizes[i]))))
-        {
-            log_error("FMIN float%d test failed\n", (g_arrVecSizes[i]));
-            err = -1;
-        }
-        else
-        {
-            log_info("FMIN float%d test passed\n", (g_arrVecSizes[i]));
-            err = 0;
-        }
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i<kTotalVecCount; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-
-    return err;
-}
-
-
diff --git a/test_conformance/commonfns/test_fminf.cpp b/test_conformance/commonfns/test_fminf.cpp
deleted file mode 100644
index e0e455ab..00000000
--- a/test_conformance/commonfns/test_fminf.cpp
+++ /dev/null
@@ -1,240 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static const char *fmin_kernel_code =
-    "__kernel void test_fmin(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin2_kernel_code =
-    "__kernel void test_fmin2(__global float2 *srcA, __global float *srcB, __global float2 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin4_kernel_code =
-    "__kernel void test_fmin4(__global float4 *srcA, __global float *srcB, __global float4 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin8_kernel_code =
-    "__kernel void test_fmin8(__global float8 *srcA, __global float *srcB, __global float8 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin16_kernel_code =
-    "__kernel void test_fmin16(__global float16 *srcA, __global float *srcB, __global float16 *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    dst[tid] = fmin(srcA[tid], srcB[tid]);\n"
-    "}\n";
-
-static const char *fmin3_kernel_code =
-    "__kernel void test_fmin3(__global float *srcA, __global float *srcB, __global float *dst)\n"
-    "{\n"
-    "    int  tid = get_global_id(0);\n"
-    "    vstore3(fmin(vload3(tid,srcA), srcB[tid]),tid,dst);\n"
-    "}\n";
-
-static int
-verify_fmin(float *inptrA, float *inptrB, float *outptr, int n, int veclen)
-{
-    float       r;
-    int         i, j;
-
-    for (i=0; i<n; ) {
-    int ii = i/veclen;
-    for (j=0; j<veclen && i<n; ++j, ++i) {
-        r = (inptrA[i] > inptrB[ii]) ? inptrB[ii] : inptrA[i];
-        if (r != outptr[i])
-        return -1;
-    }
-    }
-
-    return 0;
-}
-
-int
-test_fminf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[3];
-    cl_float     *input_ptr[2], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    void        *values[3];
-    size_t  threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata      d;
-
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    d = init_genrand( gRandomSeed );
-    p = input_ptr[0];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x20000000, 0x20000000, d);
-    }
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x20000000, 0x20000000, d);
-    }
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements,
-                (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements,
-                (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &fmin_kernel_code, "test_fmin" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &fmin2_kernel_code, "test_fmin2" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &fmin4_kernel_code, "test_fmin4" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &fmin8_kernel_code, "test_fmin8" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &fmin16_kernel_code, "test_fmin16" );
-    if (err)
-    return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &fmin3_kernel_code, "test_fmin3" );
-    if (err)
-    return -1;
-
-    values[0] = streams[0];
-    values[1] = streams[1];
-    values[2] = streams[2];
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    threads[0] = (size_t)n_elems;
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        if (verify_fmin(input_ptr[0], input_ptr[1], output_ptr, n_elems*((g_arrVecSizes[i])), (g_arrVecSizes[i])))
-        {
-            log_error("fmin float%d,float test failed\n", (g_arrVecSizes[i]));
-            err = -1;
-        }
-        else
-        {
-            log_info("fmin float%d,float test passed\n", (g_arrVecSizes[i]));
-            err = 0;
-        }
-
-        if (err)
-        break;
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-
-    return err;
-}
-
-
diff --git a/test_conformance/commonfns/test_max.cpp b/test_conformance/commonfns/test_max.cpp
deleted file mode 100644
index 9f3b80ec..00000000
--- a/test_conformance/commonfns/test_max.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static int max_verify_float( float *x, float *y, float *out, int numElements, int vecSize )
-{
-    for( int i = 0; i < numElements * vecSize; i++ )
-    {
-        float v = ( x[ i ] < y[ i ] ) ? y[ i ] : x[ i ];
-        if( v != out[ i ] )
-        {
-            log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n",
-                i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize);
-            return -1;
-        }
-    }
-    return 0;
-}
-
-static int max_verify_double( double *x, double *y, double *out, int numElements, int vecSize )
-{
-    for( int i = 0; i < numElements * vecSize; i++ )
-    {
-        double v = ( x[ i ] < y[ i ] ) ? y[ i ] : x[ i ];
-        if( v != out[ i ] )
-        {
-            log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n",
-                i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize);
-            return -1;
-        }
-    }
-    return 0;
-}
-
-int test_max(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    return test_binary_fn( device, context, queue, n_elems, "max", true, max_verify_float, max_verify_double );
-}
-
-
diff --git a/test_conformance/commonfns/test_maxf.cpp b/test_conformance/commonfns/test_maxf.cpp
deleted file mode 100644
index f96df7ea..00000000
--- a/test_conformance/commonfns/test_maxf.cpp
+++ /dev/null
@@ -1,64 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static int max_verify_float( float *x, float *y, float *out, int numElements, int vecSize )
-{
-    for( int i = 0; i < numElements; i++ )
-    {
-        for( int j = 0; j < vecSize; j++ )
-        {
-            float v = ( x[ i * vecSize + j ] < y[ i ] ) ? y[ i ] : x[ i * vecSize + j ];
-            if( v != out[ i * vecSize + j ] )
-            {
-                log_error( "Failure for vector size %d at position %d, element %d:\n\t max(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v,  out[ i * vecSize + j ] );
-                return -1;
-            }
-        }
-    }
-    return 0;
-}
-
-static int max_verify_double( double *x, double *y, double *out, int numElements, int vecSize )
-{
-    for( int i = 0; i < numElements; i++ )
-    {
-        for( int j = 0; j < vecSize; j++ )
-        {
-            double v = ( x[ i * vecSize + j ] < y[ i ] ) ? y[ i ] : x[ i * vecSize + j ];
-            if(    v != out[ i * vecSize + j ] )
-            {
-                log_error( "Failure for vector size %d at position %d, element %d:\n\t max(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v,  out[ i * vecSize + j ] );
-                return -1;
-            }
-        }
-    }
-    return 0;
-}
-
-int test_maxf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    return test_binary_fn( device, context, queue, n_elems, "max", false, max_verify_float, max_verify_double );
-}
-
-
diff --git a/test_conformance/commonfns/test_min.cpp b/test_conformance/commonfns/test_min.cpp
deleted file mode 100644
index 707e24b6..00000000
--- a/test_conformance/commonfns/test_min.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static int min_verify_float( float *x, float *y, float *out, int numElements, int vecSize )
-{
-    for( int i = 0; i < numElements * vecSize; i++ )
-    {
-        float v = ( y[ i ] < x[ i ] ) ? y[ i ] : x[ i ];
-        if( v != out[ i ] ) {
-      log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize);
-            return -1;
-    }
-    }
-    return 0;
-}
-
-static int min_verify_double( double *x, double *y, double *out, int numElements, int vecSize )
-{
-    for( int i = 0; i < numElements * vecSize; i++ )
-    {
-        double v = ( y[ i ] < x[ i ] ) ? y[ i ] : x[ i ];
-        if( v != out[ i ] ) {
-      log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is vector %d, element %d, for vector size %d)\n", i, x[i], i, y[i], i, out[i], v, i, i/vecSize, i%vecSize, vecSize);
-            return -1;
-    }
-    }
-    return 0;
-}
-
-int test_min(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    return test_binary_fn( device, context, queue, n_elems, "min", true, min_verify_float, min_verify_double );
-}
-
-
diff --git a/test_conformance/commonfns/test_minf.cpp b/test_conformance/commonfns/test_minf.cpp
deleted file mode 100644
index 71b1fbe0..00000000
--- a/test_conformance/commonfns/test_minf.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-#include "harness/errorHelpers.h"
-
-static int min_verify_float( float *x, float *y, float *out, int numElements, int vecSize )
-{
-    for( int i = 0; i < numElements; i++ )
-    {
-        for( int j = 0; j < vecSize; j++ )
-        {
-            float v = ( y[ i ] < x[ i * vecSize + j ] ) ? y[ i ] : x[ i * vecSize + j ];
-            if( v != out[ i * vecSize + j ] )
-            {
-                log_error( "Failure for vector size %d at position %d, element %d:\n\t min(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v,  out[ i * vecSize + j ] );
-                return -1;
-            }
-        }
-    }
-    return 0;
-}
-
-static int min_verify_double( double *x, double *y, double *out, int numElements, int vecSize )
-{
-    int maxFail = 1;
-    int numFails = 0;
-    for( int i = 0; i < numElements; i++ )
-    {
-        for( int j = 0; j < vecSize; j++ )
-        {
-            double v = ( y[ i ] < x[ i * vecSize + j ] ) ? y[ i ] : x[ i * vecSize + j ];
-            if(    v != out[ i * vecSize + j ] )
-            {
-                log_error( "Failure for vector size %d at position %d, element %d:\n\t min(%a, %a) = *%a vs %a\n", vecSize, i, j, x[ i * vecSize + j ], y[i], v,  out[ i * vecSize + j ] );
-                ++numFails;
-                if(numFails >= maxFail) {
-                return -1;
-            }
-        }
-    }
-    }
-    return 0;
-}
-
-int test_minf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    return test_binary_fn( device, context, queue, n_elems, "min", false, min_verify_float, min_verify_double );
-}
-
-
diff --git a/test_conformance/commonfns/test_mix.cpp b/test_conformance/commonfns/test_mix.cpp
index 51baac40..92c10100 100644
--- a/test_conformance/commonfns/test_mix.cpp
+++ b/test_conformance/commonfns/test_mix.cpp
@@ -1,6 +1,6 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
+// Copyright (c) 2023 The Khronos Group Inc.
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,187 +13,265 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/compat.h"
-
 #include <stdio.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 
 #include "procs.h"
+#include "test_base.h"
+
+
+const char *mix_fn_code_pattern =
+    "%s\n" /* optional pragma */
+    "__kernel void test_fn(__global %s%s *x, __global %s%s *y, __global %s%s "
+    "*a, __global %s%s *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    dst[tid] = mix(x[tid], y[tid], a[tid]);\n"
+    "}\n";
+
+const char *mix_fn_code_pattern_v3 =
+    "%s\n" /* optional pragma */
+    "__kernel void test_fn(__global %s *x, __global %s *y, __global %s *a, "
+    "__global %s *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    vstore3(mix(vload3(tid, x), vload3(tid, y), vload3(tid, a)), tid, "
+    "dst);\n"
+    "}\n";
+
+const char *mix_fn_code_pattern_v3_scalar =
+    "%s\n" /* optional pragma */
+    "__kernel void test_fn(__global %s *x, __global %s *y, __global %s *a, "
+    "__global %s *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    vstore3(mix(vload3(tid, x), vload3(tid, y), a[tid]), tid, dst);\n"
+    "}\n";
 
-const char *mix_kernel_code =
-"__kernel void test_mix(__global float *srcA, __global float *srcB, __global float *srcC, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = mix(srcA[tid], srcB[tid], srcC[tid]);\n"
-"}\n";
 
 #define MAX_ERR 1e-3
 
-float
-verify_mix(float *inptrA, float *inptrB, float *inptrC, float *outptr, int n)
-{
-    float       r, delta, max_err = 0.0f;
-    int         i;
+namespace {
 
-    for (i=0; i<n; i++)
-    {
-        r = inptrA[i] + ((inptrB[i] - inptrA[i]) * inptrC[i]);
-        delta = fabsf(r - outptr[i]) / r;
-        if(delta > max_err) max_err = delta;
-    }
-    return max_err;
-}
 
-int
-test_mix(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+template <typename T>
+int verify_mix(const T *const inptrX, const T *const inptrY,
+               const T *const inptrA, const T *const outptr, const int n,
+               const int veclen, const bool vecParam)
 {
-    cl_mem            streams[4];
-    cl_float        *input_ptr[3], *output_ptr, *p;
-    cl_program        program;
-    cl_kernel        kernel;
-    void            *values[4];
-    size_t            lengths[1];
-    size_t    threads[1];
-    float            max_err;
-    int                err;
-    int                i;
-    MTdata          d;
-
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
+    T r;
+    float delta = 0.0f;
+    int i;
+
+    if (vecParam)
     {
-        log_error("clCreateBuffer failed\n");
-        return -1;
+        for (i = 0; i < n * veclen; i++)
+        {
+            r = inptrX[i] + ((inptrY[i] - inptrX[i]) * inptrA[i]);
+            delta = fabs(double(r - outptr[i])) / r;
+            if (delta > MAX_ERR)
+            {
+                log_error(
+                    "%d) verification error: mix(%a, %a, %a) = *%a vs. %a\n", i,
+                    inptrX[i], inptrY[i], inptrA[i], r, outptr[i]);
+                return -1;
+            }
+        }
     }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[2])
+    else
     {
-        log_error("clCreateBuffer failed\n");
-        return -1;
+        for (int i = 0; i < n; ++i)
+        {
+            int ii = i / veclen;
+            int vi = i * veclen;
+            for (int j = 0; j < veclen; ++j, ++vi)
+            {
+                r = inptrX[vi] + ((inptrY[vi] - inptrX[vi]) * inptrA[i]);
+                delta = fabs(double(r - outptr[vi])) / r;
+                if (delta > MAX_ERR)
+                {
+                    log_error("{%d, element %d}) verification error: mix(%a, "
+                              "%a, %a) = *%a vs. %a\n",
+                              ii, j, inptrX[vi], inptrY[vi], inptrA[i], r,
+                              outptr[vi]);
+                    return -1;
+                }
+            }
+        }
     }
 
-    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[3])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
+    return 0;
+}
+} // namespace
 
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] =  (float) genrand_real1(d);
-    }
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = (float) genrand_real1(d);
-    }
-    p = input_ptr[2];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = (float) genrand_real1(d);
-    }
-    free_mtdata(d); d = NULL;
 
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[2], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
+template <typename T>
+int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue,
+                int n_elems, bool vecParam)
+{
+    clMemWrapper streams[4];
+    std::vector<T> input_ptr[3], output_ptr;
+
+    std::vector<clProgramWrapper> programs;
+    std::vector<clKernelWrapper> kernels;
+
+    int err, i;
+    MTdataHolder d = MTdataHolder(gRandomSeed);
+
+    assert(BaseFunctionTest::type2name.find(sizeof(T))
+           != BaseFunctionTest::type2name.end());
+    auto tname = BaseFunctionTest::type2name[sizeof(T)];
 
-    lengths[0] = strlen(mix_kernel_code);
-    err = create_single_kernel_helper( context, &program, &kernel, 1, &mix_kernel_code, "test_mix" );
-    test_error( err, "Unable to create test kernel" );
+    programs.resize(kTotalVecCount);
+    kernels.resize(kTotalVecCount);
 
+    int num_elements = n_elems * (1 << (kTotalVecCount - 1));
 
-    values[0] = streams[0];
-    values[1] = streams[1];
-    values[2] = streams[2];
-    values[3] = streams[3];
-  err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
-  err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
-  err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2] );
-  err |= clSetKernelArg(kernel, 3, sizeof streams[3], &streams[3] );
-    if (err != CL_SUCCESS)
+
+    for (i = 0; i < 3; i++) input_ptr[i].resize(num_elements);
+    output_ptr.resize(num_elements);
+
+    for (i = 0; i < 4; i++)
     {
-        log_error("clSetKernelArgs failed\n");
-        return -1;
+        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                    sizeof(T) * num_elements, NULL, &err);
+        test_error(err, "clCreateBuffer failed");
     }
 
-    threads[0] = (size_t)num_elements;
-    err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
+    for (i = 0; i < num_elements; i++)
     {
-        log_error("clEnqueueNDRangeKernel failed\n");
-        return -1;
+        input_ptr[0][i] = (T)genrand_real1(d);
+        input_ptr[1][i] = (T)genrand_real1(d);
+        input_ptr[2][i] = (T)genrand_real1(d);
     }
 
-    err = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
+    std::string pragma_str;
+    if (std::is_same<T, double>::value)
     {
-        log_error("clEnqueueReadBuffer failed\n");
-        return -1;
+        pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
     }
 
-    max_err = verify_mix(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, num_elements);
-    if (max_err > MAX_ERR)
+    for (i = 0; i < 3; i++)
     {
-        log_error("MIX test failed %g max err\n", max_err);
-        err = -1;
+        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
+                                   sizeof(T) * num_elements,
+                                   &input_ptr[i].front(), 0, NULL, NULL);
+        test_error(err, "Unable to write input buffer");
     }
-    else
+
+    char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
+
+    for (i = 0; i < kTotalVecCount; i++)
     {
-        log_info("MIX test passed %g max err\n", max_err);
-        err = 0;
-    }
+        std::string kernelSource;
+        if (i >= kVectorSizeCount)
+        {
+            if (vecParam)
+            {
+                std::string str = mix_fn_code_pattern_v3;
+                kernelSource =
+                    string_format(str, pragma_str.c_str(), tname.c_str(),
+                                  tname.c_str(), tname.c_str(), tname.c_str());
+            }
+            else
+            {
+                std::string str = mix_fn_code_pattern_v3_scalar;
+                kernelSource =
+                    string_format(str, pragma_str.c_str(), tname.c_str(),
+                                  tname.c_str(), tname.c_str(), tname.c_str());
+            }
+        }
+        else
+        {
+            // regular path
+            std::string str = mix_fn_code_pattern;
+            kernelSource =
+                string_format(str, pragma_str.c_str(), tname.c_str(),
+                              vecSizeNames[i], tname.c_str(), vecSizeNames[i],
+                              tname.c_str(), vecParam ? vecSizeNames[i] : "",
+                              tname.c_str(), vecSizeNames[i]);
+        }
+        const char *programPtr = kernelSource.c_str();
+        err =
+            create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
+                                        (const char **)&programPtr, "test_fn");
+        test_error(err, "Unable to create kernel");
+
+        for (int j = 0; j < 4; j++)
+        {
+            err =
+                clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
+            test_error(err, "Unable to set kernel argument");
+        }
+
+        size_t threads = (size_t)n_elems;
+
+        err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
+                                     0, NULL, NULL);
+        test_error(err, "Unable to execute kernel");
+
+        err = clEnqueueReadBuffer(queue, streams[3], true, 0,
+                                  sizeof(T) * num_elements, &output_ptr[0], 0,
+                                  NULL, NULL);
+        test_error(err, "Unable to read results");
 
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    clReleaseMemObject(streams[3]);
-    clReleaseKernel(kernel);
-    clReleaseProgram(program);
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(input_ptr[2]);
-    free(output_ptr);
+        if (verify_mix(&input_ptr[0].front(), &input_ptr[1].front(),
+                       &input_ptr[2].front(), &output_ptr.front(), n_elems,
+                       g_arrVecSizes[i], vecParam))
+        {
+            log_error("mix %s%d%s test failed\n", tname.c_str(),
+                      ((g_arrVecSizes[i])),
+                      vecParam ? "" : std::string(", " + tname).c_str());
+            err = -1;
+        }
+        else
+        {
+            log_info("mix %s%d%s test passed\n", tname.c_str(),
+                     ((g_arrVecSizes[i])),
+                     vecParam ? "" : std::string(", " + tname).c_str());
+            err = 0;
+        }
+
+        if (err) break;
+    }
 
     return err;
 }
 
 
+cl_int MixTest::Run()
+{
+    cl_int error = CL_SUCCESS;
+
+    error = test_mix_fn<float>(device, context, queue, num_elems, vecParam);
+    test_error(error, "MixTest::Run<float> failed");
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+    {
+        error =
+            test_mix_fn<double>(device, context, queue, num_elems, vecParam);
+        test_error(error, "MixTest::Run<double> failed");
+    }
+
+    return error;
+}
+
 
+int test_mix(cl_device_id device, cl_context context, cl_command_queue queue,
+             int n_elems)
+{
+    return MakeAndRunTest<MixTest>(device, context, queue, n_elems, "mix",
+                                   true);
+}
 
 
+int test_mixf(cl_device_id device, cl_context context, cl_command_queue queue,
+              int n_elems)
+{
+    return MakeAndRunTest<MixTest>(device, context, queue, n_elems, "mix",
+                                   false);
+}
diff --git a/test_conformance/commonfns/test_radians.cpp b/test_conformance/commonfns/test_radians.cpp
deleted file mode 100644
index 0a580c19..00000000
--- a/test_conformance/commonfns/test_radians.cpp
+++ /dev/null
@@ -1,474 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-#ifndef M_PI
-#define M_PI    3.14159265358979323846264338327950288
-#endif
-
-static int test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
-
-
-const char *radians_kernel_code =
-"__kernel void test_radians(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians2_kernel_code =
-"__kernel void test_radians2(__global float2 *src, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians4_kernel_code =
-"__kernel void test_radians4(__global float4 *src, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians8_kernel_code =
-"__kernel void test_radians8(__global float8 *src, __global float8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians16_kernel_code =
-"__kernel void test_radians16(__global float16 *src, __global float16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians3_kernel_code =
-"__kernel void test_radians3(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(radians(vload3(tid,src)),tid,dst);\n"
-"}\n";
-
-
-#define MAX_ERR  2.0f
-
-static float
-verify_radians(float *inptr, float *outptr, int n)
-{
-    float error, max_error = 0.0f;
-    double   r, max_val = NAN;
-    int     i, j, max_index = 0;
-
-    for (i=0,j=0; i<n; i++,j++)
-    {
-        r = (M_PI / 180.0) * inptr[i];
-        error = Ulp_Error( outptr[i], r );
-        if( fabsf(error) > max_error)
-        {
-            max_error = error;
-            max_index = i;
-            max_val = r;
-            if( fabsf(error) > MAX_ERR)
-            {
-                log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
-                return 1;
-            }
-        }
-    }
-
-    log_info( "radians: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
-
-    return 0;
-}
-
-
-int
-test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[2];
-    cl_float     *input_ptr[1], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    void         *values[2];
-    size_t       threads[1];
-    int          num_elements;
-    int          err;
-    int          i;
-    MTdata       d;
-
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
-    }
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &radians_kernel_code, "test_radians" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &radians2_kernel_code, "test_radians2" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &radians4_kernel_code, "test_radians4" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &radians8_kernel_code, "test_radians8" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &radians16_kernel_code, "test_radians16" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &radians3_kernel_code, "test_radians3" );
-    if (err)
-        return -1;
-
-    values[0] = streams[0];
-    values[1] = streams[1];
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        cl_uint dead = 0xdeaddead;
-        memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements);
-        err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        if (verify_radians(input_ptr[0], output_ptr, n_elems*(i+1)))
-        {
-            log_error("RADIANS float%d test failed\n",((g_arrVecSizes[i])));
-            err = -1;
-        }
-        else
-        {
-            log_info("RADIANS float%d test passed\n", ((g_arrVecSizes[i])));
-        }
-
-        if (err)
-            break;
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    for (i=0; i < kTotalVecCount; i++) {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(output_ptr);
-    if( err )
-        return err;
-
-    if( ! is_extension_available( device, "cl_khr_fp64" ) )
-    {
-        log_info( "Skipping double -- cl_khr_fp64 is not supported by this device.\n" );
-        return 0;
-    }
-
-    return test_radians_double( device,  context,  queue,  n_elems);
-}
-
-
-
-#pragma mark -
-
-const char *radians_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians2_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians2_double(__global double2 *src, __global double2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians4_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians4_double(__global double4 *src, __global double4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians8_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians8_double(__global double8 *src, __global double8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians16_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians16_double(__global double16 *src, __global double16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = radians(src[tid]);\n"
-"}\n";
-
-const char *radians3_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_radians3_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(radians(vload3(tid,src)),tid,dst);\n"
-"}\n";
-
-
-#define MAX_ERR  2.0f
-
-static double
-verify_radians_double(double *inptr, double *outptr, int n)
-{
-    float error, max_error = 0.0f;
-    double   r, max_val = NAN;
-    int     i, j, max_index = 0;
-
-    for (i=0,j=0; i<n; i++,j++)
-    {
-        r = (3.14159265358979323846264338327950288L / 180.0L) * inptr[i];
-        error = Ulp_Error_Double( outptr[i], r );
-        if( fabsf(error) > max_error)
-        {
-            max_error = error;
-            max_index = i;
-            max_val = r;
-            if( fabsf(error) > MAX_ERR)
-            {
-                log_error( "%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n", i, inptr[i], r, outptr[i], r, outptr[i], error );
-                return 1;
-            }
-        }
-    }
-
-    log_info( "radiansd: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n", max_error, max_index, max_val, outptr[max_index], max_val, outptr[max_index] );
-
-    return 0;
-}
-
-
-int
-test_radians_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem       streams[2];
-    cl_double     *input_ptr[1], *output_ptr, *p;
-    cl_program   *program;
-    cl_kernel    *kernel;
-    void         *values[2];
-    size_t       threads[1];
-    int          num_elements;
-    int          err;
-    int          i;
-    MTdata       d;
-
-
-    program = (cl_program*)malloc(sizeof(cl_program)*kTotalVecCount);
-    kernel = (cl_kernel*)malloc(sizeof(cl_kernel)*kTotalVecCount);
-
-    //TODO: line below is clearly wrong
-    num_elements = n_elems * (1 << (kTotalVecCount-1));
-
-    input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-        p[i] = get_random_double((float)(-100000.0 * M_PI), (float)(100000.0 * M_PI) ,d);
-
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &radians_kernel_code_double, "test_radians_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &radians2_kernel_code_double, "test_radians2_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &radians4_kernel_code_double, "test_radians4_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &radians8_kernel_code_double, "test_radians8_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &radians16_kernel_code_double, "test_radians16_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &radians3_kernel_code_double, "test_radians3_double" );
-    if (err)
-        return -1;
-
-    values[0] = streams[0];
-    values[1] = streams[1];
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    for (i=0; i < kTotalVecCount; i++)
-    {
-        threads[0] = (size_t)num_elements / ((g_arrVecSizes[i]));
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        cl_uint dead = 0xdeaddead;
-        memset_pattern4(output_ptr, &dead, sizeof(cl_double)*num_elements);
-        err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        if (verify_radians_double(input_ptr[0], output_ptr, n_elems*(i+1)))
-        {
-            log_error("RADIANS double%d test failed\n",((g_arrVecSizes[i])));
-            err = -1;
-        }
-        else
-        {
-            log_info("RADIANS double%d test passed\n", ((g_arrVecSizes[i])));
-        }
-
-        if (err)
-            break;
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    for (i=0; i < kTotalVecCount; i++) {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(program);
-    free(kernel);
-    free(input_ptr[0]);
-    free(output_ptr);
-
-    return err;
-}
-
diff --git a/test_conformance/commonfns/test_sign.cpp b/test_conformance/commonfns/test_sign.cpp
deleted file mode 100644
index 6dba58da..00000000
--- a/test_conformance/commonfns/test_sign.cpp
+++ /dev/null
@@ -1,443 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static int
-test_sign_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
-
-
-const char *sign_kernel_code =
-"__kernel void test_sign(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign2_kernel_code =
-"__kernel void test_sign2(__global float2 *src, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign4_kernel_code =
-"__kernel void test_sign4(__global float4 *src, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign8_kernel_code =
-"__kernel void test_sign8(__global float8 *src, __global float8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign16_kernel_code =
-"__kernel void test_sign16(__global float16 *src, __global float16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign3_kernel_code =
-"__kernel void test_sign3(__global float *src, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(sign(vload3(tid,src)), tid, dst);\n"
-"}\n";
-
-
-
-static int
-verify_sign(float *inptr, float *outptr, int n)
-{
-  float       r;
-  int         i;
-
-  for (i=0; i<n; i++)
-  {
-    if (inptr[i] > 0.0f)
-      r = 1.0f;
-    else if (inptr[i] < 0.0f)
-      r = -1.0f;
-    else
-      r = 0.0f;
-    if (r != outptr[i])
-      return -1;
-  }
-
-  return 0;
-}
-
-static const char *fn_names[] = { "SIGN float", "SIGN float2", "SIGN float4", "SIGN float8", "SIGN float16", "SIGN float3" };
-
-int
-test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-  cl_mem      streams[2];
-  cl_float    *input_ptr[1], *output_ptr, *p;
-  cl_program  program[kTotalVecCount];
-  cl_kernel   kernel[kTotalVecCount];
-  void        *values[2];
-  size_t  threads[1];
-  int num_elements;
-  int err;
-  int i;
-  MTdata    d;
-
-  num_elements = n_elems * 16;
-
-  input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[0])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[1])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-
-  d = init_genrand( gRandomSeed );
-  p = input_ptr[0];
-  for (i=0; i<num_elements; i++)
-  {
-    p[i] = get_random_float(-0x20000000, 0x20000000, d);
-  }
-  free_mtdata(d);   d = NULL;
-
-
-  err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-
-  err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &sign_kernel_code, "test_sign" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &sign2_kernel_code, "test_sign2" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &sign4_kernel_code, "test_sign4" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &sign8_kernel_code, "test_sign8" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &sign16_kernel_code, "test_sign16" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &sign3_kernel_code, "test_sign3" );
-  if (err)
-    return -1;
-
-  values[0] = streams[0];
-  values[1] = streams[1];
-  for (i=0; i<kTotalVecCount; i++)
-  {
-      err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-      err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-      if (err != CL_SUCCESS)
-    {
-      log_error("clSetKernelArgs failed\n");
-      return -1;
-    }
-  }
-
-  threads[0] = (size_t)n_elems;
-  for (i=0; i<kTotalVecCount; i++) // change this so we test all
-  {
-    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
-    }
-
-    err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
-    }
-
-    if (verify_sign(input_ptr[0], output_ptr, n_elems*(i+1)))
-    {
-      log_error("%s test failed\n", fn_names[i]);
-      err = -1;
-    }
-    else
-    {
-      log_info("%s test passed\n", fn_names[i]);
-      err = 0;
-    }
-
-    if (err)
-      break;
-  }
-
-  clReleaseMemObject(streams[0]);
-  clReleaseMemObject(streams[1]);
-  for (i=0; i<kTotalVecCount; i++)
-  {
-    clReleaseKernel(kernel[i]);
-    clReleaseProgram(program[i]);
-  }
-  free(input_ptr[0]);
-  free(output_ptr);
-
-  if (err) return err;
-
-  if (!is_extension_available(device, "cl_khr_fp64"))
-  {
-      log_info("skipping double test -- cl_khr_fp64 not supported.\n");
-      return 0;
-  }
-
-    return test_sign_double( device, context, queue, n_elems);
-}
-
-#pragma mark -
-
-const char *sign_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign2_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign2_double(__global double2 *src, __global double2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign4_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign4_double(__global double4 *src, __global double4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign8_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign8_double(__global double8 *src, __global double8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign16_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign16_double(__global double16 *src, __global double16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = sign(src[tid]);\n"
-"}\n";
-
-const char *sign3_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_sign3_double(__global double *src, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(sign(vload3(tid,src)), tid, dst);\n"
-"}\n";
-
-
-static int
-verify_sign_double(double *inptr, double *outptr, int n)
-{
-  double       r;
-  int         i;
-
-  for (i=0; i<n; i++)
-  {
-    if (inptr[i] > 0.0)
-      r = 1.0;
-    else if (inptr[i] < 0.0)
-      r = -1.0;
-    else
-      r = 0.0f;
-    if (r != outptr[i])
-      return -1;
-  }
-
-  return 0;
-}
-
-static const char *fn_names_double[] = { "SIGN double", "SIGN double2", "SIGN double4", "SIGN double8", "SIGN double16", "SIGN double3" };
-
-int
-test_sign_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-  cl_mem      streams[2];
-  cl_double    *input_ptr[1], *output_ptr, *p;
-  cl_program  program[kTotalVecCount];
-  cl_kernel   kernel[kTotalVecCount];
-  void        *values[2];
-  size_t  threads[1];
-  int num_elements;
-  int err;
-  int i;
-  MTdata    d;
-
-  num_elements = n_elems * 16;
-
-  input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-  output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_double) * num_elements, NULL, NULL);
-  if (!streams[0])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_double) * num_elements, NULL, NULL);
-  if (!streams[1])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-
-  d = init_genrand( gRandomSeed );
-  p = input_ptr[0];
-  for (i=0; i<num_elements; i++)
-    p[i] = get_random_double(-0x20000000, 0x20000000, d);
-
-  free_mtdata(d);   d = NULL;
-
-
-  err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-
-  err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &sign_kernel_code_double, "test_sign_double" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &sign2_kernel_code_double, "test_sign2_double" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &sign4_kernel_code_double, "test_sign4_double" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &sign8_kernel_code_double, "test_sign8_double" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &sign16_kernel_code_double, "test_sign16_double" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &sign3_kernel_code_double, "test_sign3_double" );
-  if (err)
-    return -1;
-
-  values[0] = streams[0];
-  values[1] = streams[1];
-  for (i=0; i<kTotalVecCount; i++)
-  {
-      err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-      err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-      if (err != CL_SUCCESS)
-    {
-      log_error("clSetKernelArgs failed\n");
-      return -1;
-    }
-  }
-
-  threads[0] = (size_t)n_elems;
-  for (i=0; i<kTotalVecCount; i++) // this hsould be changed
-  {
-    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
-    }
-
-    err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
-    }
-
-    if (verify_sign_double(input_ptr[0], output_ptr, n_elems*(i+1)))
-    {
-      log_error("%s test failed\n", fn_names_double[i]);
-      err = -1;
-    }
-    else
-    {
-      log_info("%s test passed\n", fn_names_double[i]);
-      err = 0;
-    }
-
-    if (err)
-      break;
-  }
-
-  clReleaseMemObject(streams[0]);
-  clReleaseMemObject(streams[1]);
-  for (i=0; i<kTotalVecCount; i++)
-  {
-    clReleaseKernel(kernel[i]);
-    clReleaseProgram(program[i]);
-  }
-  free(input_ptr[0]);
-  free(output_ptr);
-
-  return err;
-}
-
-
diff --git a/test_conformance/commonfns/test_smoothstep.cpp b/test_conformance/commonfns/test_smoothstep.cpp
index c0cc1d40..31948d3f 100644
--- a/test_conformance/commonfns/test_smoothstep.cpp
+++ b/test_conformance/commonfns/test_smoothstep.cpp
@@ -1,6 +1,6 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
+// Copyright (c) 2023 The Khronos Group Inc.
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,270 +13,283 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/compat.h"
-
 #include <stdio.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 
 #include "procs.h"
+#include "test_base.h"
+
+
+const char *smoothstep_fn_code_pattern =
+    "%s\n" /* optional pragma */
+    "__kernel void test_fn(__global %s%s *e0, __global %s%s *e1, __global %s%s "
+    "*x, __global %s%s *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    dst[tid] = smoothstep(e0[tid], e1[tid], x[tid]);\n"
+    "}\n";
+
+const char *smoothstep_fn_code_pattern_v3 =
+    "%s\n" /* optional pragma */
+    "__kernel void test_fn(__global %s *e0, __global %s *e1, __global %s *x, "
+    "__global %s *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    vstore3(smoothstep(vload3(tid,e0), vload3(tid,e1), vload3(tid,x)), "
+    "tid, dst);\n"
+    "}\n";
+
+const char *smoothstep_fn_code_pattern_v3_scalar =
+    "%s\n" /* optional pragma */
+    "__kernel void test_fn(__global %s *e0, __global %s *e1, __global %s *x, "
+    "__global %s *dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "\n"
+    "    vstore3(smoothstep(e0[tid], e1[tid], vload3(tid,x)), tid, dst);\n"
+    "}\n";
 
-static const char *smoothstep_kernel_code =
-"__kernel void test_smoothstep(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-static const char *smoothstep2_kernel_code =
-"__kernel void test_smoothstep2(__global float2 *edge0, __global float2 *edge1, __global float2 *x, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-static const char *smoothstep4_kernel_code =
-"__kernel void test_smoothstep4(__global float4 *edge0, __global float4 *edge1, __global float4 *x, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-static const char *smoothstep8_kernel_code =
-"__kernel void test_smoothstep8(__global float8 *edge0, __global float8 *edge1, __global float8 *x, __global float8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-static const char *smoothstep16_kernel_code =
-"__kernel void test_smoothstep16(__global float16 *edge0, __global float16 *edge1, __global float16 *x, __global float16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-static const char *smoothstep3_kernel_code =
-"__kernel void test_smoothstep3(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(smoothstep(vload3(tid,edge0),vload3(tid,edge1),vload3(tid,x)), tid, dst);\n"
-"}\n";
 
 #define MAX_ERR (1e-5f)
 
-static float
-verify_smoothstep(float *edge0, float *edge1, float *x, float *outptr, int n)
-{
-  float       r, t, delta, max_err = 0.0f;
-  int         i;
-
-  for (i=0; i<n; i++)
-  {
-    t = (x[i] - edge0[i]) / (edge1[i] - edge0[i]);
-    if (t < 0.0f)
-      t = 0.0f;
-    else if (t > 1.0f)
-      t = 1.0f;
-    r = t * t * (3.0f - 2.0f * t);
-    delta = (float)fabs(r - outptr[i]);
-    if (delta > max_err)
-      max_err = delta;
-  }
-
-  return max_err;
-}
+namespace {
 
-const static char *fn_names[] = { "SMOOTHSTEP float", "SMOOTHSTEP float2", "SMOOTHSTEP float4", "SMOOTHSTEP float8", "SMOOTHSTEP float16", "SMOOTHSTEP float3" };
 
-int
-test_smoothstep(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+template <typename T>
+int verify_smoothstep(const T *const edge0, const T *const edge1,
+                      const T *const x, const T *const outptr, const int n,
+                      const int veclen, const bool vecParam)
 {
-  cl_mem      streams[4];
-  cl_float    *input_ptr[3], *output_ptr, *p, *p_edge0;
-  cl_program  program[kTotalVecCount];
-  cl_kernel   kernel[kTotalVecCount];
-  size_t  threads[1];
-  float max_err;
-  int num_elements;
-  int err;
-  int i;
-  MTdata d;
-
-  num_elements = n_elems * 16;
-
-  input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[0])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[1])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[2])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-
-  streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[3])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-
-  p = input_ptr[0];
-  d = init_genrand( gRandomSeed );
-  for (i=0; i<num_elements; i++)
-  {
-    p[i] = get_random_float(-0x00400000, 0x00400000, d);
-  }
-
-  p = input_ptr[1];
-  p_edge0 = input_ptr[0];
-  for (i=0; i<num_elements; i++)
-  {
-    float edge0 = p_edge0[i];
-    float edge1;
-    do {
-      edge1 = get_random_float(-0x00400000, 0x00400000, d);
-      if (edge0 < edge1)
-        break;
-    } while (1);
-    p[i] = edge1;
-  }
-
-  p = input_ptr[2];
-  for (i=0; i<num_elements; i++)
-  {
-    p[i] = get_random_float(-0x00400000, 0x00400000, d);
-  }
-  free_mtdata(d);
-  d = NULL;
-
-  err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-  err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-  err = clEnqueueWriteBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[2], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-
-  err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &smoothstep_kernel_code, "test_smoothstep" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &smoothstep2_kernel_code, "test_smoothstep2" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &smoothstep4_kernel_code, "test_smoothstep4" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &smoothstep8_kernel_code, "test_smoothstep8" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &smoothstep16_kernel_code, "test_smoothstep16" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &smoothstep3_kernel_code, "test_smoothstep3" );
-  if (err)
-    return -1;
-
-  for (i=0; i<kTotalVecCount; i++)
-  {
-      err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-      err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-      err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-      err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3] );
-      if (err != CL_SUCCESS)
+    T r, t;
+    float delta = 0;
+
+    if (vecParam)
     {
-      log_error("clSetKernelArgs failed\n");
-      return -1;
+        for (int i = 0; i < n * veclen; i++)
+        {
+            t = (x[i] - edge0[i]) / (edge1[i] - edge0[i]);
+            if (t < 0.0f)
+                t = 0.0f;
+            else if (t > 1.0f)
+                t = 1.0f;
+            r = t * t * (3.0f - 2.0f * t);
+            delta = (float)fabs(r - outptr[i]);
+            if (delta > MAX_ERR)
+            {
+                log_error("%d) verification error: smoothstep(%a, %a, %a) = "
+                          "*%a vs. %a\n",
+                          i, x[i], edge0[i], edge1[i], r, outptr[i]);
+                return -1;
+            }
+        }
     }
-  }
+    else
+    {
+        for (int i = 0; i < n; ++i)
+        {
+            int ii = i / veclen;
+            int vi = i * veclen;
+            for (int j = 0; j < veclen; ++j, ++vi)
+            {
+                t = (x[vi] - edge0[i]) / (edge1[i] - edge0[i]);
+                if (t < 0.0f)
+                    t = 0.0f;
+                else if (t > 1.0f)
+                    t = 1.0f;
+                r = t * t * (3.0f - 2.0f * t);
+                delta = (float)fabs(r - outptr[vi]);
+                if (delta > MAX_ERR)
+                {
+                    log_error("{%d, element %d}) verification error: "
+                              "smoothstep(%a, %a, %a) = *%a vs. %a\n",
+                              ii, j, x[vi], edge0[i], edge1[i], r, outptr[vi]);
+                    return -1;
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+}
+
+
+template <typename T>
+int test_smoothstep_fn(cl_device_id device, cl_context context,
+                       cl_command_queue queue, int n_elems, bool vecParam)
+{
+    clMemWrapper streams[4];
+    std::vector<T> input_ptr[3], output_ptr;
+
+    std::vector<clProgramWrapper> programs;
+    std::vector<clKernelWrapper> kernels;
+
+    int err, i;
+    MTdataHolder d = MTdataHolder(gRandomSeed);
+
+    assert(BaseFunctionTest::type2name.find(sizeof(T))
+           != BaseFunctionTest::type2name.end());
+    auto tname = BaseFunctionTest::type2name[sizeof(T)];
+
+    programs.resize(kTotalVecCount);
+    kernels.resize(kTotalVecCount);
+
+    int num_elements = n_elems * (1 << (kTotalVecCount - 1));
 
+    for (i = 0; i < 3; i++) input_ptr[i].resize(num_elements);
+    output_ptr.resize(num_elements);
 
-  threads[0] = (size_t)n_elems;
-  for (i=0; i<kTotalVecCount; i++)
-  {
-    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
+    for (i = 0; i < 4; i++)
     {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
+        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                    sizeof(T) * num_elements, NULL, &err);
+        test_error(err, "clCreateBuffer failed");
     }
 
+    std::string pragma_str;
+    if (std::is_same<T, float>::value)
+    {
+        for (i = 0; i < num_elements; i++)
+        {
+            input_ptr[0][i] = get_random_float(-0x00200000, 0x00010000, d);
+            input_ptr[1][i] = get_random_float(input_ptr[0][i], 0x00200000, d);
+            input_ptr[2][i] = get_random_float(-0x20000000, 0x20000000, d);
+        }
+    }
+    else if (std::is_same<T, double>::value)
+    {
+        pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+        for (i = 0; i < num_elements; i++)
+        {
+            input_ptr[0][i] = get_random_double(-0x00200000, 0x00010000, d);
+            input_ptr[1][i] = get_random_double(input_ptr[0][i], 0x00200000, d);
+            input_ptr[2][i] = get_random_double(-0x20000000, 0x20000000, d);
+        }
+    }
 
-    err = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
+    for (i = 0; i < 3; i++)
     {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
+        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
+                                   sizeof(T) * num_elements,
+                                   &input_ptr[i].front(), 0, NULL, NULL);
+        test_error(err, "Unable to write input buffer");
     }
 
-    max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems * g_arrVecSizes[i]);
+    char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
 
-    if (max_err > MAX_ERR)
+    for (i = 0; i < kTotalVecCount; i++)
     {
-      log_error("%s test failed %g max err\n", fn_names[i], max_err);
-      err = -1;
+        std::string kernelSource;
+        if (i >= kVectorSizeCount)
+        {
+            if (vecParam)
+            {
+                std::string str = smoothstep_fn_code_pattern_v3;
+                kernelSource =
+                    string_format(str, pragma_str.c_str(), tname.c_str(),
+                                  tname.c_str(), tname.c_str(), tname.c_str());
+            }
+            else
+            {
+                std::string str = smoothstep_fn_code_pattern_v3_scalar;
+                kernelSource =
+                    string_format(str, pragma_str.c_str(), tname.c_str(),
+                                  tname.c_str(), tname.c_str(), tname.c_str());
+            }
+        }
+        else
+        {
+            // regular path
+            std::string str = smoothstep_fn_code_pattern;
+            kernelSource =
+                string_format(str, pragma_str.c_str(), tname.c_str(),
+                              vecParam ? vecSizeNames[i] : "", tname.c_str(),
+                              vecParam ? vecSizeNames[i] : "", tname.c_str(),
+                              vecSizeNames[i], tname.c_str(), vecSizeNames[i]);
+        }
+        const char *programPtr = kernelSource.c_str();
+        err =
+            create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
+                                        (const char **)&programPtr, "test_fn");
+        test_error(err, "Unable to create kernel");
+
+        for (int j = 0; j < 4; j++)
+        {
+            err =
+                clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
+            test_error(err, "Unable to set kernel argument");
+        }
+
+        size_t threads = (size_t)n_elems;
+
+        err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
+                                     0, NULL, NULL);
+        test_error(err, "Unable to execute kernel");
+
+        err = clEnqueueReadBuffer(queue, streams[3], true, 0,
+                                  sizeof(T) * num_elements, &output_ptr[0], 0,
+                                  NULL, NULL);
+        test_error(err, "Unable to read results");
+
+        if (verify_smoothstep((T *)&input_ptr[0].front(),
+                              (T *)&input_ptr[1].front(),
+                              (T *)&input_ptr[2].front(), &output_ptr[0],
+                              n_elems, g_arrVecSizes[i], vecParam))
+        {
+            log_error("smoothstep %s%d%s test failed\n", tname.c_str(),
+                      ((g_arrVecSizes[i])),
+                      vecParam ? "" : std::string(", " + tname).c_str());
+            err = -1;
+        }
+        else
+        {
+            log_info("smoothstep %s%d%s test passed\n", tname.c_str(),
+                     ((g_arrVecSizes[i])),
+                     vecParam ? "" : std::string(", " + tname).c_str());
+            err = 0;
+        }
+
+        if (err) break;
     }
-    else
+
+    return err;
+}
+
+
+cl_int SmoothstepTest::Run()
+{
+    cl_int error = CL_SUCCESS;
+
+    error =
+        test_smoothstep_fn<float>(device, context, queue, num_elems, vecParam);
+    test_error(error, "SmoothstepTest::Run<float> failed");
+
+    if (is_extension_available(device, "cl_khr_fp64"))
     {
-      log_info("%s test passed %g max err\n", fn_names[i], max_err);
-      err = 0;
+        error = test_smoothstep_fn<double>(device, context, queue, num_elems,
+                                           vecParam);
+        test_error(error, "SmoothstepTest::Run<double> failed");
     }
 
-    if (err)
-      break;
-  }
-
-  clReleaseMemObject(streams[0]);
-  clReleaseMemObject(streams[1]);
-  clReleaseMemObject(streams[2]);
-  clReleaseMemObject(streams[3]);
-  for (i=0; i<kTotalVecCount; i++)
-  {
-    clReleaseKernel(kernel[i]);
-    clReleaseProgram(program[i]);
-  }
-  free(input_ptr[0]);
-  free(input_ptr[1]);
-  free(input_ptr[2]);
-  free(output_ptr);
-
-  return err;
+    return error;
 }
 
 
+int test_smoothstep(cl_device_id device, cl_context context,
+                    cl_command_queue queue, int n_elems)
+{
+    return MakeAndRunTest<SmoothstepTest>(device, context, queue, n_elems,
+                                          "smoothstep", true);
+}
+
+
+int test_smoothstepf(cl_device_id device, cl_context context,
+                     cl_command_queue queue, int n_elems)
+{
+    return MakeAndRunTest<SmoothstepTest>(device, context, queue, n_elems,
+                                          "smoothstep", false);
+}
diff --git a/test_conformance/commonfns/test_smoothstepf.cpp b/test_conformance/commonfns/test_smoothstepf.cpp
deleted file mode 100644
index ac09e9ec..00000000
--- a/test_conformance/commonfns/test_smoothstepf.cpp
+++ /dev/null
@@ -1,259 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static const char *smoothstep_kernel_code =
-"__kernel void test_smoothstep(__global float *edge0, __global float *edge1, __global float *x, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-static const char *smoothstep2_kernel_code =
-"__kernel void test_smoothstep2f(__global float *edge0, __global float *edge1, __global float2 *x, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-static const char *smoothstep4_kernel_code =
-"__kernel void test_smoothstep4f(__global float *edge0, __global float *edge1, __global float4 *x, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = smoothstep(edge0[tid], edge1[tid], x[tid]);\n"
-"}\n";
-
-#define MAX_ERR (1e-5f)
-
-float verify_smoothstep(float *edge0, float *edge1, float *x, float *outptr,
-                        int n, int veclen)
-{
-  float       r, t, delta, max_err = 0.0f;
-  int         i, j;
-
-  for (i = 0; i < n; ++i) {
-    int vi = i * veclen;
-    for (j = 0; j < veclen; ++j, ++vi) {
-      t = (x[vi] - edge0[i]) / (edge1[i] - edge0[i]);
-      if (t < 0.0f)
-        t = 0.0f;
-      else if (t > 1.0f)
-        t = 1.0f;
-      r = t * t * (3.0f - 2.0f * t);
-      delta = (float)fabs(r - outptr[vi]);
-      if (delta > max_err)
-        max_err = delta;
-    }
-  }
-  return max_err;
-}
-
-const static char *fn_names[] = { "SMOOTHSTEP float", "SMOOTHSTEP float2", "SMOOTHSTEP float4"};
-
-int
-test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-  cl_mem      streams[4];
-  cl_float    *input_ptr[3], *output_ptr, *p, *p_edge0;
-  cl_program  program[3];
-  cl_kernel   kernel[3];
-  size_t  threads[1];
-  float max_err = 0.0f;
-  int num_elements;
-  int err;
-  int i;
-  MTdata d;
-
-  num_elements = n_elems * 4;
-
-  input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  input_ptr[2] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-  streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[0])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[1])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-  streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[2])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-
-  streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                              sizeof(cl_float) * num_elements, NULL, NULL);
-  if (!streams[3])
-  {
-    log_error("clCreateBuffer failed\n");
-    return -1;
-  }
-
-  d = init_genrand( gRandomSeed );
-  p = input_ptr[0];
-  for (i=0; i<num_elements; i++)
-  {
-    p[i] = get_random_float(-0x00200000, 0x00200000, d);
-  }
-
-  p = input_ptr[1];
-  p_edge0 = input_ptr[0];
-  for (i=0; i<num_elements; i++)
-  {
-    float edge0 = p_edge0[i];
-    float edge1;
-    do {
-      edge1 = get_random_float( -0x00200000, 0x00200000, d);
-      if (edge0 < edge1)
-        break;
-    } while (1);
-    p[i] = edge1;
-  }
-
-  p = input_ptr[2];
-  for (i=0; i<num_elements; i++)
-  {
-    p[i] = get_random_float(-0x00200000, 0x00200000, d);
-  }
-  free_mtdata(d);
-  d = NULL;
-
-  err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-  err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-  err = clEnqueueWriteBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[2], 0, NULL, NULL );
-  if (err != CL_SUCCESS)
-  {
-    log_error("clWriteArray failed\n");
-    return -1;
-  }
-
-  err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &smoothstep_kernel_code, "test_smoothstep" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &smoothstep2_kernel_code, "test_smoothstep2f" );
-  if (err)
-    return -1;
-  err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &smoothstep4_kernel_code, "test_smoothstep4f" );
-  if (err)
-    return -1;
-
-  for (i=0; i<3; i++)
-  {
-      err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-      err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-      err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-      err |= clSetKernelArg(kernel[i], 3, sizeof streams[3], &streams[3] );
-      if (err != CL_SUCCESS)
-    {
-      log_error("clSetKernelArgs failed\n");
-      return -1;
-    }
-  }
-
-  threads[0] = (size_t)n_elems;
-  for (i=0; i<3; i++)
-  {
-    err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueNDRangeKernel failed\n");
-      return -1;
-    }
-
-    err = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-      log_error("clEnqueueReadBuffer failed\n");
-      return -1;
-    }
-
-    switch (i)
-    {
-      case 0:
-        max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 1);
-        break;
-      case 1:
-        max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 2);
-        break;
-      case 2:
-        max_err = verify_smoothstep(input_ptr[0], input_ptr[1], input_ptr[2], output_ptr, n_elems, 4);
-        break;
-    }
-
-    if (max_err > MAX_ERR)
-    {
-      log_error("%s test failed %g max err\n", fn_names[i], max_err);
-      err = -1;
-    }
-    else
-    {
-      log_info("%s test passed %g max err\n", fn_names[i], max_err);
-      err = 0;
-    }
-
-    if (err)
-      break;
-  }
-
-  clReleaseMemObject(streams[0]);
-  clReleaseMemObject(streams[1]);
-  clReleaseMemObject(streams[2]);
-  clReleaseMemObject(streams[3]);
-  for (i=0; i<3; i++)
-  {
-    clReleaseKernel(kernel[i]);
-    clReleaseProgram(program[i]);
-  }
-  free(input_ptr[0]);
-  free(input_ptr[1]);
-  free(input_ptr[2]);
-  free(output_ptr);
-
-  return err;
-}
-
-
diff --git a/test_conformance/commonfns/test_step.cpp b/test_conformance/commonfns/test_step.cpp
index 330083b2..dc91766e 100644
--- a/test_conformance/commonfns/test_step.cpp
+++ b/test_conformance/commonfns/test_step.cpp
@@ -1,6 +1,6 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
+// Copyright (c) 2023 The Khronos Group Inc.
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -13,532 +13,252 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/compat.h"
-
 #include <stdio.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 
 #include "procs.h"
-
-static int
-test_step_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
-
-
-const char *step_kernel_code =
-"__kernel void test_step(__global float *srcA, __global float *srcB, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step2_kernel_code =
-"__kernel void test_step2(__global float2 *srcA, __global float2 *srcB, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step4_kernel_code =
-"__kernel void test_step4(__global float4 *srcA, __global float4 *srcB, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step8_kernel_code =
-"__kernel void test_step8(__global float8 *srcA, __global float8 *srcB, __global float8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step16_kernel_code =
-"__kernel void test_step16(__global float16 *srcA, __global float16 *srcB, __global float16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step3_kernel_code =
-"__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(step(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
-"}\n";
-
-
-int
-verify_step(float *inptrA, float *inptrB, float *outptr, int n)
-{
-    float       r;
-    int         i;
-
-    for (i=0; i<n; i++)
-    {
-        r = (inptrB[i] < inptrA[i]) ? 0.0f : 1.0f;
-        if (r != outptr[i])
-            return -1;
-    }
-
-    return 0;
-}
-
-int
-test_step(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
+#include "test_base.h"
+
+
+const char *step_fn_code_pattern = "%s\n" /* optional pragma */
+                                   "__kernel void test_fn(__global %s%s *edge, "
+                                   "__global %s%s *x, __global %s%s *dst)\n"
+                                   "{\n"
+                                   "    int  tid = get_global_id(0);\n"
+                                   "    dst[tid] = step(edge[tid], x[tid]);\n"
+                                   "}\n";
+
+const char *step_fn_code_pattern_v3 =
+    "%s\n" /* optional pragma */
+    "__kernel void test_fn(__global %s *edge, __global %s *x, __global %s "
+    "*dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    vstore3(step(vload3(tid,edge), vload3(tid,x)), tid, dst);\n"
+    "}\n";
+
+const char *step_fn_code_pattern_v3_scalar =
+    "%s\n" /* optional pragma */
+    "__kernel void test_fn(__global %s *edge, __global %s *x, __global %s "
+    "*dst)\n"
+    "{\n"
+    "    int  tid = get_global_id(0);\n"
+    "    vstore3(step(edge[tid], vload3(tid,x)), tid, dst);\n"
+    "}\n";
+
+
+namespace {
+
+template <typename T>
+int verify_step(const T *const inptrA, const T *const inptrB,
+                const T *const outptr, const int n, const int veclen,
+                const bool vecParam)
 {
-    cl_mem      streams[3];
-    cl_float    *input_ptr[2], *output_ptr, *p;
-  cl_program  program[kTotalVecCount];
-  cl_kernel   kernel[kTotalVecCount];
-    void        *values[3];
-    size_t  threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata d;
-  num_elements = n_elems * 16;
-
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x40000000, 0x40000000, d);
-    }
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x40000000, 0x40000000, d);
-    }
-    free_mtdata(d); d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
+    T r;
 
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code, "test_step" );
-    if (err) return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code, "test_step2" );
-    if (err) return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code, "test_step4" );
-    if (err) return -1;
-    err = create_single_kernel_helper(context, &program[3], &kernel[3], 1,
-                                      &step8_kernel_code, "test_step8");
-    if (err) return -1;
-    err = create_single_kernel_helper(context, &program[4], &kernel[4], 1,
-                                      &step16_kernel_code, "test_step16");
-    if (err) return -1;
-    err = create_single_kernel_helper(context, &program[5], &kernel[5], 1,
-                                      &step3_kernel_code, "test_step3");
-    if (err) return -1;
-
-    values[0] = streams[0];
-    values[1] = streams[1];
-    values[2] = streams[2];
-  for (i=0; i <kTotalVecCount; i++)
+    if (vecParam)
     {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-        if (err != CL_SUCCESS)
+        for (int i = 0; i < n * veclen; i++)
         {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
+            r = (inptrB[i] < inptrA[i]) ? 0.0 : 1.0;
+            if (r != outptr[i]) return -1;
         }
     }
-
-    threads[0] = (size_t)n_elems;
-  for (i=0; i<kTotalVecCount; i++)
+    else
     {
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
+        for (int i = 0; i < n;)
         {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
+            int ii = i / veclen;
+            for (int j = 0; j < veclen && i < n; ++j, ++i)
+            {
+                r = (inptrB[i] < inptrA[ii]) ? 0.0f : 1.0f;
+                if (r != outptr[i])
+                {
+                    log_error("Failure @ {%d, element %d}: step(%a,%a) -> *%a "
+                              "vs %a\n",
+                              ii, j, inptrA[ii], inptrB[i], r, outptr[i]);
+                    return -1;
+                }
+            }
         }
+    }
 
-        switch (i)
-        {
-            case 0:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems);
-                if (err)
-                    log_error("STEP float test failed\n");
-                else
-                    log_info("STEP float test passed\n");
-                break;
-
-            case 1:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*2);
-                if (err)
-                    log_error("STEP float2 test failed\n");
-                else
-                    log_info("STEP float2 test passed\n");
-                break;
-
-            case 2:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*4);
-                if (err)
-                    log_error("STEP float4 test failed\n");
-                else
-                    log_info("STEP float4 test passed\n");
-                break;
-
-        case 3:
-        err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*8);
-        if (err)
-          log_error("STEP float8 test failed\n");
-        else
-          log_info("STEP float8 test passed\n");
-        break;
-
-        case 4:
-        err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*16);
-        if (err)
-          log_error("STEP float16 test failed\n");
-        else
-          log_info("STEP float16 test passed\n");
-        break;
+    return 0;
+}
 
-        case 5:
-        err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*3);
-        if (err)
-          log_error("STEP float3 test failed\n");
-        else
-          log_info("STEP float3 test passed\n");
-        break;
-        }
+}
 
-        if (err)
-            break;
-    }
 
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-  for (i=0; i<kTotalVecCount; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
+template <typename T>
+int test_step_fn(cl_device_id device, cl_context context,
+                 cl_command_queue queue, int n_elems, bool vecParam)
+{
+    clMemWrapper streams[3];
+    std::vector<T> input_ptr[2], output_ptr;
 
-    if( err )
-        return err;
+    std::vector<clProgramWrapper> programs;
+    std::vector<clKernelWrapper> kernels;
 
-    if( ! is_extension_available( device, "cl_khr_fp64" ))
-        return 0;
+    int err, i;
+    MTdataHolder d = MTdataHolder(gRandomSeed);
 
-    return test_step_double( device, context, queue, n_elems);
-}
+    assert(BaseFunctionTest::type2name.find(sizeof(T))
+           != BaseFunctionTest::type2name.end());
+    auto tname = BaseFunctionTest::type2name[sizeof(T)];
+    int num_elements = n_elems * (1 << (kTotalVecCount - 1));
 
+    programs.resize(kTotalVecCount);
+    kernels.resize(kTotalVecCount);
 
-#pragma mark -
-
-const char *step_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step2_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step2_double(__global double2 *srcA, __global double2 *srcB, __global double2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step4_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step4_double(__global double4 *srcA, __global double4 *srcB, __global double4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step8_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step8_double(__global double8 *srcA, __global double8 *srcB, __global double8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step16_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step16_double(__global double16 *srcA, __global double16 *srcB, __global double16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-const char *step3_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step3_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(step(vload3(tid,srcA), vload3(tid,srcB)),tid,dst);\n"
-"}\n";
-
-
-int
-verify_step_double(double *inptrA, double *inptrB, double *outptr, int n)
-{
-    double       r;
-    int         i;
+    for (i = 0; i < 2; i++) input_ptr[i].resize(num_elements);
+    output_ptr.resize(num_elements);
 
-    for (i=0; i<n; i++)
+    for (i = 0; i < 3; i++)
     {
-        r = (inptrB[i] < inptrA[i]) ? 0.0 : 1.0;
-        if (r != outptr[i])
-            return -1;
+        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                    sizeof(T) * num_elements, NULL, &err);
+        test_error(err, "clCreateBuffer failed");
     }
 
-    return 0;
-}
-
-static int
-test_step_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem      streams[3];
-    cl_double    *input_ptr[2], *output_ptr, *p;
-    cl_program  program[kTotalVecCount];
-    cl_kernel   kernel[kTotalVecCount];
-    void        *values[3];
-    size_t  threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata d;
-    num_elements = n_elems * 16;
-
-    input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[1])
+    std::string pragma_str;
+    if (std::is_same<T, float>::value)
     {
-        log_error("clCreateBuffer failed\n");
-        return -1;
+        for (i = 0; i < num_elements; i++)
+        {
+            input_ptr[0][i] = get_random_float(-0x40000000, 0x40000000, d);
+            input_ptr[1][i] = get_random_float(-0x40000000, 0x40000000, d);
+        }
     }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[2])
+    else if (std::is_same<T, double>::value)
     {
-        log_error("clCreateBuffer failed\n");
-        return -1;
+        pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+        for (i = 0; i < num_elements; i++)
+        {
+            input_ptr[0][i] = get_random_double(-0x40000000, 0x40000000, d);
+            input_ptr[1][i] = get_random_double(-0x40000000, 0x40000000, d);
+        }
     }
 
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_double(-0x40000000, 0x40000000, d);
-    }
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
+    for (i = 0; i < 2; i++)
     {
-        p[i] = get_random_double(-0x40000000, 0x40000000, d);
+        err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
+                                   sizeof(T) * num_elements,
+                                   &input_ptr[i].front(), 0, NULL, NULL);
+        test_error(err, "Unable to write input buffer");
     }
-    free_mtdata(d); d = NULL;
 
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
+    char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
 
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code_double, "test_step_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code_double, "test_step2_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code_double, "test_step4_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code_double, "test_step8_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code_double, "test_step16_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code_double, "test_step3_double" );
-    if (err)
-        return -1;
-
-    values[0] = streams[0];
-    values[1] = streams[1];
-    values[2] = streams[2];
-    for (i=0; i < kTotalVecCount; i++)
+    for (i = 0; i < kTotalVecCount; i++)
     {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-        if (err != CL_SUCCESS)
+        std::string kernelSource;
+        if (i >= kVectorSizeCount)
         {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
+            if (vecParam)
+            {
+                std::string str = step_fn_code_pattern_v3;
+                kernelSource =
+                    string_format(str, pragma_str.c_str(), tname.c_str(),
+                                  tname.c_str(), tname.c_str());
+            }
+            else
+            {
+                std::string str = step_fn_code_pattern_v3_scalar;
+                kernelSource =
+                    string_format(str, pragma_str.c_str(), tname.c_str(),
+                                  tname.c_str(), tname.c_str());
+            }
         }
-    }
-
-    threads[0] = (size_t)n_elems;
-    for (i=0; i<kTotalVecCount; i++)
-    {
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
+        else
         {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
+            // regular path
+            std::string str = step_fn_code_pattern;
+            kernelSource =
+                string_format(str, pragma_str.c_str(), tname.c_str(),
+                              vecParam ? vecSizeNames[i] : "", tname.c_str(),
+                              vecSizeNames[i], tname.c_str(), vecSizeNames[i]);
         }
+        const char *programPtr = kernelSource.c_str();
+        err =
+            create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
+                                        (const char **)&programPtr, "test_fn");
+        test_error(err, "Unable to create kernel");
 
-        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
+        for (int j = 0; j < 3; j++)
         {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
+            err =
+                clSetKernelArg(kernels[i], j, sizeof(streams[j]), &streams[j]);
+            test_error(err, "Unable to set kernel argument");
         }
 
-        switch (i)
-        {
-            case 0:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems);
-                if (err)
-                    log_error("STEP double test failed\n");
-                else
-                    log_info("STEP double test passed\n");
-                break;
-
-            case 1:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*2);
-                if (err)
-                    log_error("STEP double2 test failed\n");
-                else
-                    log_info("STEP double2 test passed\n");
-                break;
-
-            case 2:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*4);
-                if (err)
-                    log_error("STEP double4 test failed\n");
-                else
-                    log_info("STEP double4 test passed\n");
-                break;
-
-        case 3:
-        err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*8);
-        if (err)
-          log_error("STEP double8 test failed\n");
-        else
-          log_info("STEP double8 test passed\n");
-        break;
+        size_t threads = (size_t)n_elems;
 
-        case 4:
-        err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*16);
-        if (err)
-          log_error("STEP double16 test failed\n");
-        else
-          log_info("STEP double16 test passed\n");
-        break;
+        err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
+                                     0, NULL, NULL);
+        test_error(err, "Unable to execute kernel");
+
+        err = clEnqueueReadBuffer(queue, streams[2], true, 0,
+                                  sizeof(T) * num_elements, &output_ptr[0], 0,
+                                  NULL, NULL);
+        test_error(err, "Unable to read results");
 
-        case 5:
-        err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*3);
+        err = verify_step(&input_ptr[0].front(), &input_ptr[1].front(),
+                          &output_ptr.front(), n_elems, g_arrVecSizes[i],
+                          vecParam);
         if (err)
-          log_error("STEP double3 test failed\n");
+        {
+            log_error("step %s%d%s test failed\n", tname.c_str(),
+                      ((g_arrVecSizes[i])),
+                      vecParam ? "" : std::string(", " + tname).c_str());
+            err = -1;
+        }
         else
-          log_info("STEP double3 test passed\n");
-        break;
+        {
+            log_info("step %s%d%s test passed\n", tname.c_str(),
+                     ((g_arrVecSizes[i])),
+                     vecParam ? "" : std::string(", " + tname).c_str());
+            err = 0;
         }
 
         if (err)
             break;
     }
 
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i<kTotalVecCount; i++)
+    return err;
+}
+
+
+cl_int StepTest::Run()
+{
+    cl_int error = CL_SUCCESS;
+
+    error = test_step_fn<float>(device, context, queue, num_elems, vecParam);
+    test_error(error, "StepTest::Run<float> failed");
+
+    if (is_extension_available(device, "cl_khr_fp64"))
     {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
+        error =
+            test_step_fn<double>(device, context, queue, num_elems, vecParam);
+        test_error(error, "StepTest::Run<double> failed");
     }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
 
-    return err;
+    return error;
+}
+
+
+int test_step(cl_device_id device, cl_context context, cl_command_queue queue,
+              int n_elems)
+{
+    return MakeAndRunTest<StepTest>(device, context, queue, n_elems, "step",
+                                    true);
 }
 
+
+int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue,
+               int n_elems)
+{
+    return MakeAndRunTest<StepTest>(device, context, queue, n_elems, "step",
+                                    false);
+}
diff --git a/test_conformance/commonfns/test_stepf.cpp b/test_conformance/commonfns/test_stepf.cpp
deleted file mode 100644
index efada227..00000000
--- a/test_conformance/commonfns/test_stepf.cpp
+++ /dev/null
@@ -1,546 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include "procs.h"
-
-static int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems);
-
-
-static const char *step_kernel_code =
-"__kernel void test_step(__global float *srcA, __global float *srcB, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step2_kernel_code =
-"__kernel void test_step2(__global float *srcA, __global float2 *srcB, __global float2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step4_kernel_code =
-"__kernel void test_step4(__global float *srcA, __global float4 *srcB, __global float4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step8_kernel_code =
-"__kernel void test_step8(__global float *srcA, __global float8 *srcB, __global float8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step16_kernel_code =
-"__kernel void test_step16(__global float *srcA, __global float16 *srcB, __global float16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step3_kernel_code =
-"__kernel void test_step3(__global float *srcA, __global float *srcB, __global float *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(step(srcA[tid], vload3(tid,srcB)) ,tid,dst);\n"
-"}\n";
-
-
-static int
-verify_step( cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n, int veclen)
-{
-    float       r;
-    int         i, j;
-
-    for (i=0; i<n; ) {
-        int ii = i/veclen;
-        for (j=0; j<veclen && i<n; ++j, ++i) {
-            r = (inptrB[i] < inptrA[ii]) ? 0.0f : 1.0f;
-            if (r != outptr[i])
-            {
-                log_error( "Failure @ {%d, element %d}: step(%a,%a) -> *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] );
-                return -1;
-            }
-        }
-    }
-
-    return 0;
-}
-
-int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem      streams[3];
-    cl_float    *input_ptr[2], *output_ptr, *p;
-    cl_program  program[kTotalVecCount];
-    cl_kernel   kernel[kTotalVecCount];
-    size_t  threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata d;
-    num_elements = n_elems * 16;
-
-    input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    input_ptr[1] = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_float) * num_elements, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x40000000, 0x40000000, d);
-    }
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-    {
-        p[i] = get_random_float(-0x40000000, 0x40000000, d);
-    }
-    free_mtdata(d);   d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code, "test_step" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code, "test_step2" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code, "test_step4" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code, "test_step8" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code, "test_step16" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code, "test_step3" );
-    if (err)
-        return -1;
-
-    for (i=0; i <kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    threads[0] = (size_t)n_elems;
-    for (i=0; i<kTotalVecCount; i++)
-    {
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        switch (i)
-        {
-            case 0:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems, 1);
-                if (err)
-                    log_error("STEP float test failed\n");
-                else
-                    log_info("STEP float test passed\n");
-                break;
-
-            case 1:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*2, 2);
-                if (err)
-                    log_error("STEP float2 test failed\n");
-                else
-                    log_info("STEP float2 test passed\n");
-                break;
-
-            case 2:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*4, 4);
-                if (err)
-                    log_error("STEP float4 test failed\n");
-                else
-                    log_info("STEP float4 test passed\n");
-                break;
-
-            case 3:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*8, 8);
-                if (err)
-                    log_error("STEP float8 test failed\n");
-                else
-                    log_info("STEP float8 test passed\n");
-                break;
-
-            case 4:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*16, 16);
-                if (err)
-                    log_error("STEP float16 test failed\n");
-                else
-                    log_info("STEP float16 test passed\n");
-                break;
-
-            case 5:
-                err = verify_step(input_ptr[0], input_ptr[1], output_ptr, n_elems*3, 3);
-                if (err)
-                    log_error("STEP float3 test failed\n");
-                else
-                    log_info("STEP float3 test passed\n");
-                break;
-        }
-
-        if (err)
-            break;
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i<kTotalVecCount; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-
-    if(err)
-        return err;
-
-    if( ! is_extension_available( device, "cl_khr_fp64" ))
-    {
-        log_info( "Device does not support cl_khr_fp64.  Skipping double precision tests.\n" );
-        return 0;
-    }
-
-    return test_stepf_double( device, context, queue, n_elems);
-}
-
-#pragma mark -
-
-static const char *step_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step2_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step2_double(__global double *srcA, __global double2 *srcB, __global double2 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step4_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step4_double(__global double *srcA, __global double4 *srcB, __global double4 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step8_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step8_double(__global double *srcA, __global double8 *srcB, __global double8 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step16_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step16_double(__global double *srcA, __global double16 *srcB, __global double16 *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    dst[tid] = step(srcA[tid], srcB[tid]);\n"
-"}\n";
-
-static const char *step3_kernel_code_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void test_step3_double(__global double *srcA, __global double *srcB, __global double *dst)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"\n"
-"    vstore3(step(srcA[tid], vload3(tid,srcB)) ,tid,dst);\n"
-"}\n";
-
-
-static int
-verify_step_double(cl_double *inptrA, cl_double *inptrB, cl_double *outptr, int n, int veclen)
-{
-    double r;
-    int    i, j;
-
-    for (i=0; i<n; ) {
-        int ii = i/veclen;
-        for (j=0; j<veclen && i<n; ++j, ++i) {
-            r = (inptrB[i] < inptrA[ii]) ? 0.0 : 1.0;
-            if (r != outptr[i])
-            {
-                log_error( "Failure @ {%d, element %d}: step(%a,%a) -> *%a vs %a\n", ii, j, inptrA[ii], inptrB[i], r, outptr[i] );
-                return -1;
-            }
-        }
-    }
-
-    return 0;
-}
-
-int test_stepf_double(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
-{
-    cl_mem      streams[3];
-    cl_double  *input_ptr[2], *output_ptr, *p;
-    cl_program  program[kTotalVecCount];
-    cl_kernel   kernel[kTotalVecCount];
-    size_t  threads[1];
-    int num_elements;
-    int err;
-    int i;
-    MTdata    d;
-    num_elements = n_elems * 16;
-
-    input_ptr[0] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    input_ptr[1] = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    output_ptr = (cl_double*)malloc(sizeof(cl_double) * num_elements);
-    streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[0])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[1])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
-                                sizeof(cl_double) * num_elements, NULL, NULL);
-    if (!streams[2])
-    {
-        log_error("clCreateBuffer failed\n");
-        return -1;
-    }
-
-    p = input_ptr[0];
-    d = init_genrand( gRandomSeed );
-    for (i=0; i<num_elements; i++)
-        p[i] = get_random_double(-0x40000000, 0x40000000, d);
-
-    p = input_ptr[1];
-    for (i=0; i<num_elements; i++)
-        p[i] = get_random_double(-0x40000000, 0x40000000, d);
-
-    free_mtdata(d);   d = NULL;
-
-    err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[0], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-    err = clEnqueueWriteBuffer( queue, streams[1], true, 0, sizeof(cl_double)*num_elements, (void *)input_ptr[1], 0, NULL, NULL );
-    if (err != CL_SUCCESS)
-    {
-        log_error("clWriteArray failed\n");
-        return -1;
-    }
-
-    err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code_double, "test_step_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code_double, "test_step2_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code_double, "test_step4_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code_double, "test_step8_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code_double, "test_step16_double" );
-    if (err)
-        return -1;
-    err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code_double, "test_step3_double" );
-    if (err)
-        return -1;
-
-    for (i=0; i <kTotalVecCount; i++)
-    {
-        err = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0] );
-        err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1] );
-        err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2] );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clSetKernelArgs failed\n");
-            return -1;
-        }
-    }
-
-    threads[0] = (size_t)n_elems;
-    for (i=0; i<kTotalVecCount; i++)
-    {
-        err = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueNDRangeKernel failed\n");
-            return -1;
-        }
-
-        err = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof(cl_double)*num_elements, (void *)output_ptr, 0, NULL, NULL );
-        if (err != CL_SUCCESS)
-        {
-            log_error("clEnqueueReadBuffer failed\n");
-            return -1;
-        }
-
-        switch (i)
-        {
-            case 0:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems, 1);
-                if (err)
-                    log_error("STEP double test failed\n");
-                else
-                    log_info("STEP double test passed\n");
-                break;
-
-            case 1:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*2, 2);
-                if (err)
-                    log_error("STEP double2 test failed\n");
-                else
-                    log_info("STEP double2 test passed\n");
-                break;
-
-            case 2:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*4, 4);
-                if (err)
-                    log_error("STEP double4 test failed\n");
-                else
-                    log_info("STEP double4 test passed\n");
-                break;
-
-            case 3:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*8, 8);
-                if (err)
-                    log_error("STEP double8 test failed\n");
-                else
-                    log_info("STEP double8 test passed\n");
-                break;
-
-            case 4:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*16, 16);
-                if (err)
-                    log_error("STEP double16 test failed\n");
-                else
-                    log_info("STEP double16 test passed\n");
-                break;
-
-            case 5:
-                err = verify_step_double(input_ptr[0], input_ptr[1], output_ptr, n_elems*3, 3);
-                if (err)
-                    log_error("STEP double3 test failed\n");
-                else
-                    log_info("STEP double3 test passed\n");
-                break;
-        }
-
-        if (err)
-            break;
-    }
-
-    clReleaseMemObject(streams[0]);
-    clReleaseMemObject(streams[1]);
-    clReleaseMemObject(streams[2]);
-    for (i=0; i<kTotalVecCount; i++)
-    {
-        clReleaseKernel(kernel[i]);
-        clReleaseProgram(program[i]);
-    }
-    free(input_ptr[0]);
-    free(input_ptr[1]);
-    free(output_ptr);
-
-    return err;
-}
-
diff --git a/test_conformance/commonfns/test_unary_fn.cpp b/test_conformance/commonfns/test_unary_fn.cpp
new file mode 100644
index 00000000..fed4389d
--- /dev/null
+++ b/test_conformance/commonfns/test_unary_fn.cpp
@@ -0,0 +1,365 @@
+//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <vector>
+
+#include "harness/deviceInfo.h"
+#include "harness/typeWrappers.h"
+
+#include "procs.h"
+#include "test_base.h"
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846264338327950288
+#endif
+
+
+// clang-format off
+const char *unary_fn_code_pattern =
+"%s\n" /* optional pragma */
+"__kernel void test_fn(__global %s%s *src, __global %s%s *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    dst[tid] = %s(src[tid]);\n"
+"}\n";
+
+const char *unary_fn_code_pattern_v3 =
+"%s\n" /* optional pragma */
+"__kernel void test_fn(__global %s *src, __global %s *dst)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"\n"
+"    vstore3(%s(vload3(tid,src)), tid, dst);\n"
+"}\n";
+// clang-format on
+
+
+#define MAX_ERR 2.0f
+
+namespace {
+
+
+template <typename T> float UlpFn(const T &val, const double &r)
+{
+    if (std::is_same<T, double>::value)
+        return Ulp_Error_Double(val, r);
+    else if (std::is_same<T, float>::value)
+        return Ulp_Error(val, r);
+    else if (std::is_same<T, half>::value)
+        return Ulp_Error(val, r);
+}
+
+
+template <typename T>
+int verify_degrees(const T *const inptr, const T *const outptr, int n)
+{
+    float error, max_error = 0.0f;
+    double r, max_val = NAN;
+    int max_index = 0;
+
+    for (int i = 0, j = 0; i < n; i++, j++)
+    {
+        r = (180.0 / M_PI) * inptr[i];
+
+        error = UlpFn(outptr[i], r);
+
+        if (fabsf(error) > max_error)
+        {
+            max_error = error;
+            max_index = i;
+            max_val = r;
+            if (fabsf(error) > MAX_ERR)
+            {
+                log_error("%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n",
+                          i, inptr[i], r, outptr[i], r, outptr[i], error);
+                return 1;
+            }
+        }
+    }
+
+    log_info("degrees: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n",
+             max_error, max_index, max_val, outptr[max_index], max_val,
+             outptr[max_index]);
+
+    return 0;
+}
+
+
+template <typename T>
+int verify_radians(const T *const inptr, const T *const outptr, int n)
+{
+    float error, max_error = 0.0f;
+    double r, max_val = NAN;
+    int max_index = 0;
+
+    for (int i = 0, j = 0; i < n; i++, j++)
+    {
+        r = (M_PI / 180.0) * inptr[i];
+        error = Ulp_Error(outptr[i], r);
+        if (fabsf(error) > max_error)
+        {
+            max_error = error;
+            max_index = i;
+            max_val = r;
+            if (fabsf(error) > MAX_ERR)
+            {
+                log_error("%d) Error @ %a: *%a vs %a  (*%g vs %g) ulps: %f\n",
+                          i, inptr[i], r, outptr[i], r, outptr[i], error);
+                return 1;
+            }
+        }
+    }
+
+    log_info("radians: Max error %f ulps at %d: *%a vs %a  (*%g vs %g)\n",
+             max_error, max_index, max_val, outptr[max_index], max_val,
+             outptr[max_index]);
+
+    return 0;
+}
+
+
+template <typename T>
+int verify_sign(const T *const inptr, const T *const outptr, int n)
+{
+    T r = 0;
+    for (int i = 0; i < n; i++)
+    {
+        if (inptr[i] > 0.0f)
+            r = 1.0;
+        else if (inptr[i] < 0.0f)
+            r = -1.0;
+        else
+            r = 0.0;
+        if (r != outptr[i]) return -1;
+    }
+    return 0;
+}
+
+}
+
+
+template <typename T>
+int test_unary_fn(cl_device_id device, cl_context context,
+                  cl_command_queue queue, int n_elems,
+                  const std::string &fnName, VerifyFuncUnary<T> verifyFn)
+{
+    clMemWrapper streams[2];
+    std::vector<T> input_ptr, output_ptr;
+
+    std::vector<clProgramWrapper> programs;
+    std::vector<clKernelWrapper> kernels;
+
+    int err, i;
+    MTdataHolder d = MTdataHolder(gRandomSeed);
+
+    assert(BaseFunctionTest::type2name.find(sizeof(T))
+           != BaseFunctionTest::type2name.end());
+    auto tname = BaseFunctionTest::type2name[sizeof(T)];
+
+    programs.resize(kTotalVecCount);
+    kernels.resize(kTotalVecCount);
+
+    int num_elements = n_elems * (1 << (kTotalVecCount - 1));
+
+    input_ptr.resize(num_elements);
+    output_ptr.resize(num_elements);
+
+    for (i = 0; i < 2; i++)
+    {
+        streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                    sizeof(T) * num_elements, NULL, &err);
+        test_error(err, "clCreateBuffer failed");
+    }
+
+    std::string pragma_str;
+    if (std::is_same<T, float>::value)
+    {
+        for (int j = 0; j < num_elements; j++)
+        {
+            input_ptr[j] = get_random_float((float)(-100000.f * M_PI),
+                                            (float)(100000.f * M_PI), d);
+        }
+    }
+    else if (std::is_same<T, double>::value)
+    {
+        pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+        for (int j = 0; j < num_elements; j++)
+        {
+            input_ptr[j] =
+                get_random_double(-100000.0 * M_PI, 100000.0 * M_PI, d);
+        }
+    }
+
+    err = clEnqueueWriteBuffer(queue, streams[0], true, 0,
+                               sizeof(T) * num_elements, &input_ptr.front(), 0,
+                               NULL, NULL);
+    if (err != CL_SUCCESS)
+    {
+        log_error("clEnqueueWriteBuffer failed\n");
+        return -1;
+    }
+
+    for (i = 0; i < kTotalVecCount; i++)
+    {
+        std::string kernelSource;
+        char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
+
+        if (i >= kVectorSizeCount)
+        {
+            std::string str = unary_fn_code_pattern_v3;
+            kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(),
+                                         tname.c_str(), fnName.c_str());
+        }
+        else
+        {
+            std::string str = unary_fn_code_pattern;
+            kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(),
+                                         vecSizeNames[i], tname.c_str(),
+                                         vecSizeNames[i], fnName.c_str());
+        }
+
+        /* Create kernels */
+        const char *programPtr = kernelSource.c_str();
+        err =
+            create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
+                                        (const char **)&programPtr, "test_fn");
+
+        err = clSetKernelArg(kernels[i], 0, sizeof streams[0], &streams[0]);
+        err |= clSetKernelArg(kernels[i], 1, sizeof streams[1], &streams[1]);
+        if (err != CL_SUCCESS)
+        {
+            log_error("clSetKernelArgs failed\n");
+            return -1;
+        }
+
+        // Line below is troublesome...
+        size_t threads = (size_t)num_elements / ((g_arrVecSizes[i]));
+        err = clEnqueueNDRangeKernel(queue, kernels[i], 1, NULL, &threads, NULL,
+                                     0, NULL, NULL);
+        if (err != CL_SUCCESS)
+        {
+            log_error("clEnqueueNDRangeKernel failed\n");
+            return -1;
+        }
+
+        cl_uint dead = 42;
+        memset_pattern4(&output_ptr[0], &dead, sizeof(T) * num_elements);
+        err = clEnqueueReadBuffer(queue, streams[1], true, 0,
+                                  sizeof(T) * num_elements, &output_ptr[0], 0,
+                                  NULL, NULL);
+        if (err != CL_SUCCESS)
+        {
+            log_error("clEnqueueReadBuffer failed\n");
+            return -1;
+        }
+
+        if (verifyFn((T *)&input_ptr.front(), (T *)&output_ptr.front(),
+                     n_elems * (i + 1)))
+        {
+            log_error("%s %s%d test failed\n", fnName.c_str(), tname.c_str(),
+                      ((g_arrVecSizes[i])));
+            err = -1;
+        }
+        else
+        {
+            log_info("%s %s%d test passed\n", fnName.c_str(), tname.c_str(),
+                     ((g_arrVecSizes[i])));
+        }
+
+        if (err) break;
+    }
+
+    return err;
+}
+
+
+cl_int DegreesTest::Run()
+{
+    cl_int error = test_unary_fn<float>(device, context, queue, num_elems,
+                                        fnName.c_str(), verify_degrees<float>);
+    test_error(error, "DegreesTest::Run<float> failed");
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+    {
+        error = test_unary_fn<double>(device, context, queue, num_elems,
+                                      fnName.c_str(), verify_degrees<double>);
+        test_error(error, "DegreesTest::Run<double> failed");
+    }
+
+    return error;
+}
+
+
+cl_int RadiansTest::Run()
+{
+    cl_int error = test_unary_fn<float>(device, context, queue, num_elems,
+                                        fnName.c_str(), verify_radians<float>);
+    test_error(error, "RadiansTest::Run<float> failed");
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+    {
+        error = test_unary_fn<double>(device, context, queue, num_elems,
+                                      fnName.c_str(), verify_radians<double>);
+        test_error(error, "RadiansTest::Run<double> failed");
+    }
+
+    return error;
+}
+
+
+cl_int SignTest::Run()
+{
+    cl_int error = test_unary_fn<float>(device, context, queue, num_elems,
+                                        fnName.c_str(), verify_sign<float>);
+    test_error(error, "SignTest::Run<float> failed");
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+    {
+        error = test_unary_fn<double>(device, context, queue, num_elems,
+                                      fnName.c_str(), verify_sign<double>);
+        test_error(error, "SignTest::Run<double> failed");
+    }
+
+    return error;
+}
+
+
+int test_degrees(cl_device_id device, cl_context context,
+                 cl_command_queue queue, int n_elems)
+{
+    return MakeAndRunTest<DegreesTest>(device, context, queue, n_elems,
+                                       "degrees");
+}
+
+
+int test_radians(cl_device_id device, cl_context context,
+                 cl_command_queue queue, int n_elems)
+{
+    return MakeAndRunTest<RadiansTest>(device, context, queue, n_elems,
+                                       "radians");
+}
+
+
+int test_sign(cl_device_id device, cl_context context, cl_command_queue queue,
+              int n_elems)
+{
+    return MakeAndRunTest<SignTest>(device, context, queue, n_elems, "sign");
+}
diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
index b95b0f53..d53af8dc 100644
--- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
+++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
@@ -82,9 +82,12 @@ const char *known_extensions[] = {
     "cl_khr_external_semaphore_win32",
     "cl_khr_external_semaphore_sync_fd",
     "cl_khr_external_semaphore_opaque_fd",
+    "cl_khr_external_semaphore_dx_fence",
     "cl_khr_external_memory",
     "cl_khr_external_memory_win32",
     "cl_khr_external_memory_opaque_fd",
+    "cl_khr_external_memory_dx",
+    "cl_khr_external_memory_dma_buf",
     "cl_khr_command_buffer",
     "cl_khr_command_buffer_mutable_dispatch",
 };
@@ -117,6 +120,11 @@ const char *kernel_strings[] = {
     "}\n"
 };
 
+bool string_has_prefix(const char *str, const char *prefix)
+{
+    return strncmp(str, prefix, strlen(prefix)) == 0;
+}
+
 int test_compiler_defines_for_extensions(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
 {
 
@@ -164,6 +172,7 @@ int test_compiler_defines_for_extensions(cl_device_id device, cl_context context
 
     memset( extension_type, 0, sizeof( extension_type) );
 
+    bool failed = false;
     // loop over extension string
     while (currentP != extensions + stringSize)
     {
@@ -199,88 +208,96 @@ int test_compiler_defines_for_extensions(cl_device_id device, cl_context context
 
         // record the extension name
         uintptr_t extension_length = (uintptr_t) currentP - (uintptr_t) start;
-        extensions_supported[ num_of_supported_extensions ] = (char*) malloc( (extension_length + 1) * sizeof( char ) );
-        if( NULL == extensions_supported[ num_of_supported_extensions ] )
+        char *extension = (char *)malloc((extension_length + 1) * sizeof(char));
+        if (extension == NULL)
         {
             log_error( "Error: unable to allocate memory to hold extension name: %ld chars\n", extension_length );
             return -1;
         }
-        memcpy( extensions_supported[ num_of_supported_extensions ], start, extension_length * sizeof( char ) );
-        extensions_supported[ num_of_supported_extensions ][extension_length] = '\0';
+        extensions_supported[num_of_supported_extensions] = extension;
+        memcpy(extension, start, extension_length * sizeof(char));
+        extension[extension_length] = '\0';
 
         // If the extension is a cl_khr extension, make sure it is an approved cl_khr extension -- looking for misspellings here
-        if( extensions_supported[ num_of_supported_extensions ][0] == 'c'  &&
-            extensions_supported[ num_of_supported_extensions ][1] == 'l'  &&
-            extensions_supported[ num_of_supported_extensions ][2] == '_'  &&
-            extensions_supported[ num_of_supported_extensions ][3] == 'k'  &&
-            extensions_supported[ num_of_supported_extensions ][4] == 'h'  &&
-            extensions_supported[ num_of_supported_extensions ][5] == 'r'  &&
-            extensions_supported[ num_of_supported_extensions ][6] == '_' )
+        if (string_has_prefix(extension, "cl_khr_"))
         {
             size_t ii;
             for( ii = 0; ii < num_known_extensions; ii++ )
             {
-                if( 0 == strcmp( known_extensions[ii], extensions_supported[ num_of_supported_extensions ] ) )
-                    break;
+                if (strcmp(known_extensions[ii], extension) == 0) break;
             }
             if( ii == num_known_extensions )
             {
-                log_error( "FAIL: Extension %s is not in the list of approved Khronos extensions!", extensions_supported[ num_of_supported_extensions ] );
-                return -1;
+                log_error("FAIL: Extension %s is not in the list of approved "
+                          "Khronos extensions!\n",
+                          extension);
+                failed = true;
             }
         }
         // Is it an embedded extension?
-        else if( memcmp( extensions_supported[ num_of_supported_extensions ], "cles_khr_", 9 ) == 0 )
+        else if (string_has_prefix(extension, "cles_khr_"))
         {
             // Yes, but is it a known one?
             size_t ii;
             for( ii = 0; known_embedded_extensions[ ii ] != NULL; ii++ )
             {
-                if( strcmp( known_embedded_extensions[ ii ], extensions_supported[ num_of_supported_extensions ] ) == 0 )
+                if (strcmp(known_embedded_extensions[ii], extension) == 0)
                     break;
             }
             if( known_embedded_extensions[ ii ] == NULL )
             {
-                log_error( "FAIL: Extension %s is not in the list of approved Khronos embedded extensions!", extensions_supported[ num_of_supported_extensions ] );
-                return -1;
+                log_error("FAIL: Extension %s is not in the list of approved "
+                          "Khronos embedded extensions!\n",
+                          extension);
+                failed = true;
             }
-
-            // It's approved, but are we even an embedded system?
-            char profileStr[128] = "";
-            error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( profileStr ), &profileStr, NULL );
-            test_error( error, "Unable to get CL_DEVICE_PROFILE to validate embedded extension name" );
-
-            if( strcmp( profileStr, "EMBEDDED_PROFILE" ) != 0 )
+            else
             {
-                log_error( "FAIL: Extension %s is an approved embedded extension, but on a non-embedded profile!", extensions_supported[ num_of_supported_extensions ] );
-                return -1;
+                // It's approved, but are we even an embedded system?
+                char profileStr[128] = "";
+                error = clGetDeviceInfo(device, CL_DEVICE_PROFILE,
+                                        sizeof(profileStr), &profileStr, NULL);
+                test_error(error,
+                           "Unable to get CL_DEVICE_PROFILE to validate "
+                           "embedded extension name");
+
+                if (strcmp(profileStr, "EMBEDDED_PROFILE") != 0)
+                {
+                    log_error(
+                        "FAIL: Extension %s is an approved embedded extension, "
+                        "but on a non-embedded profile!\n",
+                        extension);
+                    failed = true;
+                }
             }
         }
         else
         { // All other extensions must be of the form cl_<vendor_name>_<name>
-            if( extensions_supported[ num_of_supported_extensions ][0] != 'c'  ||
-                extensions_supported[ num_of_supported_extensions ][1] != 'l'  ||
-                extensions_supported[ num_of_supported_extensions ][2] != '_' )
+            if (!string_has_prefix(extension, "cl_"))
             {
-                log_error( "FAIL:  Extension %s doesn't start with \"cl_\"!", extensions_supported[ num_of_supported_extensions ] );
-                return -1;
+                log_error("FAIL:  Extension %s doesn't start with \"cl_\"!\n",
+                          extension);
+                failed = true;
             }
-
-            if( extensions_supported[ num_of_supported_extensions ][3] == '_' || extensions_supported[ num_of_supported_extensions ][3] == '\0' )
+            else if (extension[3] == '_' || extension[3] == '\0')
             {
-                log_error( "FAIL:  Vendor name is missing in extension %s!", extensions_supported[ num_of_supported_extensions ] );
-                return -1;
+                log_error("FAIL:  Vendor name is missing in extension %s!\n",
+                          extension);
+                failed = true;
             }
-
-            // look for the second underscore for name
-            char *p = extensions_supported[ num_of_supported_extensions ] + 4;
-            while( *p != '\0' && *p != '_' )
-                p++;
-
-            if( *p != '_' || p[1] == '\0')
+            else
             {
-                log_error( "FAIL:  extension name is missing in extension %s!", extensions_supported[ num_of_supported_extensions ] );
-                return -1;
+                // look for the second underscore for name
+                char *p = extension + 4;
+                while (*p != '\0' && *p != '_') p++;
+
+                if (*p != '_' || p[1] == '\0')
+                {
+                    log_error(
+                        "FAIL:  extension name is missing in extension %s!\n",
+                        extension);
+                    failed = true;
+                }
             }
         }
 
@@ -288,6 +305,11 @@ int test_compiler_defines_for_extensions(cl_device_id device, cl_context context
         num_of_supported_extensions++;
     }
 
+    if (failed)
+    {
+        return -1;
+    }
+
     // Build a list of the known extensions that are not supported by the device
     char *extensions_not_supported[1024];
     int num_not_supported_extensions = 0;
diff --git a/test_conformance/computeinfo/main.cpp b/test_conformance/computeinfo/main.cpp
index 382cd6a3..9cecabea 100644
--- a/test_conformance/computeinfo/main.cpp
+++ b/test_conformance/computeinfo/main.cpp
@@ -1362,8 +1362,7 @@ int test_computeinfo(cl_device_id deviceID, cl_context context,
     else
     {
         // print device info
-        int onInfo;
-        for (onInfo = 0;
+        for (size_t onInfo = 0;
              onInfo < sizeof(device_infos) / sizeof(device_infos[0]); onInfo++)
         {
             log_info("Getting device IDs for %s devices\n",
@@ -1390,9 +1389,8 @@ int test_computeinfo(cl_device_id deviceID, cl_context context,
                 test_error(err, "clGetDeviceIDs failed");
             }
 
-            int onDevice;
-            for (onDevice = 0; onDevice < device_infos[onInfo].num_devices;
-                 onDevice++)
+            for (size_t onDevice = 0;
+                 onDevice < device_infos[onInfo].num_devices; onDevice++)
             {
                 log_info("%s Device %d of %d Info:\n",
                          device_infos[onInfo].device_type_name, onDevice + 1,
diff --git a/test_conformance/conversions/CMakeLists.txt b/test_conformance/conversions/CMakeLists.txt
index 523b6ead..cc019b26 100644
--- a/test_conformance/conversions/CMakeLists.txt
+++ b/test_conformance/conversions/CMakeLists.txt
@@ -16,4 +16,6 @@ set_source_files_properties(
         COMPILE_FLAGS -march=i686)
 endif(NOT CMAKE_CL_64 AND NOT MSVC AND NOT ANDROID)
 
+set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
+
 include(../CMakeCommon.txt)
diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp
index 3ee072da..dfb32279 100644
--- a/test_conformance/conversions/basic_test_conversions.cpp
+++ b/test_conformance/conversions/basic_test_conversions.cpp
@@ -13,6 +13,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
+#include "harness/testHarness.h"
 #include "harness/compat.h"
 
 #include "basic_test_conversions.h"
diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h
index 3e672da9..ab887afd 100644
--- a/test_conformance/conversions/basic_test_conversions.h
+++ b/test_conformance/conversions/basic_test_conversions.h
@@ -54,7 +54,6 @@ extern const char *gRoundingModeNames[ kRoundingModeCount ];        // { "", "_r
 extern const char *gSaturationNames[ kSaturationModeCount ];        // { "", "_sat" }
 extern const char *gVectorSizeNames[kVectorSizeCount];              // { "", "2", "4", "8", "16" }
 extern size_t gTypeSizes[ kTypeCount ];
-extern int gIsEmbedded;
 
 //Functions for clamping floating point numbers into the representable range for the type
 typedef float (*clampf)( float );
diff --git a/test_conformance/conversions/run_batch b/test_conformance/conversions/run_batch
index a99abeee..dcd6aa51 100644
--- a/test_conformance/conversions/run_batch
+++ b/test_conformance/conversions/run_batch
@@ -1,4 +1,4 @@
-#!/usr/bin/csh
+#!/bin/bash
 #
 # This runs the conversions in 32- and 64-bit modes, split into 9 processes for better throughput.
 # It is intended to allow for quicker debugging turnaround for code development purposes
diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp
index 2b18b925..2ee05463 100644
--- a/test_conformance/conversions/test_conversions.cpp
+++ b/test_conformance/conversions/test_conversions.cpp
@@ -23,7 +23,7 @@
 #include <sys/sysctl.h>
 #endif
 
-#if defined( __linux__ )
+#if defined(__linux__)
 #include <unistd.h>
 #include <sys/syscall.h>
 #include <linux/sysctl.h>
@@ -53,7 +53,7 @@
 #include "Sleep.h"
 #include "basic_test_conversions.h"
 
-#if (defined(_WIN32) && defined (_MSC_VER))
+#if (defined(_WIN32) && defined(_MSC_VER))
 // need for _controlfp_s and rouinding modes in RoundingMode
 #include "harness/testHarness.h"
 #endif
@@ -61,72 +61,73 @@
 #pragma mark -
 #pragma mark globals
 
-#define BUFFER_SIZE     (1024*1024)
-#define kPageSize       4096
+#define BUFFER_SIZE (1024 * 1024)
+#define kPageSize 4096
 #define EMBEDDED_REDUCTION_FACTOR 16
 #define PERF_LOOP_COUNT 100
 
-#define      kCallStyleCount (kVectorSizeCount + 1 /* for implicit scalar */)
+#define kCallStyleCount (kVectorSizeCount + 1 /* for implicit scalar */)
 
 #if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
 #include "fplib.h"
-    extern bool            qcom_sat;
-    extern roundingMode    qcom_rm;
+extern bool qcom_sat;
+extern roundingMode qcom_rm;
 #endif
 
-const char **   argList = NULL;
-int             argCount = 0;
-cl_context      gContext = NULL;
-cl_command_queue      gQueue = NULL;
-char            appName[64] = "ctest";
-int             gStartTestNumber = -1;
-int             gEndTestNumber = 0;
-#if defined( __APPLE__ )
-int             gTimeResults = 1;
+const char **argList = NULL;
+int argCount = 0;
+cl_context gContext = NULL;
+cl_command_queue gQueue = NULL;
+char appName[64] = "ctest";
+int gStartTestNumber = -1;
+int gEndTestNumber = 0;
+#if defined(__APPLE__)
+int gTimeResults = 1;
 #else
-int             gTimeResults = 0;
+int gTimeResults = 0;
 #endif
-int             gReportAverageTimes = 0;
-void            *gIn = NULL;
-void            *gRef = NULL;
-void        *gAllowZ = NULL;
-void            *gOut[ kCallStyleCount ] = { NULL };
-cl_mem          gInBuffer;
-cl_mem          gOutBuffers[ kCallStyleCount ];
-size_t          gComputeDevices = 0;
-uint32_t        gDeviceFrequency = 0;
-int             gWimpyMode = 0;
-int             gWimpyReductionFactor = 128;
-int             gSkipTesting = 0;
-int             gForceFTZ = 0;
-int             gMultithread = 1;
-int             gIsRTZ = 0;
-uint32_t        gSimdSize = 1;
-int             gHasDouble = 0;
-int             gTestDouble = 1;
-const char *    sizeNames[] = { "", "", "2", "3", "4", "8", "16" };
-const int       vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 };
-int             gMinVectorSize = 0;
-int             gMaxVectorSize = sizeof(vectorSizes) / sizeof( vectorSizes[0] );
-static MTdata   gMTdata;
+int gReportAverageTimes = 0;
+void *gIn = NULL;
+void *gRef = NULL;
+void *gAllowZ = NULL;
+void *gOut[kCallStyleCount] = { NULL };
+cl_mem gInBuffer;
+cl_mem gOutBuffers[kCallStyleCount];
+size_t gComputeDevices = 0;
+uint32_t gDeviceFrequency = 0;
+int gWimpyMode = 0;
+int gWimpyReductionFactor = 128;
+int gSkipTesting = 0;
+int gForceFTZ = 0;
+int gMultithread = 1;
+int gIsRTZ = 0;
+uint32_t gSimdSize = 1;
+int gHasDouble = 0;
+int gTestDouble = 1;
+const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" };
+const int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 };
+int gMinVectorSize = 0;
+int gMaxVectorSize = sizeof(vectorSizes) / sizeof(vectorSizes[0]);
+static MTdata gMTdata;
 
 #pragma mark -
 #pragma mark Declarations
 
-static int ParseArgs( int argc, const char **argv );
-static void PrintUsage( void );
-test_status InitCL( cl_device_id device );
-static int GetTestCase( const char *name, Type *outType, Type *inType, SaturationMode *sat, RoundingMode *round );
-static int DoTest( cl_device_id device, Type outType, Type inType, SaturationMode sat, RoundingMode round, MTdata d );
-static cl_program   MakeProgram( Type outType, Type inType, SaturationMode sat, RoundingMode round, int vectorSize, cl_kernel *outKernel );
-static int RunKernel( cl_kernel kernel, void *inBuf, void *outBuf, size_t blockCount );
-
-void *FlushToZero( void );
-void UnFlushToZero( void *);
-
-static cl_program CreateImplicitConvertProgram( Type outType, Type inType, SaturationMode sat, RoundingMode round, int vectorSize, char testName[256], cl_int *error );
-static cl_program CreateStandardProgram( Type outType, Type inType, SaturationMode sat, RoundingMode round, int vectorSize, char testName[256], cl_int *error );
-
+static int ParseArgs(int argc, const char **argv);
+static void PrintUsage(void);
+test_status InitCL(cl_device_id device);
+static int GetTestCase(const char *name, Type *outType, Type *inType,
+                       SaturationMode *sat, RoundingMode *round);
+static int DoTest(cl_device_id device, Type outType, Type inType,
+                  SaturationMode sat, RoundingMode round, MTdata d);
+static cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
+                              RoundingMode round, int vectorSize,
+                              cl_kernel *outKernel);
+static int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf,
+                     size_t blockCount);
+
+void *FlushToZero(void);
+void UnFlushToZero(void *);
 
 // Windows (since long double got deprecated) sets the x87 to 53-bit precision
 // (that's x87 default state).  This causes problems with the tests that
@@ -143,15 +144,16 @@ static inline void Force64BitFPUPrecision(void)
     // divergent code just use inline assembly which works for both.
     unsigned short int orig_cw = 0;
     unsigned short int new_cw = 0;
-    __asm__ __volatile__ ("fstcw %0":"=m" (orig_cw));
-    new_cw = orig_cw | 0x0300;   // set precision to 64-bit
-    __asm__ __volatile__ ("fldcw  %0"::"m" (new_cw));
+    __asm__ __volatile__("fstcw %0" : "=m"(orig_cw));
+    new_cw = orig_cw | 0x0300; // set precision to 64-bit
+    __asm__ __volatile__("fldcw  %0" ::"m"(new_cw));
 #else
     /* Implement for other platforms if needed */
 #endif
 }
 
-int test_conversions( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
+int test_conversions(cl_device_id device, cl_context context,
+                     cl_command_queue queue, int num_elements)
 {
     int error, i, testNumber = -1;
     int startMinVectorSize = gMinVectorSize;
@@ -159,109 +161,148 @@ int test_conversions( cl_device_id device, cl_context context, cl_command_queue
     RoundingMode round;
     SaturationMode sat;
 
-    if( argCount )
+    if (argCount)
     {
-        for( i = 0; i < argCount; i++ )
+        for (i = 0; i < argCount; i++)
         {
-            if( GetTestCase( argList[i], &outType, &inType, &sat, &round ) )
+            if (GetTestCase(argList[i], &outType, &inType, &sat, &round))
             {
-                vlog_error( "\n\t\t**** ERROR:  Unable to parse function name %s.  Skipping....  *****\n\n", argList[i] );
+                vlog_error("\n\t\t**** ERROR:  Unable to parse function name "
+                           "%s.  Skipping....  *****\n\n",
+                           argList[i]);
                 continue;
             }
 
             // skip double if we don't have it
-            if( !gTestDouble && (inType == kdouble || outType == kdouble ) )
+            if (!gTestDouble && (inType == kdouble || outType == kdouble))
             {
-                if( gHasDouble )
+                if (gHasDouble)
                 {
-                    vlog_error( "\t *** convert_%sn%s%s( %sn ) FAILED ** \n", gTypeNames[ outType ], gSaturationNames[ sat ], gRoundingModeNames[round], gTypeNames[inType] );
-                    vlog( "\t\tcl_khr_fp64 enabled, but double testing turned off.\n" );
+                    vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
+                               gTypeNames[outType], gSaturationNames[sat],
+                               gRoundingModeNames[round], gTypeNames[inType]);
+                    vlog("\t\tcl_khr_fp64 enabled, but double testing turned "
+                         "off.\n");
                 }
 
                 continue;
             }
 
             // skip longs on embedded
-            if( !gHasLong && (inType == klong || outType == klong || inType == kulong || outType == kulong) )
+            if (!gHasLong
+                && (inType == klong || outType == klong || inType == kulong
+                    || outType == kulong))
             {
                 continue;
             }
 
-            // Skip the implicit converts if the rounding mode is not default or test is saturated
-            if( 0 == startMinVectorSize )
+            // Skip the implicit converts if the rounding mode is not default or
+            // test is saturated
+            if (0 == startMinVectorSize)
             {
-                if( sat || round != kDefaultRoundingMode )
+                if (sat || round != kDefaultRoundingMode)
                     gMinVectorSize = 1;
                 else
                     gMinVectorSize = 0;
             }
 
-            if( ( error = DoTest( device, outType, inType, sat, round, gMTdata ) ) )
+            if ((error = DoTest(device, outType, inType, sat, round, gMTdata)))
             {
-                vlog_error( "\t *** convert_%sn%s%s( %sn ) FAILED ** \n", gTypeNames[outType], gSaturationNames[sat], gRoundingModeNames[round], gTypeNames[inType] );
+                vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
+                           gTypeNames[outType], gSaturationNames[sat],
+                           gRoundingModeNames[round], gTypeNames[inType]);
             }
         }
     }
     else
     {
-        for( outType = (Type)0; outType < kTypeCount; outType = (Type)(outType+1) )
+        for (outType = (Type)0; outType < kTypeCount;
+             outType = (Type)(outType + 1))
         {
-            for( inType = (Type)0; inType < kTypeCount; inType = (Type)(inType+1) )
+            for (inType = (Type)0; inType < kTypeCount;
+                 inType = (Type)(inType + 1))
             {
                 // skip longs on embedded
-                if( !gHasLong && (inType == klong || outType == klong || inType == kulong || outType == kulong) )
+                if (!gHasLong
+                    && (inType == klong || outType == klong || inType == kulong
+                        || outType == kulong))
                 {
                     continue;
                 }
 
-                for( sat = (SaturationMode)0; sat < kSaturationModeCount; sat = (SaturationMode)(sat+1) )
+                for (sat = (SaturationMode)0; sat < kSaturationModeCount;
+                     sat = (SaturationMode)(sat + 1))
                 {
-                    //skip illegal saturated conversions to float type
-                    if( kSaturated == sat && ( outType == kfloat || outType == kdouble ) )
+                    // skip illegal saturated conversions to float type
+                    if (kSaturated == sat
+                        && (outType == kfloat || outType == kdouble))
                     {
                         continue;
                     }
 
-                    for( round = (RoundingMode)0; round < kRoundingModeCount; round = (RoundingMode)(round+1) )
+                    for (round = (RoundingMode)0; round < kRoundingModeCount;
+                         round = (RoundingMode)(round + 1))
                     {
-                        if( ++testNumber < gStartTestNumber )
+                        if (++testNumber < gStartTestNumber)
                         {
-                            //     vlog( "%d) skipping convert_%sn%s%s( %sn )\n", testNumber, gTypeNames[ outType ], gSaturationNames[ sat ], gRoundingModeNames[round], gTypeNames[inType] );
+                            //     vlog( "%d) skipping convert_%sn%s%s( %sn
+                            //     )\n", testNumber, gTypeNames[ outType ],
+                            //     gSaturationNames[ sat ],
+                            //     gRoundingModeNames[round], gTypeNames[inType]
+                            //     );
                             continue;
                         }
                         else
                         {
-                            if( gEndTestNumber > 0 && testNumber >= gEndTestNumber  )
+                            if (gEndTestNumber > 0
+                                && testNumber >= gEndTestNumber)
                             {
                                 goto exit;
                             }
                         }
 
-                        vlog( "%d) Testing convert_%sn%s%s( %sn ):\n", testNumber, gTypeNames[ outType ], gSaturationNames[ sat ], gRoundingModeNames[round], gTypeNames[inType] );
+                        vlog("%d) Testing convert_%sn%s%s( %sn ):\n",
+                             testNumber, gTypeNames[outType],
+                             gSaturationNames[sat], gRoundingModeNames[round],
+                             gTypeNames[inType]);
 
                         // skip double if we don't have it
-                        if( ! gTestDouble && (inType == kdouble || outType == kdouble ) )
+                        if (!gTestDouble
+                            && (inType == kdouble || outType == kdouble))
                         {
-                            if( gHasDouble )
+                            if (gHasDouble)
                             {
-                                vlog_error( "\t *** %d) convert_%sn%s%s( %sn ) FAILED ** \n", testNumber, gTypeNames[ outType ], gSaturationNames[ sat ], gRoundingModeNames[round], gTypeNames[inType] );
-                                vlog( "\t\tcl_khr_fp64 enabled, but double testing turned off.\n" );
+                                vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
+                                           "FAILED ** \n",
+                                           testNumber, gTypeNames[outType],
+                                           gSaturationNames[sat],
+                                           gRoundingModeNames[round],
+                                           gTypeNames[inType]);
+                                vlog("\t\tcl_khr_fp64 enabled, but double "
+                                     "testing turned off.\n");
                             }
                             continue;
                         }
 
-                        // Skip the implicit converts if the rounding mode is not default or test is saturated
-                        if( 0 == startMinVectorSize )
+                        // Skip the implicit converts if the rounding mode is
+                        // not default or test is saturated
+                        if (0 == startMinVectorSize)
                         {
-                            if( sat || round != kDefaultRoundingMode )
+                            if (sat || round != kDefaultRoundingMode)
                                 gMinVectorSize = 1;
                             else
                                 gMinVectorSize = 0;
                         }
 
-                        if( ( error = DoTest( device, outType, inType, sat, round, gMTdata ) ) )
+                        if ((error = DoTest(device, outType, inType, sat, round,
+                                            gMTdata)))
                         {
-                            vlog_error( "\t *** %d) convert_%sn%s%s( %sn ) FAILED ** \n", testNumber, gTypeNames[outType], gSaturationNames[sat], gRoundingModeNames[round], gTypeNames[inType] );
+                            vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
+                                       "FAILED ** \n",
+                                       testNumber, gTypeNames[outType],
+                                       gSaturationNames[sat],
+                                       gRoundingModeNames[round],
+                                       gTypeNames[inType]);
                         }
                     }
                 }
@@ -274,17 +315,17 @@ exit:
 }
 
 test_definition test_list[] = {
-    ADD_TEST( conversions ),
+    ADD_TEST(conversions),
 };
 
-const int test_num = ARRAY_SIZE( test_list );
+const int test_num = ARRAY_SIZE(test_list);
 
 #pragma mark -
 
-int main (int argc, const char **argv )
+int main(int argc, const char **argv)
 {
     int error;
-    cl_uint seed = (cl_uint) time( NULL );
+    cl_uint seed = (cl_uint)time(NULL);
 
     argc = parseCustomParam(argc, argv);
     if (argc == -1)
@@ -292,15 +333,13 @@ int main (int argc, const char **argv )
         return 1;
     }
 
-    if( (error = ParseArgs( argc, argv )) )
-        return error;
+    if ((error = ParseArgs(argc, argv))) return error;
 
-    //Turn off sleep so our tests run to completion
+    // Turn off sleep so our tests run to completion
     PreventSleep();
-    atexit( ResumeSleep );
+    atexit(ResumeSleep);
 
-    if(!gMultithread)
-        SetThreadCount(1);
+    if (!gMultithread) SetThreadCount(1);
 
 #if defined(_MSC_VER) && defined(_M_IX86)
     // VS2005 (and probably others, since long double got deprecated) sets
@@ -312,14 +351,15 @@ int main (int argc, const char **argv )
     _controlfp_s(&ignored, _PC_64, _MCW_PC);
 #endif
 
-    vlog( "===========================================================\n" );
-    vlog( "Random seed: %u\n", seed );
-    gMTdata = init_genrand( seed );
+    vlog("===========================================================\n");
+    vlog("Random seed: %u\n", seed);
+    gMTdata = init_genrand(seed);
 
-    const char* arg[] = {argv[0]};
-    int ret = runTestHarnessWithCheck( 1, arg, test_num, test_list, true, 0, InitCL );
+    const char *arg[] = { argv[0] };
+    int ret =
+        runTestHarnessWithCheck(1, arg, test_num, test_list, true, 0, InitCL);
 
-    free_mtdata( gMTdata );
+    free_mtdata(gMTdata);
     if (gQueue)
     {
         error = clFinish(gQueue);
@@ -328,7 +368,8 @@ int main (int argc, const char **argv )
 
     clReleaseMemObject(gInBuffer);
 
-    for( int i = 0; i < kCallStyleCount; i++ ) {
+    for (int i = 0; i < kCallStyleCount; i++)
+    {
         clReleaseMemObject(gOutBuffers[i]);
     }
     clReleaseCommandQueue(gQueue);
@@ -340,82 +381,67 @@ int main (int argc, const char **argv )
 #pragma mark -
 #pragma mark setup
 
-static int ParseArgs( int argc, const char **argv )
+static int ParseArgs(int argc, const char **argv)
 {
     int i;
     argList = (const char **)calloc(argc, sizeof(char *));
     argCount = 0;
 
-    if( NULL == argList && argc > 1 )
-        return -1;
+    if (NULL == argList && argc > 1) return -1;
 
-#if (defined( __APPLE__ ) || defined(__linux__) || defined (__MINGW32__))
+#if (defined(__APPLE__) || defined(__linux__) || defined(__MINGW32__))
     { // Extract the app name
-        char baseName[ MAXPATHLEN ];
-        strncpy( baseName, argv[0], MAXPATHLEN );
-        char *base = basename( baseName );
-        if( NULL != base )
+        char baseName[MAXPATHLEN];
+        strncpy(baseName, argv[0], MAXPATHLEN);
+        char *base = basename(baseName);
+        if (NULL != base)
         {
-            strncpy( appName, base, sizeof( appName )  );
-            appName[ sizeof( appName ) -1 ] = '\0';
+            strncpy(appName, base, sizeof(appName));
+            appName[sizeof(appName) - 1] = '\0';
         }
     }
-#elif defined (_WIN32)
+#elif defined(_WIN32)
     {
         char fname[_MAX_FNAME + _MAX_EXT + 1];
         char ext[_MAX_EXT];
 
-        errno_t err = _splitpath_s( argv[0], NULL, 0, NULL, 0,
-                                   fname, _MAX_FNAME, ext, _MAX_EXT );
-        if (err == 0) { // no error
-            strcat (fname, ext); //just cat them, size of frame can keep both
-            strncpy (appName, fname, sizeof(appName));
-            appName[ sizeof( appName ) -1 ] = '\0';
+        errno_t err = _splitpath_s(argv[0], NULL, 0, NULL, 0, fname, _MAX_FNAME,
+                                   ext, _MAX_EXT);
+        if (err == 0)
+        { // no error
+            strcat(fname, ext); // just cat them, size of frame can keep both
+            strncpy(appName, fname, sizeof(appName));
+            appName[sizeof(appName) - 1] = '\0';
         }
     }
 #endif
 
-    vlog( "\n%s", appName );
-    for( i = 1; i < argc; i++ )
+    vlog("\n%s", appName);
+    for (i = 1; i < argc; i++)
     {
         const char *arg = argv[i];
-        if( NULL == arg )
-            break;
+        if (NULL == arg) break;
 
-        vlog( "\t%s", arg );
-        if( arg[0] == '-' )
+        vlog("\t%s", arg);
+        if (arg[0] == '-')
         {
             arg++;
-            while( *arg != '\0' )
+            while (*arg != '\0')
             {
-                switch( *arg )
+                switch (*arg)
                 {
-                    case 'd':
-                        gTestDouble ^= 1;
-                        break;
-                    case 'l':
-                        gSkipTesting ^= 1;
-                        break;
-                    case 'm':
-                        gMultithread ^= 1;
-                        break;
-                    case 'w':
-                        gWimpyMode ^= 1;
-                        break;
+                    case 'd': gTestDouble ^= 1; break;
+                    case 'l': gSkipTesting ^= 1; break;
+                    case 'm': gMultithread ^= 1; break;
+                    case 'w': gWimpyMode ^= 1; break;
                     case '[':
                         parseWimpyReductionFactor(arg, gWimpyReductionFactor);
                         break;
-                    case 'z':
-                        gForceFTZ ^= 1;
-                        break;
-                    case 't':
-                        gTimeResults ^= 1;
-                        break;
-                    case 'a':
-                        gReportAverageTimes ^= 1;
-                        break;
+                    case 'z': gForceFTZ ^= 1; break;
+                    case 't': gTimeResults ^= 1; break;
+                    case 'a': gReportAverageTimes ^= 1; break;
                     case '1':
-                        if( arg[1] == '6' )
+                        if (arg[1] == '6')
                         {
                             gMinVectorSize = 6;
                             gMaxVectorSize = 7;
@@ -449,7 +475,7 @@ static int ParseArgs( int argc, const char **argv )
                         break;
 
                     default:
-                        vlog( " <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg );
+                        vlog(" <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg);
                         PrintUsage();
                         return -1;
                 }
@@ -459,128 +485,136 @@ static int ParseArgs( int argc, const char **argv )
         else
         {
             char *t = NULL;
-            long number = strtol( arg, &t, 0 );
-            if( t != arg )
+            long number = strtol(arg, &t, 0);
+            if (t != arg)
             {
-                if( gStartTestNumber != -1 )
-                    gEndTestNumber = gStartTestNumber + (int) number;
+                if (gStartTestNumber != -1)
+                    gEndTestNumber = gStartTestNumber + (int)number;
                 else
-                    gStartTestNumber = (int) number;
+                    gStartTestNumber = (int)number;
             }
             else
             {
-                argList[ argCount ] = arg;
+                argList[argCount] = arg;
                 argCount++;
             }
         }
     }
 
     // Check for the wimpy mode environment variable
-    if (getenv("CL_WIMPY_MODE")) {
-      vlog( "\n" );
-      vlog( "*** Detected CL_WIMPY_MODE env                          ***\n" );
-      gWimpyMode = 1;
+    if (getenv("CL_WIMPY_MODE"))
+    {
+        vlog("\n");
+        vlog("*** Detected CL_WIMPY_MODE env                          ***\n");
+        gWimpyMode = 1;
     }
 
     vlog( "\n" );
 
     PrintArch();
 
-    if( gWimpyMode )
+    if (gWimpyMode)
     {
-        vlog( "\n" );
-        vlog( "*** WARNING: Testing in Wimpy mode!                     ***\n" );
-        vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
-        vlog( "*** It gives warm fuzzy feelings and then nevers calls. ***\n\n" );
-        vlog("*** Wimpy Reduction Factor: %-27u ***\n\n", gWimpyReductionFactor);
+        vlog("\n");
+        vlog("*** WARNING: Testing in Wimpy mode!                     ***\n");
+        vlog("*** Wimpy mode is not sufficient to verify correctness. ***\n");
+        vlog("*** It gives warm fuzzy feelings and then nevers calls. ***\n\n");
+        vlog("*** Wimpy Reduction Factor: %-27u ***\n\n",
+             gWimpyReductionFactor);
     }
 
     return 0;
 }
 
-static void PrintUsage( void )
+static void PrintUsage(void)
 {
     int i;
-    vlog( "%s [-wz#]: <optional: test names>\n", appName );
-    vlog( "\ttest names:\n" );
-    vlog( "\t\tdestFormat<_sat><_round>_sourceFormat\n" );
-    vlog( "\t\t\tPossible format types are:\n\t\t\t\t" );
-    for( i = 0; i < kTypeCount; i++ )
-        vlog( "%s, ", gTypeNames[i] );
-    vlog( "\n\n\t\t\tPossible saturation values are: (empty) and _sat\n" );
-    vlog( "\t\t\tPossible rounding values are:\n\t\t\t\t(empty), " );
-    for( i = 1; i < kRoundingModeCount; i++ )
-        vlog( "%s, ", gRoundingModeNames[i] );
-    vlog( "\n\t\t\tExamples:\n" );
-    vlog( "\t\t\t\tulong_short   converts short to ulong\n" );
-    vlog( "\t\t\t\tchar_sat_rte_float   converts float to char with saturated clipping in round to nearest rounding mode\n\n" );
-    vlog( "\toptions:\n" );
-    vlog( "\t\t-d\tToggle testing of double precision.  On by default if cl_khr_fp64 is enabled, ignored otherwise.\n" );
-    vlog( "\t\t-l\tToggle link check mode. When on, testing is skipped, and we just check to see that the kernels build. (Off by default.)\n" );
-    vlog( "\t\t-m\tToggle Multithreading. (On by default.)\n" );
-    vlog( "\t\t-w\tToggle wimpy mode. When wimpy mode is on, we run a very small subset of the tests for each fn. NOT A VALID TEST! (Off by default.)\n" );
-    vlog(" \t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", gWimpyReductionFactor);
-    vlog( "\t\t-z\tToggle flush to zero mode  (Default: per device)\n" );
-    vlog( "\t\t-#\tTest just vector size given by #, where # is an element of the set {1,2,3,4,8,16}\n" );
-    vlog( "\n" );
-    vlog( "You may also pass the number of the test on which to start.\nA second number can be then passed to indicate how many tests to run\n\n" );
+    vlog("%s [-wz#]: <optional: test names>\n", appName);
+    vlog("\ttest names:\n");
+    vlog("\t\tdestFormat<_sat><_round>_sourceFormat\n");
+    vlog("\t\t\tPossible format types are:\n\t\t\t\t");
+    for (i = 0; i < kTypeCount; i++) vlog("%s, ", gTypeNames[i]);
+    vlog("\n\n\t\t\tPossible saturation values are: (empty) and _sat\n");
+    vlog("\t\t\tPossible rounding values are:\n\t\t\t\t(empty), ");
+    for (i = 1; i < kRoundingModeCount; i++)
+        vlog("%s, ", gRoundingModeNames[i]);
+    vlog("\n\t\t\tExamples:\n");
+    vlog("\t\t\t\tulong_short   converts short to ulong\n");
+    vlog("\t\t\t\tchar_sat_rte_float   converts float to char with saturated "
+         "clipping in round to nearest rounding mode\n\n");
+    vlog("\toptions:\n");
+    vlog("\t\t-d\tToggle testing of double precision.  On by default if "
+         "cl_khr_fp64 is enabled, ignored otherwise.\n");
+    vlog("\t\t-l\tToggle link check mode. When on, testing is skipped, and we "
+         "just check to see that the kernels build. (Off by default.)\n");
+    vlog("\t\t-m\tToggle Multithreading. (On by default.)\n");
+    vlog("\t\t-w\tToggle wimpy mode. When wimpy mode is on, we run a very "
+         "small subset of the tests for each fn. NOT A VALID TEST! (Off by "
+         "default.)\n");
+    vlog(" \t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
+         "1-12, default factor(%u)\n",
+         gWimpyReductionFactor);
+    vlog("\t\t-z\tToggle flush to zero mode  (Default: per device)\n");
+    vlog("\t\t-#\tTest just vector size given by #, where # is an element of "
+         "the set {1,2,3,4,8,16}\n");
+    vlog("\n");
+    vlog(
+        "You may also pass the number of the test on which to start.\nA second "
+        "number can be then passed to indicate how many tests to run\n\n");
 }
 
 
-static int GetTestCase( const char *name, Type *outType, Type *inType, SaturationMode *sat, RoundingMode *round )
+static int GetTestCase(const char *name, Type *outType, Type *inType,
+                       SaturationMode *sat, RoundingMode *round)
 {
     int i;
 
-    //Find the return type
-    for( i = 0; i < kTypeCount; i++ )
-        if( name == strstr( name, gTypeNames[i] ) )
+    // Find the return type
+    for (i = 0; i < kTypeCount; i++)
+        if (name == strstr(name, gTypeNames[i]))
         {
             *outType = (Type)i;
-            name += strlen( gTypeNames[i] );
+            name += strlen(gTypeNames[i]);
 
             break;
         }
 
-    if( i == kTypeCount )
-        return -1;
+    if (i == kTypeCount) return -1;
 
     // Check to see if _sat appears next
     *sat = (SaturationMode)0;
-    for( i = 1; i < kSaturationModeCount; i++ )
-        if( name == strstr( name, gSaturationNames[i] ) )
+    for (i = 1; i < kSaturationModeCount; i++)
+        if (name == strstr(name, gSaturationNames[i]))
         {
             *sat = (SaturationMode)i;
-            name += strlen( gSaturationNames[i] );
+            name += strlen(gSaturationNames[i]);
             break;
         }
 
     *round = (RoundingMode)0;
-    for( i = 1; i < kRoundingModeCount; i++ )
-        if( name == strstr( name, gRoundingModeNames[i] ) )
+    for (i = 1; i < kRoundingModeCount; i++)
+        if (name == strstr(name, gRoundingModeNames[i]))
         {
             *round = (RoundingMode)i;
-            name += strlen( gRoundingModeNames[i] );
+            name += strlen(gRoundingModeNames[i]);
             break;
         }
 
-    if( *name != '_' )
-        return -2;
+    if (*name != '_') return -2;
     name++;
 
-    for( i = 0; i < kTypeCount; i++ )
-        if( name == strstr( name, gTypeNames[i] ) )
+    for (i = 0; i < kTypeCount; i++)
+        if (name == strstr(name, gTypeNames[i]))
         {
             *inType = (Type)i;
-            name += strlen( gTypeNames[i] );
+            name += strlen(gTypeNames[i]);
 
             break;
         }
 
-    if( i == kTypeCount )
-        return -3;
+    if (i == kTypeCount) return -3;
 
-    if( *name != '\0' )
-        return -4;
+    if (*name != '\0') return -4;
 
     return 0;
 }
@@ -588,270 +622,275 @@ static int GetTestCase( const char *name, Type *outType, Type *inType, Saturatio
 #pragma mark -
 #pragma mark OpenCL
 
-test_status InitCL( cl_device_id device )
+test_status InitCL(cl_device_id device)
 {
     int error, i;
-    size_t configSize = sizeof( gComputeDevices );
+    size_t configSize = sizeof(gComputeDevices);
 
-    if( (error = clGetDeviceInfo( device, CL_DEVICE_MAX_COMPUTE_UNITS, configSize, &gComputeDevices, NULL )) )
+    if ((error = clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS,
+                                 configSize, &gComputeDevices, NULL)))
         gComputeDevices = 1;
 
-    configSize = sizeof( gDeviceFrequency );
-    if( (error = clGetDeviceInfo( device, CL_DEVICE_MAX_CLOCK_FREQUENCY, configSize, &gDeviceFrequency, NULL )) )
+    configSize = sizeof(gDeviceFrequency);
+    if ((error = clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY,
+                                 configSize, &gDeviceFrequency, NULL)))
         gDeviceFrequency = 0;
 
     cl_device_fp_config floatCapabilities = 0;
-    if( (error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(floatCapabilities), &floatCapabilities,  NULL)))
+    if ((error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
+                                 sizeof(floatCapabilities), &floatCapabilities,
+                                 NULL)))
         floatCapabilities = 0;
-    if(0 == (CL_FP_DENORM & floatCapabilities) )
-        gForceFTZ ^= 1;
+    if (0 == (CL_FP_DENORM & floatCapabilities)) gForceFTZ ^= 1;
 
-    if( 0 == (floatCapabilities & CL_FP_ROUND_TO_NEAREST ) )
+    if (0 == (floatCapabilities & CL_FP_ROUND_TO_NEAREST))
     {
         char profileStr[128] = "";
         // Verify that we are an embedded profile device
-        if( (error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof( profileStr ), profileStr, NULL ) ) )
+        if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE,
+                                     sizeof(profileStr), profileStr, NULL)))
         {
-            vlog_error( "FAILURE: Could not get device profile: error %d\n", error );
+            vlog_error("FAILURE: Could not get device profile: error %d\n",
+                       error);
             return TEST_FAIL;
         }
 
-        if( strcmp( profileStr, "EMBEDDED_PROFILE" ) )
+        if (strcmp(profileStr, "EMBEDDED_PROFILE"))
         {
-            vlog_error( "FAILURE: non-embedded profile device does not support CL_FP_ROUND_TO_NEAREST\n" );
+            vlog_error("FAILURE: non-embedded profile device does not support "
+                       "CL_FP_ROUND_TO_NEAREST\n");
             return TEST_FAIL;
         }
 
-        if( 0 == (floatCapabilities & CL_FP_ROUND_TO_ZERO ) )
+        if (0 == (floatCapabilities & CL_FP_ROUND_TO_ZERO))
         {
-            vlog_error( "FAILURE: embedded profile device supports neither CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n" );
+            vlog_error("FAILURE: embedded profile device supports neither "
+                       "CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n");
             return TEST_FAIL;
         }
 
         gIsRTZ = 1;
     }
 
-    else if(is_extension_available(device, "cl_khr_fp64"))
+    else if (is_extension_available(device, "cl_khr_fp64"))
     {
         gHasDouble = 1;
     }
     gTestDouble &= gHasDouble;
 
-    //detect whether profile of the device is embedded
-    char profile[1024] = "";
-    if( (error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ) ) ){}
-    else if( strstr(profile, "EMBEDDED_PROFILE" ) )
-    {
-        gIsEmbedded = 1;
-        if( !is_extension_available(device, "cles_khr_int64" ) )
-            gHasLong = 0;
-    }
-
-
-    gContext = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error );
-    if( NULL == gContext || error )
+    gContext = clCreateContext(NULL, 1, &device, notify_callback, NULL, &error);
+    if (NULL == gContext || error)
     {
-        vlog_error( "clCreateContext failed. (%d)\n", error );
+        vlog_error("clCreateContext failed. (%d)\n", error);
         return TEST_FAIL;
     }
 
     gQueue = clCreateCommandQueue(gContext, device, 0, &error);
-    if( NULL == gQueue || error )
+    if (NULL == gQueue || error)
     {
-        vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
+        vlog_error("clCreateCommandQueue failed. (%d)\n", error);
         return TEST_FAIL;
     }
 
-    //Allocate buffers
-    //FIXME: use clProtectedArray for guarded allocations?
-    gIn   = malloc( BUFFER_SIZE + 2 * kPageSize );
-    gAllowZ = malloc( BUFFER_SIZE + 2 * kPageSize );
-    gRef  = malloc( BUFFER_SIZE + 2 * kPageSize );
-    for( i = 0; i < kCallStyleCount; i++ )
+    // Allocate buffers
+    // FIXME: use clProtectedArray for guarded allocations?
+    gIn = malloc(BUFFER_SIZE + 2 * kPageSize);
+    gAllowZ = malloc(BUFFER_SIZE + 2 * kPageSize);
+    gRef = malloc(BUFFER_SIZE + 2 * kPageSize);
+    for (i = 0; i < kCallStyleCount; i++)
     {
-        gOut[i] = malloc( BUFFER_SIZE + 2 * kPageSize );
-        if( NULL == gOut[i] )
-            return TEST_FAIL;
+        gOut[i] = malloc(BUFFER_SIZE + 2 * kPageSize);
+        if (NULL == gOut[i]) return TEST_FAIL;
     }
 
     // setup input buffers
-    gInBuffer = clCreateBuffer(gContext, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, BUFFER_SIZE, NULL, &error);
-    if( gInBuffer == NULL || error)
+    gInBuffer =
+        clCreateBuffer(gContext, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
+                       BUFFER_SIZE, NULL, &error);
+    if (gInBuffer == NULL || error)
     {
-        vlog_error( "clCreateBuffer failed for input (%d)\n", error );
+        vlog_error("clCreateBuffer failed for input (%d)\n", error);
         return TEST_FAIL;
     }
 
     // setup output buffers
-    for( i = 0; i < kCallStyleCount; i++ )
+    for (i = 0; i < kCallStyleCount; i++)
     {
-        gOutBuffers[i] = clCreateBuffer(  gContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, BUFFER_SIZE, NULL, &error );
-        if( gOutBuffers[i] == NULL || error )
+        gOutBuffers[i] =
+            clCreateBuffer(gContext, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+                           BUFFER_SIZE, NULL, &error);
+        if (gOutBuffers[i] == NULL || error)
         {
-            vlog_error( "clCreateArray failed for output (%d)\n", error );
+            vlog_error("clCreateArray failed for output (%d)\n", error);
             return TEST_FAIL;
         }
     }
 
 
-    gMTdata = init_genrand( gRandomSeed );
+    gMTdata = init_genrand(gRandomSeed);
 
 
     char c[1024];
     static const char *no_yes[] = { "NO", "YES" };
-    vlog( "\nCompute Device info:\n" );
+    vlog("\nCompute Device info:\n");
     clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(c), c, NULL);
-    vlog( "\tDevice Name: %s\n", c );
+    vlog("\tDevice Name: %s\n", c);
     clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(c), c, NULL);
-    vlog( "\tVendor: %s\n", c );
+    vlog("\tVendor: %s\n", c);
     clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(c), c, NULL);
-    vlog( "\tDevice Version: %s\n", c );
+    vlog("\tDevice Version: %s\n", c);
     clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL);
-    vlog( "\tCL C Version: %s\n", c );
+    vlog("\tCL C Version: %s\n", c);
     clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(c), c, NULL);
-    vlog( "\tDriver Version: %s\n", c );
-    vlog( "\tProcessing with %ld devices\n", gComputeDevices );
-    vlog( "\tDevice Frequency: %d MHz\n", gDeviceFrequency );
-    vlog( "\tSubnormal values supported for floats? %s\n", no_yes[0 != (CL_FP_DENORM & floatCapabilities)] );
-    vlog( "\tTesting with FTZ mode ON for floats? %s\n", no_yes[0 != gForceFTZ] );
-    vlog( "\tTesting with default RTZ mode for floats? %s\n", no_yes[0 != gIsRTZ] );
-    vlog( "\tHas Double? %s\n", no_yes[0 != gHasDouble] );
-    if( gHasDouble )
-        vlog( "\tTest Double? %s\n", no_yes[0 != gTestDouble] );
-    vlog( "\tHas Long? %s\n", no_yes[0 != gHasLong] );
-    vlog( "\tTesting vector sizes: " );
-    for( i = gMinVectorSize; i < gMaxVectorSize; i++ )
+    vlog("\tDriver Version: %s\n", c);
+    vlog("\tProcessing with %ld devices\n", gComputeDevices);
+    vlog("\tDevice Frequency: %d MHz\n", gDeviceFrequency);
+    vlog("\tSubnormal values supported for floats? %s\n",
+         no_yes[0 != (CL_FP_DENORM & floatCapabilities)]);
+    vlog("\tTesting with FTZ mode ON for floats? %s\n", no_yes[0 != gForceFTZ]);
+    vlog("\tTesting with default RTZ mode for floats? %s\n",
+         no_yes[0 != gIsRTZ]);
+    vlog("\tHas Double? %s\n", no_yes[0 != gHasDouble]);
+    if (gHasDouble) vlog("\tTest Double? %s\n", no_yes[0 != gTestDouble]);
+    vlog("\tHas Long? %s\n", no_yes[0 != gHasLong]);
+    vlog("\tTesting vector sizes: ");
+    for (i = gMinVectorSize; i < gMaxVectorSize; i++)
         vlog("\t%d", vectorSizes[i]);
-    vlog( "\n" );
+    vlog("\n");
     return TEST_PASS;
 }
 
-static int RunKernel( cl_kernel kernel, void *inBuf, void *outBuf, size_t blockCount )
+static int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf,
+                     size_t blockCount)
 {
-    // The global dimensions are just the blockCount to execute since we haven't set up multiple queues for multiple devices.
+    // The global dimensions are just the blockCount to execute since we haven't
+    // set up multiple queues for multiple devices.
     int error;
 
-    error = clSetKernelArg(kernel, 0, sizeof( inBuf ), &inBuf);
+    error = clSetKernelArg(kernel, 0, sizeof(inBuf), &inBuf);
     error |= clSetKernelArg(kernel, 1, sizeof(outBuf), &outBuf);
 
-    if( error )
+    if (error)
     {
-        vlog_error( "FAILED -- could not set kernel args (%d)\n", error );
+        vlog_error("FAILED -- could not set kernel args (%d)\n", error);
         return error;
     }
 
-    if( (error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &blockCount, NULL, 0, NULL, NULL)))
+    if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &blockCount,
+                                        NULL, 0, NULL, NULL)))
     {
-        vlog_error( "FAILED -- could not execute kernel (%d)\n", error );
+        vlog_error("FAILED -- could not execute kernel (%d)\n", error);
         return error;
     }
 
     return 0;
 }
 
-#if ! defined( __APPLE__ )
-void memset_pattern4(void *dest, const void *src_pattern, size_t bytes );
-#endif
-
-#if defined( __APPLE__ )
+#if defined(__APPLE__)
 #include <mach/mach_time.h>
 #endif
 
-uint64_t GetTime( void );
-uint64_t GetTime( void )
+uint64_t GetTime(void);
+uint64_t GetTime(void)
 {
-#if defined( __APPLE__ )
+#if defined(__APPLE__)
     return mach_absolute_time();
 #elif defined(_MSC_VER)
-    return  ReadTime();
+    return ReadTime();
 #else
-    //mach_absolute_time is a high precision timer with precision < 1 microsecond.
+    // mach_absolute_time is a high precision timer with precision < 1
+    // microsecond.
 #warning need accurate clock here.  Times are invalid.
     return 0;
 #endif
 }
 
 
-#if defined (_MSC_VER)
+#if defined(_MSC_VER)
 /* function is defined in "compat.h" */
 #else
-double SubtractTime( uint64_t endTime, uint64_t startTime );
-double SubtractTime( uint64_t endTime, uint64_t startTime )
+double SubtractTime(uint64_t endTime, uint64_t startTime);
+double SubtractTime(uint64_t endTime, uint64_t startTime)
 {
     uint64_t diff = endTime - startTime;
     static double conversion = 0.0;
 
-    if( 0.0 == conversion )
+    if (0.0 == conversion)
     {
-#if defined( __APPLE__ )
-        mach_timebase_info_data_t info = {0,0};
-        kern_return_t   err = mach_timebase_info( &info );
-        if( 0 == err )
-            conversion = 1e-9 * (double) info.numer / (double) info.denom;
+#if defined(__APPLE__)
+        mach_timebase_info_data_t info = { 0, 0 };
+        kern_return_t err = mach_timebase_info(&info);
+        if (0 == err)
+            conversion = 1e-9 * (double)info.numer / (double)info.denom;
 #else
-        // This function consumes output from GetTime() above, and converts the time to secionds.
+        // This function consumes output from GetTime() above, and converts the
+        // time to secionds.
 #warning need accurate ticks to seconds conversion factor here. Times are invalid.
 #endif
     }
 
     // strictly speaking we should also be subtracting out timer latency here
-    return conversion * (double) diff;
+    return conversion * (double)diff;
 }
 #endif
 
 typedef struct CalcReferenceValuesInfo
 {
-    struct WriteInputBufferInfo *parent;        // pointer back to the parent WriteInputBufferInfo struct
-    cl_kernel                   kernel;         // the kernel for this vector size
-    cl_program                  program;        // the program for this vector size
-    cl_uint                     vectorSize;     // the vector size for this callback chain
-    void                        *p;             // the pointer to mapped result data for this vector size
-    cl_int                      result;
-}CalcReferenceValuesInfo;
+    struct WriteInputBufferInfo
+        *parent; // pointer back to the parent WriteInputBufferInfo struct
+    cl_kernel kernel; // the kernel for this vector size
+    cl_program program; // the program for this vector size
+    cl_uint vectorSize; // the vector size for this callback chain
+    void *p; // the pointer to mapped result data for this vector size
+    cl_int result;
+} CalcReferenceValuesInfo;
 
 typedef struct WriteInputBufferInfo
 {
-    volatile cl_event           calcReferenceValues;   // user event which signals when main thread is done calculating reference values
-    volatile cl_event           doneBarrier;     // user event which signals when worker threads are done
-    cl_uint                     count;           // the number of elements in the array
-    Type                        outType;         // the data type of the conversion result
-    Type                        inType;          // the data type of the conversion input
-    volatile int                barrierCount;
-    CalcReferenceValuesInfo     calcInfo[kCallStyleCount];
-}WriteInputBufferInfo;
-
-cl_uint RoundUpToNextPowerOfTwo( cl_uint x );
-cl_uint RoundUpToNextPowerOfTwo( cl_uint x )
+    volatile cl_event
+        calcReferenceValues; // user event which signals when main thread is
+                             // done calculating reference values
+    volatile cl_event
+        doneBarrier; // user event which signals when worker threads are done
+    cl_uint count; // the number of elements in the array
+    Type outType; // the data type of the conversion result
+    Type inType; // the data type of the conversion input
+    volatile int barrierCount;
+    CalcReferenceValuesInfo calcInfo[kCallStyleCount];
+} WriteInputBufferInfo;
+
+cl_uint RoundUpToNextPowerOfTwo(cl_uint x);
+cl_uint RoundUpToNextPowerOfTwo(cl_uint x)
 {
-    if( 0 == (x & (x-1)))
-        return x;
+    if (0 == (x & (x - 1))) return x;
 
-    while( x & (x-1) )
-       x &= x-1;
+    while (x & (x - 1)) x &= x - 1;
 
     return x + x;
 }
 
-void CL_CALLBACK WriteInputBufferComplete( cl_event, cl_int, void * );
+void WriteInputBufferComplete(void *);
 
 typedef struct DataInitInfo
 {
-    cl_ulong        start;
-    cl_uint         size;
-    Type            outType;
-    Type            inType;
-    SaturationMode  sat;
-    RoundingMode    round;
-    MTdata          *d;
-}DataInitInfo;
-
-cl_int InitData( cl_uint job_id, cl_uint thread_id, void *p );
-cl_int InitData( cl_uint job_id, cl_uint thread_id, void *p )
+    cl_ulong start;
+    cl_uint size;
+    Type outType;
+    Type inType;
+    SaturationMode sat;
+    RoundingMode round;
+    MTdata *d;
+} DataInitInfo;
+
+cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p);
+cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p)
 {
-    DataInitInfo *info = (DataInitInfo*) p;
+    DataInitInfo *info = (DataInitInfo *)p;
 
-    gInitFunctions[ info->inType ]( (char*)gIn + job_id * info->size * gTypeSizes[info->inType], info->sat, info->round,
-                                   info->outType, info->start + job_id * info->size, info->size, info->d[thread_id] );
+    gInitFunctions[info->inType](
+        (char *)gIn + job_id * info->size * gTypeSizes[info->inType], info->sat,
+        info->round, info->outType, info->start + job_id * info->size,
+        info->size, info->d[thread_id]);
     return CL_SUCCESS;
 }
 
@@ -859,13 +898,13 @@ static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count)
 {
     cl_uint i;
     for (i = 0; i < count; ++i)
-    allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0);
+        allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0);
 }
 
-cl_int PrepareReference( cl_uint job_id, cl_uint thread_id, void *p );
-cl_int PrepareReference( cl_uint job_id, cl_uint thread_id, void *p )
+cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p);
+cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
 {
-    DataInitInfo *info = (DataInitInfo*) p;
+    DataInitInfo *info = (DataInitInfo *)p;
     cl_uint count = info->size;
     Type inType = info->inType;
     Type outType = info->outType;
@@ -874,16 +913,15 @@ cl_int PrepareReference( cl_uint job_id, cl_uint thread_id, void *p )
 
     Force64BitFPUPrecision();
 
-    void *s = (cl_uchar*) gIn + job_id * count * gTypeSizes[info->inType];
-    void *a = (cl_uchar*) gAllowZ + job_id * count;
-    void *d = (cl_uchar*) gRef + job_id * count * gTypeSizes[info->outType];
+    void *s = (cl_uchar *)gIn + job_id * count * gTypeSizes[info->inType];
+    void *a = (cl_uchar *)gAllowZ + job_id * count;
+    void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType];
 
     if (outType != inType)
     {
-        //create the reference while we wait
-        Convert f = gConversions[ outType ][ inType ];
-        if( info->sat )
-            f = gSaturatedConversions[ outType ][ inType ];
+        // create the reference while we wait
+        Convert f = gConversions[outType][inType];
+        if (info->sat) f = gSaturatedConversions[outType][inType];
 
 #if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
         /* ARM VFP doesn't have hardware instruction for converting from 64-bit
@@ -900,42 +938,34 @@ cl_int PrepareReference( cl_uint job_id, cl_uint thread_id, void *p )
         switch (round)
         {
             /* conversions to floating-point type use the current rounding mode.
-             * The only default floating-point rounding mode supported is round to nearest even
-             * i.e the current rounding mode will be _rte for floating-point types. */
-            case kDefaultRoundingMode:
-                    qcom_rm = qcomRTE;
-                    break;
-            case kRoundToNearestEven:
-                    qcom_rm = qcomRTE;
-                    break;
-            case kRoundUp:
-                    qcom_rm = qcomRTP;
-                    break;
-            case kRoundDown:
-                    qcom_rm = qcomRTN;
-                    break;
-            case kRoundTowardZero:
-                    qcom_rm = qcomRTZ;
-                    break;
+             * The only default floating-point rounding mode supported is round
+             * to nearest even i.e the current rounding mode will be _rte for
+             * floating-point types. */
+            case kDefaultRoundingMode: qcom_rm = qcomRTE; break;
+            case kRoundToNearestEven: qcom_rm = qcomRTE; break;
+            case kRoundUp: qcom_rm = qcomRTP; break;
+            case kRoundDown: qcom_rm = qcomRTN; break;
+            case kRoundTowardZero: qcom_rm = qcomRTZ; break;
             default:
-                    vlog_error("ERROR: undefined rounding mode %d\n", round);
-                    break;
+                vlog_error("ERROR: undefined rounding mode %d\n", round);
+                break;
         }
-        qcom_sat =  info->sat;
+        qcom_sat = info->sat;
 #endif
 
-        RoundingMode oldRound = set_round( round, outType );
-        f( d, s, count );
-        set_round( oldRound, outType );
+        RoundingMode oldRound = set_round(round, outType);
+        f(d, s, count);
+        set_round(oldRound, outType);
 
-    // Decide if we allow a zero result in addition to the correctly rounded one
+        // Decide if we allow a zero result in addition to the correctly rounded
+        // one
         memset(a, 0, count);
-    if (gForceFTZ) {
-        if (inType == kfloat)
-        setAllowZ((uint8_t*)a, (uint32_t*)s, count);
-        if (outType == kfloat)
-        setAllowZ((uint8_t*)a, (uint32_t*)d, count);
-    }
+        if (gForceFTZ)
+        {
+            if (inType == kfloat) setAllowZ((uint8_t *)a, (uint32_t *)s, count);
+            if (outType == kfloat)
+                setAllowZ((uint8_t *)a, (uint32_t *)d, count);
+        }
     }
     else
     {
@@ -943,46 +973,48 @@ cl_int PrepareReference( cl_uint job_id, cl_uint thread_id, void *p )
         memcpy(d, s, info->size * gTypeSizes[inType]);
     }
 
-    //Patch up NaNs conversions to integer to zero -- these can be converted to any integer
-    if( info->outType != kfloat && info->outType != kdouble )
+    // Patch up NaNs conversions to integer to zero -- these can be converted to
+    // any integer
+    if (info->outType != kfloat && info->outType != kdouble)
     {
-        if( inType == kfloat )
+        if (inType == kfloat)
         {
-            float *inp = (float*) s;
-            for( j = 0; j < count; j++ )
+            float *inp = (float *)s;
+            for (j = 0; j < count; j++)
             {
-                if( isnan( inp[j] ) )
-                    memset( (char*) d + j * gTypeSizes[ outType ], 0, gTypeSizes[ outType ] );
+                if (isnan(inp[j]))
+                    memset((char *)d + j * gTypeSizes[outType], 0,
+                           gTypeSizes[outType]);
             }
         }
-        if( inType == kdouble )
+        if (inType == kdouble)
         {
-            double *inp = (double*) s;
-            for( j = 0; j < count; j++ )
+            double *inp = (double *)s;
+            for (j = 0; j < count; j++)
             {
-                if( isnan( inp[j] ) )
-                    memset( (char*) d + j * gTypeSizes[ outType ], 0, gTypeSizes[ outType ] );
+                if (isnan(inp[j]))
+                    memset((char *)d + j * gTypeSizes[outType], 0,
+                           gTypeSizes[outType]);
             }
         }
     }
-    else if( inType == kfloat || inType == kdouble )
-    {  // outtype and intype is float or double.  NaN conversions for float <-> double can be any NaN
-        if( inType == kfloat && outType == kdouble )
+    else if (inType == kfloat || inType == kdouble)
+    { // outtype and intype is float or double.  NaN conversions for float <->
+      // double can be any NaN
+        if (inType == kfloat && outType == kdouble)
         {
-            float *inp = (float*) s;
-            for( j = 0; j < count; j++ )
+            float *inp = (float *)s;
+            for (j = 0; j < count; j++)
             {
-                if( isnan( inp[j] ) )
-                    ((double*) d)[j] = NAN;
+                if (isnan(inp[j])) ((double *)d)[j] = NAN;
             }
         }
-        if( inType == kdouble && outType == kfloat )
+        if (inType == kdouble && outType == kfloat)
         {
-            double *inp = (double*) s;
-            for( j = 0; j < count; j++ )
+            double *inp = (double *)s;
+            for (j = 0; j < count; j++)
             {
-                if( isnan( inp[j] ) )
-                    ((float*) d)[j] = NAN;
+                if (isnan(inp[j])) ((float *)d)[j] = NAN;
             }
         }
     }
@@ -990,13 +1022,14 @@ cl_int PrepareReference( cl_uint job_id, cl_uint thread_id, void *p )
     return CL_SUCCESS;
 }
 
-static int DoTest( cl_device_id device, Type outType, Type inType, SaturationMode sat, RoundingMode round, MTdata d )
+static int DoTest(cl_device_id device, Type outType, Type inType,
+                  SaturationMode sat, RoundingMode round, MTdata d)
 {
 #ifdef __APPLE__
     cl_ulong wall_start = mach_absolute_time();
 #endif
 
-    DataInitInfo  init_info = { 0, 0, outType, inType, sat, round, NULL };
+    DataInitInfo init_info = { 0, 0, outType, inType, sat, round, NULL };
     WriteInputBufferInfo writeInputBufferInfo;
     int vectorSize;
     int error = 0;
@@ -1007,22 +1040,23 @@ static int DoTest( cl_device_id device, Type outType, Type inType, SaturationMod
     size_t blockCount =
         BUFFER_SIZE / std::max(gTypeSizes[inType], gTypeSizes[outType]);
     size_t step = blockCount;
-    uint64_t lastCase = 1ULL << (8*gTypeSizes[ inType ]);
-    cl_event writeInputBuffer = NULL;
+    uint64_t lastCase = 1ULL << (8 * gTypeSizes[inType]);
 
-    memset( &writeInputBufferInfo, 0, sizeof( writeInputBufferInfo ) );
-    init_info.d = (MTdata*)malloc( threads * sizeof( MTdata ) );
-    if( NULL == init_info.d )
+    memset(&writeInputBufferInfo, 0, sizeof(writeInputBufferInfo));
+    init_info.d = (MTdata *)malloc(threads * sizeof(MTdata));
+    if (NULL == init_info.d)
     {
-        vlog_error( "ERROR: Unable to allocate storage for random number generator!\n" );
+        vlog_error(
+            "ERROR: Unable to allocate storage for random number generator!\n");
         return -1;
     }
-    for( i = 0; i < threads; i++ )
+    for (i = 0; i < threads; i++)
     {
-        init_info.d[i] = init_genrand( genrand_int32( d ) );
-        if( NULL == init_info.d[i] )
+        init_info.d[i] = init_genrand(genrand_int32(d));
+        if (NULL == init_info.d[i])
         {
-            vlog_error( "ERROR: Unable to allocate storage for random number generator!\n" );
+            vlog_error("ERROR: Unable to allocate storage for random number "
+                       "generator!\n");
             return -1;
         }
     }
@@ -1030,52 +1064,53 @@ static int DoTest( cl_device_id device, Type outType, Type inType, SaturationMod
     writeInputBufferInfo.outType = outType;
     writeInputBufferInfo.inType = inType;
 
-    for( vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
+    for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
     {
-        writeInputBufferInfo.calcInfo[vectorSize].program = MakeProgram( outType, inType, sat, round, vectorSize,
-                                                                        &writeInputBufferInfo.calcInfo[vectorSize].kernel );
-        if( NULL == writeInputBufferInfo.calcInfo[vectorSize].program )
+        writeInputBufferInfo.calcInfo[vectorSize].program =
+            MakeProgram(outType, inType, sat, round, vectorSize,
+                        &writeInputBufferInfo.calcInfo[vectorSize].kernel);
+        if (NULL == writeInputBufferInfo.calcInfo[vectorSize].program)
         {
             gFailCount++;
             return -1;
         }
-        if( NULL == writeInputBufferInfo.calcInfo[vectorSize].kernel )
+        if (NULL == writeInputBufferInfo.calcInfo[vectorSize].kernel)
         {
             gFailCount++;
-            vlog_error( "\t\tFAILED -- Failed to create kernel.\n" );
+            vlog_error("\t\tFAILED -- Failed to create kernel.\n");
             return -2;
         }
 
-        writeInputBufferInfo.calcInfo[vectorSize].parent = &writeInputBufferInfo;
+        writeInputBufferInfo.calcInfo[vectorSize].parent =
+            &writeInputBufferInfo;
         writeInputBufferInfo.calcInfo[vectorSize].vectorSize = vectorSize;
         writeInputBufferInfo.calcInfo[vectorSize].result = -1;
     }
 
-    if( gSkipTesting )
-        goto exit;
+    if (gSkipTesting) goto exit;
 
     // Patch up rounding mode if default is RTZ
-    // We leave the part above in default rounding mode so that the right kernel is compiled.
-    if( round == kDefaultRoundingMode && gIsRTZ && (outType == kfloat) )
+    // We leave the part above in default rounding mode so that the right kernel
+    // is compiled.
+    if (round == kDefaultRoundingMode && gIsRTZ && (outType == kfloat))
         init_info.round = round = kRoundTowardZero;
 
     // Figure out how many elements are in a work block
 
     // we handle 64-bit types a bit differently.
-    if( 8*gTypeSizes[ inType ] > 32 )
-        lastCase = 0x100000000ULL;
+    if (8 * gTypeSizes[inType] > 32) lastCase = 0x100000000ULL;
 
-    if ( !gWimpyMode && gIsEmbedded )
-      step = blockCount * EMBEDDED_REDUCTION_FACTOR;
+    if (!gWimpyMode && gIsEmbedded)
+        step = blockCount * EMBEDDED_REDUCTION_FACTOR;
 
-    if ( gWimpyMode )
-        step = (size_t)blockCount * (size_t)gWimpyReductionFactor;
-    vlog( "Testing... " );
+    if (gWimpyMode) step = (size_t)blockCount * (size_t)gWimpyReductionFactor;
+    vlog("Testing... ");
     fflush(stdout);
-    for( i = 0; i < (uint64_t)lastCase; i += step )
+    for (i = 0; i < (uint64_t)lastCase; i += step)
     {
 
-        if( 0 == ( i & ((lastCase >> 3) -1))) {
+        if (0 == (i & ((lastCase >> 3) - 1)))
+        {
             vlog(".");
             fflush(stdout);
         }
@@ -1083,53 +1118,61 @@ static int DoTest( cl_device_id device, Type outType, Type inType, SaturationMod
         cl_uint count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i);
         writeInputBufferInfo.count = count;
 
-        // Crate a user event to represent the status of the reference value computation completion
-        writeInputBufferInfo.calcReferenceValues = clCreateUserEvent( gContext, &error);
-        if( error || NULL == writeInputBufferInfo.calcReferenceValues )
+        // Crate a user event to represent the status of the reference value
+        // computation completion
+        writeInputBufferInfo.calcReferenceValues =
+            clCreateUserEvent(gContext, &error);
+        if (error || NULL == writeInputBufferInfo.calcReferenceValues)
         {
-            vlog_error( "ERROR: Unable to create user event. (%d)\n", error );
+            vlog_error("ERROR: Unable to create user event. (%d)\n", error);
             gFailCount++;
             goto exit;
         }
 
         // retain for consumption by MapOutputBufferComplete
-        for( vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
+        for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
+             vectorSize++)
         {
-            if( (error = clRetainEvent(writeInputBufferInfo.calcReferenceValues) ))
+            if ((error =
+                     clRetainEvent(writeInputBufferInfo.calcReferenceValues)))
             {
-                vlog_error( "ERROR: Unable to retain user event. (%d)\n", error );
+                vlog_error("ERROR: Unable to retain user event. (%d)\n", error);
                 gFailCount++;
                 goto exit;
             }
         }
 
-        // Crate a user event to represent when the callbacks are done verifying correctness
-        writeInputBufferInfo.doneBarrier = clCreateUserEvent( gContext, &error);
-        if( error || NULL == writeInputBufferInfo.calcReferenceValues )
+        // Crate a user event to represent when the callbacks are done verifying
+        // correctness
+        writeInputBufferInfo.doneBarrier = clCreateUserEvent(gContext, &error);
+        if (error || NULL == writeInputBufferInfo.calcReferenceValues)
         {
-            vlog_error( "ERROR: Unable to create user event for barrier. (%d)\n", error );
+            vlog_error("ERROR: Unable to create user event for barrier. (%d)\n",
+                       error);
             gFailCount++;
             goto exit;
         }
 
         // retain for use by the callback that calls this
-        if( (error = clRetainEvent(writeInputBufferInfo.doneBarrier) ))
+        if ((error = clRetainEvent(writeInputBufferInfo.doneBarrier)))
         {
-            vlog_error( "ERROR: Unable to retain user event doneBarrier. (%d)\n", error );
+            vlog_error("ERROR: Unable to retain user event doneBarrier. (%d)\n",
+                       error);
             gFailCount++;
             goto exit;
         }
 
         //      Call this in a multithreaded manner
-        //      gInitFunctions[ inType ]( gIn, sat, round, outType, i, count, d );
+        //      gInitFunctions[ inType ]( gIn, sat, round, outType, i, count, d
+        //      );
         cl_uint chunks = RoundUpToNextPowerOfTwo(threads) * 2;
         init_info.start = i;
         init_info.size = count / chunks;
-        if( init_info.size < 16384 )
+        if (init_info.size < 16384)
         {
             chunks = RoundUpToNextPowerOfTwo(threads);
             init_info.size = count / chunks;
-            if( init_info.size < 16384 )
+            if (init_info.size < 16384)
             {
                 init_info.size = count;
                 chunks = 1;
@@ -1138,43 +1181,23 @@ static int DoTest( cl_device_id device, Type outType, Type inType, SaturationMod
         ThreadPool_Do(InitData, chunks, &init_info);
 
         // Copy the results to the device
-        writeInputBuffer = NULL;
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0, count * gTypeSizes[inType], gIn, 0, NULL, &writeInputBuffer )))
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0,
+                                          count * gTypeSizes[inType], gIn, 0,
+                                          NULL, NULL)))
         {
-            vlog_error( "ERROR: clEnqueueWriteBuffer failed. (%d)\n", error );
+            vlog_error("ERROR: clEnqueueWriteBuffer failed. (%d)\n", error);
             gFailCount++;
             goto exit;
         }
 
-        // Setup completion callback for the write, which will enqueue the rest of the work
-        // This is somewhat gratuitous.  Because this is an in order queue, we didn't really need to
-        // do this work in a callback. We could have done it from the main thread.  Here we are
-        // verifying that the implementation can enqueue work from a callback, while at the same time
-        // also checking to make sure that the conversions work.
-        //
-        // Because the verification code is also moved to a callback, it is hoped that implementations will
-        // achieve a test performance improvement because they can verify the results in parallel.  If the
-        // implementation serializes callbacks however, that won't happen.   Consider it some motivation
-        // to do the right thing! :-)
-        if( (error = clSetEventCallback( writeInputBuffer, CL_COMPLETE, WriteInputBufferComplete, &writeInputBufferInfo)) )
-        {
-            vlog_error( "ERROR: clSetEventCallback failed. (%d)\n", error );
-            gFailCount++;
-            goto exit;
-        }
-
-        // The event can't be destroyed until the callback is called, so we can release it now.
-        if( (error = clReleaseEvent(writeInputBuffer) ))
-        {
-            vlog_error( "ERROR: clReleaseEvent failed. (%d)\n", error );
-            gFailCount++;
-            goto exit;
-        }
+        // Call completion callback for the write, which will enqueue the rest
+        // of the work.
+        WriteInputBufferComplete((void *)&writeInputBufferInfo);
 
         // Make sure the work is actually running, so we don't deadlock
-        if( (error = clFlush( gQueue ) ) )
+        if ((error = clFlush(gQueue)))
         {
-            vlog_error( "clFlush failed with error %d\n", error );
+            vlog_error("clFlush failed with error %d\n", error);
             gFailCount++;
             goto exit;
         }
@@ -1182,77 +1205,91 @@ static int DoTest( cl_device_id device, Type outType, Type inType, SaturationMod
         ThreadPool_Do(PrepareReference, chunks, &init_info);
 
         // signal we are done calculating the reference results
-        if( (error = clSetUserEventStatus( writeInputBufferInfo.calcReferenceValues, CL_COMPLETE ) ) )
+        if ((error = clSetUserEventStatus(
+                 writeInputBufferInfo.calcReferenceValues, CL_COMPLETE)))
         {
-            vlog_error( "Error:  Failed to set user event status to CL_COMPLETE:  %d\n", error );
+            vlog_error(
+                "Error:  Failed to set user event status to CL_COMPLETE:  %d\n",
+                error);
             gFailCount++;
             goto exit;
         }
 
         // Wait for the event callbacks to finish verifying correctness.
-        if( (error = clWaitForEvents( 1, (cl_event*) &writeInputBufferInfo.doneBarrier ) ))
+        if ((error = clWaitForEvents(
+                 1, (cl_event *)&writeInputBufferInfo.doneBarrier)))
         {
-            vlog_error( "Error:  Failed to wait for barrier:  %d\n", error );
+            vlog_error("Error:  Failed to wait for barrier:  %d\n", error);
             gFailCount++;
             goto exit;
         }
 
-        if( (error = clReleaseEvent(writeInputBufferInfo.calcReferenceValues ) ))
+        if ((error = clReleaseEvent(writeInputBufferInfo.calcReferenceValues)))
         {
-            vlog_error( "Error:  Failed to release calcReferenceValues:  %d\n", error );
+            vlog_error("Error:  Failed to release calcReferenceValues:  %d\n",
+                       error);
             gFailCount++;
             goto exit;
         }
 
-        if( (error = clReleaseEvent(writeInputBufferInfo.doneBarrier ) ))
+        if ((error = clReleaseEvent(writeInputBufferInfo.doneBarrier)))
         {
-            vlog_error( "Error:  Failed to release done barrier:  %d\n", error );
+            vlog_error("Error:  Failed to release done barrier:  %d\n", error);
             gFailCount++;
             goto exit;
         }
 
 
-        for( vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
+        for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
+             vectorSize++)
         {
-            if( ( error = writeInputBufferInfo.calcInfo[ vectorSize ].result ))
+            if ((error = writeInputBufferInfo.calcInfo[vectorSize].result))
             {
-                switch( inType )
+                switch (inType)
                 {
                     case kuchar:
                     case kchar:
-                        vlog( "Input value: 0x%2.2x ", ((unsigned char*)gIn)[error - 1] );
+                        vlog("Input value: 0x%2.2x ",
+                             ((unsigned char *)gIn)[error - 1]);
                         break;
                     case kushort:
                     case kshort:
-                        vlog( "Input value: 0x%4.4x ", ((unsigned short*)gIn)[error - 1] );
+                        vlog("Input value: 0x%4.4x ",
+                             ((unsigned short *)gIn)[error - 1]);
                         break;
                     case kuint:
                     case kint:
-                        vlog( "Input value: 0x%8.8x ", ((unsigned int*)gIn)[error - 1] );
+                        vlog("Input value: 0x%8.8x ",
+                             ((unsigned int *)gIn)[error - 1]);
                         break;
                     case kfloat:
-                        vlog( "Input value: %a ", ((float*)gIn)[error - 1] );
+                        vlog("Input value: %a ", ((float *)gIn)[error - 1]);
                         break;
                         break;
                     case kulong:
                     case klong:
-                        vlog( "Input value: 0x%16.16llx ", ((unsigned long long*)gIn)[error - 1] );
+                        vlog("Input value: 0x%16.16llx ",
+                             ((unsigned long long *)gIn)[error - 1]);
                         break;
                     case kdouble:
-                        vlog( "Input value: %a ", ((double*)gIn)[error - 1]);
+                        vlog("Input value: %a ", ((double *)gIn)[error - 1]);
                         break;
                     default:
-                        vlog_error( "Internal error at %s: %d\n", __FILE__, __LINE__ );
+                        vlog_error("Internal error at %s: %d\n", __FILE__,
+                                   __LINE__);
                         abort();
                         break;
                 }
 
                 // tell the user which conversion it was.
-                if( 0 == vectorSize )
-                    vlog( " (implicit scalar conversion from %s to %s)\n", gTypeNames[ inType ], gTypeNames[ outType ] );
+                if (0 == vectorSize)
+                    vlog(" (implicit scalar conversion from %s to %s)\n",
+                         gTypeNames[inType], gTypeNames[outType]);
                 else
-                    vlog( " (convert_%s%s%s%s( %s%s ))\n", gTypeNames[outType], sizeNames[vectorSize], gSaturationNames[ sat ],
-                                                            gRoundingModeNames[ round ], gTypeNames[inType], sizeNames[vectorSize] );
+                    vlog(" (convert_%s%s%s%s( %s%s ))\n", gTypeNames[outType],
+                         sizeNames[vectorSize], gSaturationNames[sat],
+                         gRoundingModeNames[round], gTypeNames[inType],
+                         sizeNames[vectorSize]);
 
                 gFailCount++;
                 goto exit;
@@ -1260,300 +1297,318 @@ static int DoTest( cl_device_id device, Type outType, Type inType, SaturationMod
         }
     }
 
-    log_info( "done.\n" );
+    log_info("done.\n");
 
-    if( gTimeResults )
+    if (gTimeResults)
     {
-        //Kick off tests for the various vector lengths
-        for( vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
+        // Kick off tests for the various vector lengths
+        for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
+             vectorSize++)
         {
             size_t workItemCount = blockCount / vectorSizes[vectorSize];
-            if( vectorSizes[vectorSize] * gTypeSizes[outType] < 4 )
-                workItemCount /= 4 / (vectorSizes[vectorSize] * gTypeSizes[outType]);
+            if (vectorSizes[vectorSize] * gTypeSizes[outType] < 4)
+                workItemCount /=
+                    4 / (vectorSizes[vectorSize] * gTypeSizes[outType]);
 
             double sum = 0.0;
             double bestTime = INFINITY;
             cl_uint k;
-            for( k = 0; k < PERF_LOOP_COUNT; k++ )
+            for (k = 0; k < PERF_LOOP_COUNT; k++)
             {
                 uint64_t startTime = GetTime();
-                if( (error = RunKernel( writeInputBufferInfo.calcInfo[vectorSize].kernel, gInBuffer, gOutBuffers[ vectorSize ], workItemCount )) )
+                if ((error = RunKernel(
+                         writeInputBufferInfo.calcInfo[vectorSize].kernel,
+                         gInBuffer, gOutBuffers[vectorSize], workItemCount)))
                 {
                     gFailCount++;
                     goto exit;
                 }
 
                 // Make sure OpenCL is done
-                if( (error = clFinish(gQueue) ) )
+                if ((error = clFinish(gQueue)))
                 {
-                    vlog_error( "Error %d at clFinish\n", error );
+                    vlog_error("Error %d at clFinish\n", error);
                     goto exit;
                 }
 
                 uint64_t endTime = GetTime();
-                double time = SubtractTime( endTime, startTime );
+                double time = SubtractTime(endTime, startTime);
                 sum += time;
-                if( time < bestTime )
-                    bestTime = time;
-
+                if (time < bestTime) bestTime = time;
             }
 
-            if( gReportAverageTimes )
-                bestTime = sum / PERF_LOOP_COUNT;
-            double clocksPerOp = bestTime * (double) gDeviceFrequency * gComputeDevices * gSimdSize * 1e6 / (workItemCount * vectorSizes[vectorSize]);
-            if( 0 == vectorSize )
-                vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "implicit convert %s -> %s", gTypeNames[ inType ], gTypeNames[ outType ] );
+            if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
+            double clocksPerOp = bestTime * (double)gDeviceFrequency
+                * gComputeDevices * gSimdSize * 1e6
+                / (workItemCount * vectorSizes[vectorSize]);
+            if (0 == vectorSize)
+                vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element",
+                          "implicit convert %s -> %s", gTypeNames[inType],
+                          gTypeNames[outType]);
             else
-                vlog_perf( clocksPerOp, LOWER_IS_BETTER, "clocks / element", "convert_%s%s%s%s( %s%s )", gTypeNames[ outType ], sizeNames[vectorSize], gSaturationNames[ sat ], gRoundingModeNames[round], gTypeNames[inType], sizeNames[vectorSize] );
+                vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element",
+                          "convert_%s%s%s%s( %s%s )", gTypeNames[outType],
+                          sizeNames[vectorSize], gSaturationNames[sat],
+                          gRoundingModeNames[round], gTypeNames[inType],
+                          sizeNames[vectorSize]);
         }
     }
 
-    if( gWimpyMode )
-        vlog( "\tWimp pass" );
+    if (gWimpyMode)
+        vlog("\tWimp pass");
     else
-        vlog( "\tpassed" );
+        vlog("\tpassed");
 
 #ifdef __APPLE__
     // record the run time
-    vlog( "\t(%f s)", 1e-9 * ( mach_absolute_time() - wall_start ) );
+    vlog("\t(%f s)", 1e-9 * (mach_absolute_time() - wall_start));
 #endif
-    vlog( "\n\n" );
-    fflush( stdout );
+    vlog("\n\n");
+    fflush(stdout);
 
 
 exit:
-    //clean up
-    for( vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
+    // clean up
+    for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
     {
-        clReleaseProgram( writeInputBufferInfo.calcInfo[vectorSize].program );
-        clReleaseKernel( writeInputBufferInfo.calcInfo[vectorSize].kernel );
+        clReleaseProgram(writeInputBufferInfo.calcInfo[vectorSize].program);
+        clReleaseKernel(writeInputBufferInfo.calcInfo[vectorSize].kernel);
     }
 
-    if( init_info.d )
+    if (init_info.d)
     {
-        for( i = 0; i < threads; i++ )
-            free_mtdata(init_info.d[i]);
+        for (i = 0; i < threads; i++) free_mtdata(init_info.d[i]);
         free(init_info.d);
     }
 
     return error;
 }
 
-void CL_CALLBACK MapResultValuesComplete( cl_event e, cl_int status, void *data );
+void MapResultValuesComplete(void *data);
 
 // Note: not called reentrantly
-void CL_CALLBACK WriteInputBufferComplete( cl_event e, cl_int status, void *data )
+void WriteInputBufferComplete(void *data)
 {
-    WriteInputBufferInfo *info = (WriteInputBufferInfo*) data;
+    cl_int status;
+    WriteInputBufferInfo *info = (WriteInputBufferInfo *)data;
     cl_uint count = info->count;
     int vectorSize;
 
-    if( CL_SUCCESS != status )
-    {
-        vlog_error( "ERROR: WriteInputBufferComplete calback failed with status: %d\n", status );
-        gFailCount++;
-        return;
-    }
-
     info->barrierCount = gMaxVectorSize - gMinVectorSize;
 
-    // now that we know that the write buffer is complete, enqueue callbacks to wait for the main thread to
-    // finish calculating the reference results.
-    for( vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
+    // now that we know that the write buffer is complete, enqueue callbacks to
+    // wait for the main thread to finish calculating the reference results.
+    for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
     {
-        size_t workItemCount = (count + vectorSizes[vectorSize] - 1) / ( vectorSizes[vectorSize]);
-        cl_event mapComplete = NULL;
+        size_t workItemCount =
+            (count + vectorSizes[vectorSize] - 1) / (vectorSizes[vectorSize]);
 
-        if( (status = RunKernel( info->calcInfo[ vectorSize ].kernel, gInBuffer, gOutBuffers[ vectorSize ], workItemCount )) )
+        if ((status = RunKernel(info->calcInfo[vectorSize].kernel, gInBuffer,
+                                gOutBuffers[vectorSize], workItemCount)))
         {
             gFailCount++;
             return;
         }
 
-        info->calcInfo[vectorSize].p = clEnqueueMapBuffer( gQueue, gOutBuffers[ vectorSize ], CL_FALSE, CL_MAP_READ | CL_MAP_WRITE,
-                                                          0, count * gTypeSizes[ info->outType ], 0, NULL, &mapComplete, &status);
+        info->calcInfo[vectorSize].p = clEnqueueMapBuffer(
+            gQueue, gOutBuffers[vectorSize], CL_TRUE,
+            CL_MAP_READ | CL_MAP_WRITE, 0, count * gTypeSizes[info->outType], 0,
+            NULL, NULL, &status);
         {
-            if( status )
+            if (status)
             {
-                vlog_error( "ERROR: WriteInputBufferComplete calback failed with status: %d\n", status );
+                vlog_error("ERROR: WriteInputBufferComplete calback failed "
+                           "with status: %d\n",
+                           status);
                 gFailCount++;
                 return;
             }
         }
+    }
 
-        if( (status = clSetEventCallback( mapComplete, CL_COMPLETE, MapResultValuesComplete, info->calcInfo + vectorSize)))
-        {
-            vlog_error( "ERROR: WriteInputBufferComplete calback failed with status: %d\n", status );
-            gFailCount++;
-            return;
-        }
-
-        if( (status = clReleaseEvent(mapComplete)))
-        {
-            vlog_error( "ERROR: clReleaseEvent calback failed in WriteInputBufferComplete for vector size %d with status: %d\n", vectorSize, status );
-            gFailCount++;
-            return;
-        }
+    for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
+    {
+        MapResultValuesComplete(info->calcInfo + vectorSize);
     }
 
     // Make sure the work starts moving -- otherwise we may deadlock
-    if( (status = clFlush(gQueue)))
+    if ((status = clFlush(gQueue)))
     {
-        vlog_error( "ERROR: WriteInputBufferComplete calback failed with status: %d\n", status );
+        vlog_error(
+            "ERROR: WriteInputBufferComplete calback failed with status: %d\n",
+            status);
         gFailCount++;
         return;
     }
 
-    // e was already released by the main thread. It should be destroyed automatically soon after we exit.
+    // e was already released by the main thread. It should be destroyed
+    // automatically soon after we exit.
 }
 
-void CL_CALLBACK CalcReferenceValuesComplete( cl_event e, cl_int status, void *data );
+void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
+                                             void *data);
 
 // Note: May be called reentrantly
-void CL_CALLBACK MapResultValuesComplete( cl_event e, cl_int status, void *data )
+void MapResultValuesComplete(void *data)
 {
-    CalcReferenceValuesInfo *info = (CalcReferenceValuesInfo*) data;
+    cl_int status;
+    CalcReferenceValuesInfo *info = (CalcReferenceValuesInfo *)data;
     cl_event calcReferenceValues = info->parent->calcReferenceValues;
 
-    if( CL_SUCCESS != status )
+    // we know that the map is done, wait for the main thread to finish
+    // calculating the reference values
+    if ((status = clSetEventCallback(calcReferenceValues, CL_COMPLETE,
+                                     CalcReferenceValuesComplete, data)))
     {
-        vlog_error( "ERROR: MapResultValuesComplete calback failed with status: %d\n", status );
-        gFailCount++;       // not thread safe -- being lazy here
-        clReleaseEvent(calcReferenceValues);
-        return;
+        vlog_error("ERROR: clSetEventCallback failed in "
+                   "MapResultValuesComplete with status: %d\n",
+                   status);
+        gFailCount++; // not thread safe -- being lazy here
     }
 
-    // we know that the map is done, wait for the main thread to finish calculating the reference values
-    if( (status = clSetEventCallback( calcReferenceValues, CL_COMPLETE, CalcReferenceValuesComplete, data )))
+    // this thread no longer needs its reference to info->calcReferenceValues,
+    // so release it
+    if ((status = clReleaseEvent(calcReferenceValues)))
     {
-        vlog_error( "ERROR: clSetEventCallback failed in MapResultValuesComplete with status: %d\n", status );
-        gFailCount++;       // not thread safe -- being lazy here
-    }
-
-    // this thread no longer needs its reference to info->calcReferenceValues, so release it
-    if( (status = clReleaseEvent(calcReferenceValues) ))
-    {
-        vlog_error( "ERROR: clReleaseEvent(info->calcReferenceValues) failed with status: %d\n", status );
-        gFailCount++;       // not thread safe -- being lazy here
+        vlog_error("ERROR: clReleaseEvent(info->calcReferenceValues) failed "
+                   "with status: %d\n",
+                   status);
+        gFailCount++; // not thread safe -- being lazy here
     }
 
     // no need to flush since we didn't enqueue anything
 
-    // e was already released by WriteInputBufferComplete. It should be destroyed automatically soon after we exit.
+    // e was already released by WriteInputBufferComplete. It should be
+    // destroyed automatically soon after we exit.
 }
 
 
-void CL_CALLBACK CalcReferenceValuesComplete( cl_event e, cl_int status, void *data )
+void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
+                                             void *data)
 {
-    CalcReferenceValuesInfo     *info = (CalcReferenceValuesInfo*) data;
-    cl_uint                     vectorSize = info->vectorSize;
-    cl_uint                     count = info->parent->count;
-    Type                        outType = info->parent->outType;        // the data type of the conversion result
-    Type                        inType = info->parent->inType;          // the data type of the conversion input
-    size_t                      j;
-    cl_int                      error;
-    cl_event                    doneBarrier = info->parent->doneBarrier;
+    CalcReferenceValuesInfo *info = (CalcReferenceValuesInfo *)data;
+    cl_uint vectorSize = info->vectorSize;
+    cl_uint count = info->parent->count;
+    Type outType =
+        info->parent->outType; // the data type of the conversion result
+    Type inType = info->parent->inType; // the data type of the conversion input
+    size_t j;
+    cl_int error;
+    cl_event doneBarrier = info->parent->doneBarrier;
 
     // report spurious error condition
-    if( CL_SUCCESS != status )
+    if (CL_SUCCESS != status)
     {
-        vlog_error( "ERROR: CalcReferenceValuesComplete did not succeed! (%d)\n", status );
-        gFailCount++;       // lazy about thread safety here
+        vlog_error("ERROR: CalcReferenceValuesComplete did not succeed! (%d)\n",
+                   status);
+        gFailCount++; // lazy about thread safety here
         return;
     }
 
-    // Now we know that both results have been mapped back from the device, and the
-    // main thread is done calculating the reference results. It is now time to check
-    // the results.
+    // Now we know that both results have been mapped back from the device, and
+    // the main thread is done calculating the reference results. It is now time
+    // to check the results.
 
     // verify results
     void *mapped = info->p;
 
-    //Patch up NaNs conversions to integer to zero -- these can be converted to any integer
-    if( outType != kfloat && outType != kdouble )
+    // Patch up NaNs conversions to integer to zero -- these can be converted to
+    // any integer
+    if (outType != kfloat && outType != kdouble)
     {
-        if( inType == kfloat )
+        if (inType == kfloat)
         {
-            float *inp = (float*) gIn;
-            for( j = 0; j < count; j++ )
+            float *inp = (float *)gIn;
+            for (j = 0; j < count; j++)
             {
-                if( isnan( inp[j] ) )
-                    memset( (char*) mapped + j * gTypeSizes[ outType ], 0, gTypeSizes[ outType ] );
+                if (isnan(inp[j]))
+                    memset((char *)mapped + j * gTypeSizes[outType], 0,
+                           gTypeSizes[outType]);
             }
         }
-        if( inType == kdouble )
+        if (inType == kdouble)
         {
-            double *inp = (double*) gIn;
-            for( j = 0; j < count; j++ )
+            double *inp = (double *)gIn;
+            for (j = 0; j < count; j++)
             {
-                if( isnan( inp[j] ) )
-                    memset( (char*) mapped + j * gTypeSizes[ outType ], 0, gTypeSizes[ outType ] );
+                if (isnan(inp[j]))
+                    memset((char *)mapped + j * gTypeSizes[outType], 0,
+                           gTypeSizes[outType]);
             }
         }
     }
-    else if( inType == kfloat || inType == kdouble )
-    {  // outtype and intype is float or double.  NaN conversions for float <-> double can be any NaN
-        if( inType == kfloat && outType == kdouble )
+    else if (inType == kfloat || inType == kdouble)
+    { // outtype and intype is float or double.  NaN conversions for float <->
+      // double can be any NaN
+        if (inType == kfloat && outType == kdouble)
         {
-            float *inp = (float*) gIn;
-            double *outp = (double*) mapped;
-            for( j = 0; j < count; j++ )
+            float *inp = (float *)gIn;
+            double *outp = (double *)mapped;
+            for (j = 0; j < count; j++)
             {
-                if( isnan( inp[j] ) && isnan(outp[j]) )
-                    outp[j] = NAN;
+                if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
             }
         }
-        if( inType == kdouble && outType == kfloat )
+        if (inType == kdouble && outType == kfloat)
         {
-            double *inp = (double*) gIn;
-            float *outp = (float*) mapped;
-            for( j = 0; j < count; j++ )
+            double *inp = (double *)gIn;
+            float *outp = (float *)mapped;
+            for (j = 0; j < count; j++)
             {
-                if( isnan( inp[j] ) && isnan(outp[j]) )
-                    outp[j] = NAN;
+                if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
             }
         }
     }
 
-    if( memcmp( mapped, gRef, count * gTypeSizes[ outType ] ) )
-        info->result = gCheckResults[outType]( mapped, gRef, gAllowZ, count, vectorSizes[vectorSize] );
+    if (memcmp(mapped, gRef, count * gTypeSizes[outType]))
+        info->result = gCheckResults[outType](mapped, gRef, gAllowZ, count,
+                                              vectorSizes[vectorSize]);
     else
         info->result = 0;
 
     // Fill the output buffer with junk and release it
     {
-        cl_uint pattern =  0xffffdead;
+        cl_uint pattern = 0xffffdead;
         memset_pattern4(mapped, &pattern, count * gTypeSizes[outType]);
-        if((error = clEnqueueUnmapMemObject(gQueue, gOutBuffers[ vectorSize ], mapped, 0, NULL, NULL)))
+        if ((error = clEnqueueUnmapMemObject(gQueue, gOutBuffers[vectorSize],
+                                             mapped, 0, NULL, NULL)))
         {
-            vlog_error( "ERROR: clEnqueueUnmapMemObject failed in CalcReferenceValuesComplete  (%d)\n", error );
+            vlog_error("ERROR: clEnqueueUnmapMemObject failed in "
+                       "CalcReferenceValuesComplete  (%d)\n",
+                       error);
             gFailCount++;
         }
     }
 
-    if( 1 == ThreadPool_AtomicAdd( &info->parent->barrierCount, -1) )
+    if (1 == ThreadPool_AtomicAdd(&info->parent->barrierCount, -1))
     {
-        if( (status = clSetUserEventStatus( doneBarrier, CL_COMPLETE) ))
+        if ((status = clSetUserEventStatus(doneBarrier, CL_COMPLETE)))
         {
-            vlog_error( "ERROR: clSetUserEventStatus failed in CalcReferenceValuesComplete (err: %d). We're probably going to deadlock.\n", status );
+            vlog_error("ERROR: clSetUserEventStatus failed in "
+                       "CalcReferenceValuesComplete (err: %d). We're probably "
+                       "going to deadlock.\n",
+                       status);
             gFailCount++;
             return;
         }
 
-        if( (status = clReleaseEvent( doneBarrier ) ) )
+        if ((status = clReleaseEvent(doneBarrier)))
         {
-            vlog_error( "ERROR: clReleaseEvent failed in CalcReferenceValuesComplete (err: %d).\n", status );
+            vlog_error("ERROR: clReleaseEvent failed in "
+                       "CalcReferenceValuesComplete (err: %d).\n",
+                       status);
             gFailCount++;
             return;
         }
     }
-
-
-    // e was already released by WriteInputBufferComplete. It should be destroyed automatically soon after
-    // all the calls to CalcReferenceValuesComplete exit.
+    // e was already released by WriteInputBufferComplete. It should be
+    // destroyed automatically soon after all the calls to
+    // CalcReferenceValuesComplete exit.
 }
 
-static cl_program   MakeProgram( Type outType, Type inType, SaturationMode sat, RoundingMode round, int vectorSize, cl_kernel *outKernel )
+static cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
+                              RoundingMode round, int vectorSize,
+                              cl_kernel *outKernel)
 {
     cl_program program;
     char testName[256];
@@ -1563,7 +1618,8 @@ static cl_program   MakeProgram( Type outType, Type inType, SaturationMode sat,
     if (outType == kdouble || inType == kdouble)
         source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
 
-    // Create the program. This is a bit complicated because we are trying to avoid byte and short stores.
+    // Create the program. This is a bit complicated because we are trying to
+    // avoid byte and short stores.
     if (0 == vectorSize)
     {
         // Create the type names.
@@ -1594,27 +1650,35 @@ static cl_program   MakeProgram( Type outType, Type inType, SaturationMode sat,
         char outName[32];
         switch (vectorSizetmp)
         {
-        case 1:
-            strncpy(inName, gTypeNames[inType], sizeof(inName));
-            strncpy(outName, gTypeNames[outType], sizeof(outName));
-            snprintf(convertString, sizeof(convertString), "convert_%s%s%s", outName, gSaturationNames[sat], gRoundingModeNames[round]);
-            snprintf(testName, 256, "test_%s_%s", convertString, inName);
-            vlog("Building %s( %s ) test\n", convertString, inName);
-            break;
-        case 3:
-            strncpy(inName, gTypeNames[inType], sizeof(inName));
-            strncpy(outName, gTypeNames[outType], sizeof(outName));
-            snprintf(convertString, sizeof(convertString), "convert_%s3%s%s", outName, gSaturationNames[sat], gRoundingModeNames[round]);
-            snprintf(testName, 256, "test_%s_%s3", convertString, inName);
-            vlog("Building %s( %s3 ) test\n", convertString, inName);
-            break;
-        default:
-            snprintf(inName, sizeof(inName), "%s%d", gTypeNames[inType], vectorSizetmp);
-            snprintf(outName, sizeof(outName), "%s%d", gTypeNames[outType], vectorSizetmp);
-            snprintf(convertString, sizeof(convertString), "convert_%s%s%s", outName, gSaturationNames[sat], gRoundingModeNames[round]);
-            snprintf(testName, 256, "test_%s_%s", convertString, inName);
-            vlog("Building %s( %s ) test\n", convertString, inName);
-            break;
+            case 1:
+                strncpy(inName, gTypeNames[inType], sizeof(inName));
+                strncpy(outName, gTypeNames[outType], sizeof(outName));
+                snprintf(convertString, sizeof(convertString), "convert_%s%s%s",
+                         outName, gSaturationNames[sat],
+                         gRoundingModeNames[round]);
+                snprintf(testName, 256, "test_%s_%s", convertString, inName);
+                vlog("Building %s( %s ) test\n", convertString, inName);
+                break;
+            case 3:
+                strncpy(inName, gTypeNames[inType], sizeof(inName));
+                strncpy(outName, gTypeNames[outType], sizeof(outName));
+                snprintf(convertString, sizeof(convertString),
+                         "convert_%s3%s%s", outName, gSaturationNames[sat],
+                         gRoundingModeNames[round]);
+                snprintf(testName, 256, "test_%s_%s3", convertString, inName);
+                vlog("Building %s( %s3 ) test\n", convertString, inName);
+                break;
+            default:
+                snprintf(inName, sizeof(inName), "%s%d", gTypeNames[inType],
+                         vectorSizetmp);
+                snprintf(outName, sizeof(outName), "%s%d", gTypeNames[outType],
+                         vectorSizetmp);
+                snprintf(convertString, sizeof(convertString), "convert_%s%s%s",
+                         outName, gSaturationNames[sat],
+                         gRoundingModeNames[round]);
+                snprintf(testName, 256, "test_%s_%s", convertString, inName);
+                vlog("Building %s( %s ) test\n", convertString, inName);
+                break;
         }
         fflush(stdout);
 
@@ -1654,8 +1718,7 @@ static cl_program   MakeProgram( Type outType, Type inType, SaturationMode sat,
     *outKernel = NULL;
 
     const char *flags = NULL;
-    if( gForceFTZ )
-        flags = "-cl-denorms-are-zero";
+    if (gForceFTZ) flags = "-cl-denorms-are-zero";
 
     // build it
     std::string sourceString = source.str();
@@ -1664,7 +1727,7 @@ static cl_program   MakeProgram( Type outType, Type inType, SaturationMode sat,
                                         &programSource, testName, flags);
     if (error)
     {
-        vlog_error("Failed to build kernel/program.\n", error);
+        vlog_error("Failed to build kernel/program (err = %d).\n", error);
         clReleaseProgram(program);
         return NULL;
     }
diff --git a/test_conformance/device_partition/test_device_partition.cpp b/test_conformance/device_partition/test_device_partition.cpp
index f9952ec8..cb53605a 100644
--- a/test_conformance/device_partition/test_device_partition.cpp
+++ b/test_conformance/device_partition/test_device_partition.cpp
@@ -212,7 +212,7 @@ int test_device_set(size_t deviceCount, size_t queueCount, cl_device_id *devices
     clProgramWrapper program;
     clKernelWrapper kernels[2];
     clMemWrapper  stream;
-    clCommandQueueWrapper queues[MAX_QUEUES];
+    clCommandQueueWrapper queues[MAX_QUEUES] = {};
     size_t threads[1], localThreads[1];
     int data[TEST_SIZE];
     int outputData[TEST_SIZE];
@@ -226,8 +226,6 @@ int test_device_set(size_t deviceCount, size_t queueCount, cl_device_id *devices
         expectedResultsOneDevice[i] = expectedResultsOneDeviceArray + (i * TEST_SIZE);
     }
 
-    memset(queues, 0, sizeof(queues));
-
     RandomSeed seed( gRandomSeed );
 
     if (queueCount > MAX_QUEUES) {
@@ -390,7 +388,7 @@ int test_device_partition_type_support(cl_device_id parentDevice, const cl_devic
     } else {
         test_error_ret( err, "Unable to get device partition properties (1)", -1 );
     };
-    for ( int i = 0; i < supportedProps.size(); i++)
+    for (size_t i = 0; i < supportedProps.size(); i++)
     {
         if (supportedProps[i] == partitionType)
         {
diff --git a/test_conformance/device_timer/CMakeLists.txt b/test_conformance/device_timer/CMakeLists.txt
index 4af7c7f7..a24d8d24 100644
--- a/test_conformance/device_timer/CMakeLists.txt
+++ b/test_conformance/device_timer/CMakeLists.txt
@@ -5,4 +5,6 @@ set(${MODULE_NAME}_SOURCES
     test_device_timer.cpp
 )
 
+set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
+
 include(../CMakeCommon.txt)
diff --git a/test_conformance/events/action_classes.cpp b/test_conformance/events/action_classes.cpp
index a84be6b6..55dbc477 100644
--- a/test_conformance/events/action_classes.cpp
+++ b/test_conformance/events/action_classes.cpp
@@ -189,7 +189,7 @@ cl_int NDRangeKernelAction::Execute(cl_command_queue queue, cl_uint numWaits,
 #pragma mark -------------------- Buffer Sub-Classes -------------------------
 
 cl_int BufferAction::Setup(cl_device_id device, cl_context context,
-                           cl_command_queue queue, bool allocate)
+                           cl_command_queue queue)
 {
     cl_int error;
     cl_ulong maxAllocSize;
@@ -227,7 +227,7 @@ cl_int BufferAction::Setup(cl_device_id device, cl_context context,
 cl_int ReadBufferAction::Setup(cl_device_id device, cl_context context,
                                cl_command_queue queue)
 {
-    return BufferAction::Setup(device, context, queue, true);
+    return BufferAction::Setup(device, context, queue);
 }
 
 cl_int ReadBufferAction::Execute(cl_command_queue queue, cl_uint numWaits,
@@ -243,7 +243,7 @@ cl_int ReadBufferAction::Execute(cl_command_queue queue, cl_uint numWaits,
 cl_int WriteBufferAction::Setup(cl_device_id device, cl_context context,
                                 cl_command_queue queue)
 {
-    return BufferAction::Setup(device, context, queue, true);
+    return BufferAction::Setup(device, context, queue);
 }
 
 cl_int WriteBufferAction::Execute(cl_command_queue queue, cl_uint numWaits,
@@ -265,7 +265,7 @@ MapBufferAction::~MapBufferAction()
 cl_int MapBufferAction::Setup(cl_device_id device, cl_context context,
                               cl_command_queue queue)
 {
-    return BufferAction::Setup(device, context, queue, false);
+    return BufferAction::Setup(device, context, queue);
 }
 
 cl_int MapBufferAction::Execute(cl_command_queue queue, cl_uint numWaits,
@@ -283,7 +283,7 @@ cl_int MapBufferAction::Execute(cl_command_queue queue, cl_uint numWaits,
 cl_int UnmapBufferAction::Setup(cl_device_id device, cl_context context,
                                 cl_command_queue queue)
 {
-    cl_int error = BufferAction::Setup(device, context, queue, false);
+    cl_int error = BufferAction::Setup(device, context, queue);
     if (error != CL_SUCCESS) return error;
 
     mMappedPtr = clEnqueueMapBuffer(queue, mBuffer, CL_TRUE, CL_MAP_READ, 0,
diff --git a/test_conformance/events/action_classes.h b/test_conformance/events/action_classes.h
index e528f11a..3f315429 100644
--- a/test_conformance/events/action_classes.h
+++ b/test_conformance/events/action_classes.h
@@ -72,7 +72,7 @@ public:
     virtual ~BufferAction() { free(mOutBuffer); }
 
     virtual cl_int Setup(cl_device_id device, cl_context context,
-                         cl_command_queue queue, bool allocate);
+                         cl_command_queue queue);
 };
 
 class ReadBufferAction : public BufferAction {
diff --git a/test_conformance/events/test_event_dependencies.cpp b/test_conformance/events/test_event_dependencies.cpp
index 45b260a6..4efe1a65 100644
--- a/test_conformance/events/test_event_dependencies.cpp
+++ b/test_conformance/events/test_event_dependencies.cpp
@@ -52,7 +52,7 @@ int test_event_enqueue_wait_for_events_run_test(
 {
     cl_int error = CL_SUCCESS;
     size_t threads[3] = { TEST_SIZE, 0, 0 };
-    int i, loop_count, event_count, expected_value, failed;
+    int i, loop_count, expected_value, failed;
     int expected_if_only_queue[2];
     int max_count = TEST_SIZE;
 
@@ -65,8 +65,6 @@ int test_event_enqueue_wait_for_events_run_test(
     clMemWrapper data;
     clProgramWrapper program;
     clKernelWrapper kernel1[TEST_COUNT], kernel2[TEST_COUNT];
-    clEventWrapper event[TEST_COUNT * 4 + 2]; // If we usemarkers we get 2 more
-                                              // events per iteration
 
     if (test_enqueue_wait_for_events)
         log_info("\tTesting with clEnqueueBarrierWithWaitList as barrier "
@@ -249,11 +247,13 @@ int test_event_enqueue_wait_for_events_run_test(
     else
         log_info("Queues chosen alternatily for each kernel execution.\n");
 
-    event_count = 0;
+    clEventWrapper pre_loop_event;
+    clEventWrapper last_loop_event;
+
     for (i = 0; i < (int)TEST_SIZE; i++) values[i] = 1;
     error = clEnqueueWriteBuffer(queues[0], data, CL_FALSE, 0,
                                  TEST_SIZE * sizeof(cl_int), values, 0, NULL,
-                                 &event[event_count]);
+                                 &pre_loop_event);
     test_error(error, "clEnqueueWriteBuffer 2 failed");
     expected_value = 1;
     expected_if_only_queue[0] = 1;
@@ -263,7 +263,7 @@ int test_event_enqueue_wait_for_events_run_test(
     if (test_enqueue_wait_for_events)
     {
         error = clEnqueueBarrierWithWaitList(queues[queue_to_use], 1,
-                                             &event[event_count], NULL);
+                                             &pre_loop_event, NULL);
         test_error(error, "Unable to queue wait for events");
     }
     else if (test_barrier)
@@ -275,6 +275,13 @@ int test_event_enqueue_wait_for_events_run_test(
 
     for (loop_count = 0; loop_count < TEST_COUNT; loop_count++)
     {
+        int event_count = 0;
+        clEventWrapper first_dependency =
+            (loop_count == 0) ? pre_loop_event : last_loop_event;
+        clEventWrapper
+            event[5]; // A maximum of 5 events are created in the loop
+        event[event_count] = first_dependency;
+
         // Execute kernel 1
         event_count++;
         if (use_waitlist | use_marker)
@@ -424,6 +431,7 @@ int test_event_enqueue_wait_for_events_run_test(
                                                  NULL);
             test_error(error, "Unable to queue barrier");
         }
+        last_loop_event = event[event_count];
     }
 
     // Now finish up everything
@@ -435,7 +443,7 @@ int test_event_enqueue_wait_for_events_run_test(
 
     error = clEnqueueReadBuffer(queues[0], data, CL_TRUE, 0,
                                 TEST_SIZE * sizeof(cl_int), values, 1,
-                                &event[event_count], NULL);
+                                &last_loop_event, NULL);
 
     test_error(error, "clEnqueueReadBuffer failed");
 
diff --git a/test_conformance/events/test_events.cpp b/test_conformance/events/test_events.cpp
index 34157fa0..c1321f91 100644
--- a/test_conformance/events/test_events.cpp
+++ b/test_conformance/events/test_events.cpp
@@ -422,7 +422,6 @@ int test_event_wait_for_array(cl_device_id deviceID, cl_context context,
 int test_event_flush(cl_device_id deviceID, cl_context context,
                      cl_command_queue queue, int num_elements)
 {
-    int loopCount = 0;
     cl_int status;
     SETUP_EVENT(context, queue);
 
@@ -445,7 +444,6 @@ int test_event_flush(cl_device_id deviceID, cl_context context,
 #else // _WIN32
         Sleep(1000);
 #endif
-        ++loopCount;
     }
 
     /*
diff --git a/test_conformance/extensions/CMakeLists.txt b/test_conformance/extensions/CMakeLists.txt
index d95d29aa..3187174f 100644
--- a/test_conformance/extensions/CMakeLists.txt
+++ b/test_conformance/extensions/CMakeLists.txt
@@ -1,3 +1,7 @@
 add_subdirectory( cl_ext_cxx_for_opencl )
 add_subdirectory( cl_khr_command_buffer )
 add_subdirectory( cl_khr_dx9_media_sharing )
+add_subdirectory( cl_khr_semaphore )
+if(VULKAN_IS_SUPPORTED)
+    add_subdirectory( cl_khr_external_semaphore )
+endif()
diff --git a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt
index ac259f6d..4b9968c3 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt
+++ b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt
@@ -3,6 +3,19 @@ set(MODULE_NAME CL_KHR_COMMAND_BUFFER)
 set(${MODULE_NAME}_SOURCES
     main.cpp
     basic_command_buffer.cpp
+    command_buffer_printf.cpp
+    command_buffer_get_command_buffer_info.cpp
+    command_buffer_set_kernel_arg.cpp
+    command_buffer_event_sync.cpp
+    command_buffer_out_of_order.cpp
+    command_buffer_profiling.cpp
+    command_buffer_queue_substitution.cpp
+    command_buffer_test_fill.cpp
+    command_buffer_test_copy.cpp
+    command_buffer_test_barrier.cpp
+    command_buffer_test_event_info.cpp
 )
 
 include(../../CMakeCommon.txt)
+
+add_subdirectory( cl_khr_command_buffer_mutable_dispatch )
diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp
index 62a02d83..43734da0 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp
@@ -13,157 +13,169 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "command_buffer_test_base.h"
+#include "basic_command_buffer.h"
 #include "procs.h"
-#include "harness/typeWrappers.h"
 
 #include <algorithm>
 #include <cstring>
 #include <vector>
 
-#define CHECK_VERIFICATION_ERROR(reference, result, index)                     \
-    {                                                                          \
-        if (reference != result)                                               \
-        {                                                                      \
-            log_error("Expected %d was %d at index %u\n", reference, result,   \
-                      index);                                                  \
-            return TEST_FAIL;                                                  \
-        }                                                                      \
-    }
-
-namespace {
 
-// Helper test fixture for constructing OpenCL objects used in testing
-// a variety of simple command-buffer enqueue scenarios.
-struct BasicCommandBufferTest : CommandBufferTestBase
+//--------------------------------------------------------------------------
+BasicCommandBufferTest::BasicCommandBufferTest(cl_device_id device,
+                                               cl_context context,
+                                               cl_command_queue queue)
+    : CommandBufferTestBase(device), context(context), queue(nullptr),
+      num_elements(0), simultaneous_use_support(false),
+      out_of_order_support(false),
+      // try to use simultaneous path by default
+      simultaneous_use_requested(true),
+      // due to simultaneous cases extend buffer size
+      buffer_size_multiplier(1), command_buffer(this)
 {
-
-    BasicCommandBufferTest(cl_device_id device, cl_context context,
-                           cl_command_queue queue)
-        : CommandBufferTestBase(device), context(context), queue(queue),
-          command_buffer(this), simultaneous_use(false),
-          out_of_order_support(false), num_elements(0)
-    {}
-
-    virtual bool Skip()
+    cl_int error = clRetainCommandQueue(queue);
+    if (error != CL_SUCCESS)
     {
-        cl_command_queue_properties required_properties;
-        cl_int error = clGetDeviceInfo(
-            device, CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR,
-            sizeof(required_properties), &required_properties, NULL);
-        test_error(error,
-                   "Unable to query "
-                   "CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR");
-
-        cl_command_queue_properties queue_properties;
-
-        error = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES,
-                                      sizeof(queue_properties),
-                                      &queue_properties, NULL);
-        test_error(error, "Unable to query CL_QUEUE_PROPERTIES");
-
-        // Skip if queue properties don't contain those required
-        return required_properties != (required_properties & queue_properties);
+        throw std::runtime_error("clRetainCommandQueue failed\n");
     }
+    this->queue = queue;
+}
 
-    virtual cl_int SetUp(int elements)
-    {
-        cl_int error = init_extension_functions();
-        if (error != CL_SUCCESS)
-        {
-            return error;
-        }
-
-        // Query if device supports simultaneous use
-        cl_device_command_buffer_capabilities_khr capabilities;
-        error =
-            clGetDeviceInfo(device, CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR,
+//--------------------------------------------------------------------------
+bool BasicCommandBufferTest::Skip()
+{
+    cl_command_queue_properties required_properties;
+    cl_int error = clGetDeviceInfo(
+        device, CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR,
+        sizeof(required_properties), &required_properties, NULL);
+    test_error(error,
+               "Unable to query "
+               "CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR");
+
+    cl_command_queue_properties queue_properties;
+    error = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES,
+                                  sizeof(queue_properties), &queue_properties,
+                                  NULL);
+    test_error(error, "Unable to query CL_QUEUE_PROPERTIES");
+
+
+    // Query if device supports simultaneous use
+    cl_device_command_buffer_capabilities_khr capabilities;
+    error = clGetDeviceInfo(device, CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR,
                             sizeof(capabilities), &capabilities, NULL);
-        test_error(error,
-                   "Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR");
-        simultaneous_use =
-            capabilities & CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR;
-        out_of_order_support =
-            capabilities & CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR;
-
-        if (elements <= 0)
-        {
-            return CL_INVALID_VALUE;
-        }
-        num_elements = static_cast<size_t>(elements);
-
-        // Kernel performs a parallel copy from an input buffer to output buffer
-        // is created.
-        const char *kernel_str =
-            R"(
-        __kernel void copy(__global int* in, __global int* out) {
-            size_t id = get_global_id(0);
-            out[id] = in[id];
-        })";
-
-        error = create_single_kernel_helper_create_program(context, &program, 1,
-                                                           &kernel_str);
-        test_error(error, "Failed to create program with source");
-
-        error = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
-        test_error(error, "Failed to build program");
-
-        in_mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
-                                sizeof(cl_int) * num_elements, nullptr, &error);
-        test_error(error, "clCreateBuffer failed");
+    test_error(error,
+               "Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR");
+    simultaneous_use_support = simultaneous_use_requested
+        && (capabilities & CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR)
+            != 0;
+    out_of_order_support =
+        capabilities & CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR;
+
+    // Skip if queue properties don't contain those required
+    return required_properties != (required_properties & queue_properties);
+}
 
-        out_mem =
-            clCreateBuffer(context, CL_MEM_WRITE_ONLY,
-                           sizeof(cl_int) * num_elements, nullptr, &error);
-        test_error(error, "clCreateBuffer failed");
+//--------------------------------------------------------------------------
+cl_int BasicCommandBufferTest::SetUpKernel()
+{
+    cl_int error = CL_SUCCESS;
 
-        kernel = clCreateKernel(program, "copy", &error);
-        test_error(error, "Failed to create copy kernel");
+    // Kernel performs a parallel copy from an input buffer to output buffer
+    // is created.
+    const char *kernel_str =
+        R"(
+  __kernel void copy(__global int* in, __global int* out, __global int* offset) {
+      size_t id = get_global_id(0);
+      int ind = offset[0] + id;
+      out[ind] = in[ind];
+  })";
 
-        error = clSetKernelArg(kernel, 0, sizeof(in_mem), &in_mem);
-        test_error(error, "clSetKernelArg failed");
+    error = create_single_kernel_helper_create_program(context, &program, 1,
+                                                       &kernel_str);
+    test_error(error, "Failed to create program with source");
 
-        error = clSetKernelArg(kernel, 1, sizeof(out_mem), &out_mem);
-        test_error(error, "clSetKernelArg failed");
+    error = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
+    test_error(error, "Failed to build program");
 
-        if (simultaneous_use)
-        {
-            cl_command_buffer_properties_khr properties[3] = {
-                CL_COMMAND_BUFFER_FLAGS_KHR,
-                CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR, 0
-            };
-            command_buffer =
-                clCreateCommandBufferKHR(1, &queue, properties, &error);
-        }
-        else
-        {
-            command_buffer =
-                clCreateCommandBufferKHR(1, &queue, nullptr, &error);
-        }
-        test_error(error, "clCreateCommandBufferKHR failed");
+    kernel = clCreateKernel(program, "copy", &error);
+    test_error(error, "Failed to create copy kernel");
 
-        return CL_SUCCESS;
+    return CL_SUCCESS;
+}
+
+//--------------------------------------------------------------------------
+cl_int BasicCommandBufferTest::SetUpKernelArgs()
+{
+    cl_int error = CL_SUCCESS;
+    in_mem =
+        clCreateBuffer(context, CL_MEM_READ_ONLY,
+                       sizeof(cl_int) * num_elements * buffer_size_multiplier,
+                       nullptr, &error);
+    test_error(error, "clCreateBuffer failed");
+
+    out_mem =
+        clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+                       sizeof(cl_int) * num_elements * buffer_size_multiplier,
+                       nullptr, &error);
+    test_error(error, "clCreateBuffer failed");
+
+    cl_int offset = 0;
+    off_mem = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+                             sizeof(cl_int), &offset, &error);
+    test_error(error, "clCreateBuffer failed");
+
+    error = clSetKernelArg(kernel, 0, sizeof(in_mem), &in_mem);
+    test_error(error, "clSetKernelArg failed");
+
+    error = clSetKernelArg(kernel, 1, sizeof(out_mem), &out_mem);
+    test_error(error, "clSetKernelArg failed");
+
+    error = clSetKernelArg(kernel, 2, sizeof(off_mem), &off_mem);
+    test_error(error, "clSetKernelArg failed");
+
+    return CL_SUCCESS;
+}
+
+//--------------------------------------------------------------------------
+cl_int BasicCommandBufferTest::SetUp(int elements)
+{
+    cl_int error = init_extension_functions();
+    if (error != CL_SUCCESS)
+    {
+        return error;
     }
 
-    // Test body returning an OpenCL error code
-    virtual cl_int Run() = 0;
+    if (elements <= 0)
+    {
+        return CL_INVALID_VALUE;
+    }
+    num_elements = static_cast<size_t>(elements);
 
+    error = SetUpKernel();
+    test_error(error, "SetUpKernel failed");
 
-protected:
-    size_t data_size() const { return num_elements * sizeof(cl_int); }
+    error = SetUpKernelArgs();
+    test_error(error, "SetUpKernelArgs failed");
 
-    cl_context context;
-    cl_command_queue queue;
-    clCommandBufferWrapper command_buffer;
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper in_mem, out_mem;
-    size_t num_elements;
+    if (simultaneous_use_support)
+    {
+        cl_command_buffer_properties_khr properties[3] = {
+            CL_COMMAND_BUFFER_FLAGS_KHR, CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR,
+            0
+        };
+        command_buffer =
+            clCreateCommandBufferKHR(1, &queue, properties, &error);
+    }
+    else
+    {
+        command_buffer = clCreateCommandBufferKHR(1, &queue, nullptr, &error);
+    }
+    test_error(error, "clCreateCommandBufferKHR failed");
 
-    // Device support query results
-    bool simultaneous_use;
-    bool out_of_order_support;
-};
+    return CL_SUCCESS;
+}
+
+namespace {
 
 // Test enqueuing a command-buffer containing a single NDRange command once
 struct BasicEnqueueTest : public BasicCommandBufferTest
@@ -262,53 +274,6 @@ struct MixedCommandsTest : public BasicCommandBufferTest
     }
 };
 
-// Test enqueueing a command-buffer blocked on a user-event
-struct UserEventTest : public BasicCommandBufferTest
-{
-    using BasicCommandBufferTest::BasicCommandBufferTest;
-
-    cl_int Run() override
-    {
-        cl_int error = clCommandNDRangeKernelKHR(
-            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
-            nullptr, 0, nullptr, nullptr, nullptr);
-        test_error(error, "clCommandNDRangeKernelKHR failed");
-
-        error = clFinalizeCommandBufferKHR(command_buffer);
-        test_error(error, "clFinalizeCommandBufferKHR failed");
-
-        clEventWrapper user_event = clCreateUserEvent(context, &error);
-        test_error(error, "clCreateUserEvent failed");
-
-        const cl_int pattern = 42;
-        error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0,
-                                    data_size(), 0, nullptr, nullptr);
-        test_error(error, "clEnqueueFillBuffer failed");
-
-        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
-                                          &user_event, nullptr);
-        test_error(error, "clEnqueueCommandBufferKHR failed");
-
-        std::vector<cl_int> output_data(num_elements);
-        error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
-                                    output_data.data(), 0, nullptr, nullptr);
-        test_error(error, "clEnqueueReadBuffer failed");
-
-        error = clSetUserEventStatus(user_event, CL_COMPLETE);
-        test_error(error, "clSetUserEventStatus failed");
-
-        error = clFinish(queue);
-        test_error(error, "clFinish failed");
-
-        for (size_t i = 0; i < num_elements; i++)
-        {
-            CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
-        }
-
-        return CL_SUCCESS;
-    }
-};
-
 // Test flushing the command-queue between command-buffer enqueues
 struct ExplicitFlushTest : public BasicCommandBufferTest
 {
@@ -375,7 +340,7 @@ struct ExplicitFlushTest : public BasicCommandBufferTest
 
     bool Skip() override
     {
-        return !simultaneous_use || BasicCommandBufferTest::Skip();
+        return BasicCommandBufferTest::Skip() || !simultaneous_use_support;
     }
 };
 
@@ -431,120 +396,10 @@ struct InterleavedEnqueueTest : public BasicCommandBufferTest
 
     bool Skip() override
     {
-        return !simultaneous_use || BasicCommandBufferTest::Skip();
-    }
-};
-
-// Test sync-points with an out-of-order command-buffer
-struct OutOfOrderTest : public BasicCommandBufferTest
-{
-    using BasicCommandBufferTest::BasicCommandBufferTest;
-    OutOfOrderTest(cl_device_id device, cl_context context,
-                   cl_command_queue queue)
-        : BasicCommandBufferTest(device, context, queue),
-          out_of_order_command_buffer(this), out_of_order_queue(nullptr),
-          event(nullptr)
-    {}
-
-    cl_int Run() override
-    {
-        cl_sync_point_khr sync_points[2];
-
-        const cl_int pattern = 42;
-        cl_int error =
-            clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, in_mem,
-                                   &pattern, sizeof(cl_int), 0, data_size(), 0,
-                                   nullptr, &sync_points[0], nullptr);
-        test_error(error, "clCommandFillBufferKHR failed");
-
-        const cl_int overwritten_pattern = 0xACDC;
-        error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr,
-                                       out_mem, &overwritten_pattern,
-                                       sizeof(cl_int), 0, data_size(), 0,
-                                       nullptr, &sync_points[1], nullptr);
-        test_error(error, "clCommandFillBufferKHR failed");
-
-        error = clCommandNDRangeKernelKHR(
-            out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
-            &num_elements, nullptr, 2, sync_points, nullptr, nullptr);
-        test_error(error, "clCommandNDRangeKernelKHR failed");
-
-        error = clFinalizeCommandBufferKHR(out_of_order_command_buffer);
-        test_error(error, "clFinalizeCommandBufferKHR failed");
-
-        error = clEnqueueCommandBufferKHR(
-            0, nullptr, out_of_order_command_buffer, 0, nullptr, &event);
-        test_error(error, "clEnqueueCommandBufferKHR failed");
-
-        std::vector<cl_int> output_data(num_elements);
-        error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
-                                    data_size(), output_data.data(), 1, &event,
-                                    nullptr);
-        test_error(error, "clEnqueueReadBuffer failed");
-
-        for (size_t i = 0; i < num_elements; i++)
-        {
-            CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
-        }
-
-        return CL_SUCCESS;
-    }
-
-    cl_int SetUp(int elements) override
-    {
-        cl_int error = BasicCommandBufferTest::SetUp(elements);
-        test_error(error, "BasicCommandBufferTest::SetUp failed");
-
-        if (!out_of_order_support)
-        {
-            // Test will skip as device doesn't support out-of-order
-            // command-buffers
-            return CL_SUCCESS;
-        }
-
-        out_of_order_queue = clCreateCommandQueue(
-            context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
-        test_error(error, "Unable to create command queue to test with");
-
-        out_of_order_command_buffer =
-            clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error);
-        test_error(error, "clCreateCommandBufferKHR failed");
-
-        return CL_SUCCESS;
-    }
-
-    bool Skip() override
-    {
-        return !out_of_order_support || BasicCommandBufferTest::Skip();
+        return BasicCommandBufferTest::Skip() || !simultaneous_use_support;
     }
-
-    clCommandQueueWrapper out_of_order_queue;
-    clCommandBufferWrapper out_of_order_command_buffer;
-    clEventWrapper event;
 };
 
-#undef CHECK_VERIFICATION_ERROR
-
-template <class T>
-int MakeAndRunTest(cl_device_id device, cl_context context,
-                   cl_command_queue queue, int num_elements)
-{
-    CHECK_COMMAND_BUFFER_EXTENSION_AVAILABLE(device);
-
-    auto test_fixture = T(device, context, queue);
-    cl_int error = test_fixture.SetUp(num_elements);
-    test_error_ret(error, "Error in test initialization", TEST_FAIL);
-
-    if (test_fixture.Skip())
-    {
-        return TEST_SKIPPED_ITSELF;
-    }
-
-    error = test_fixture.Run();
-    test_error_ret(error, "Test Failed", TEST_FAIL);
-
-    return TEST_PASS;
-}
 } // anonymous namespace
 
 int test_single_ndrange(cl_device_id device, cl_context context,
@@ -574,15 +429,3 @@ int test_explicit_flush(cl_device_id device, cl_context context,
     return MakeAndRunTest<ExplicitFlushTest>(device, context, queue,
                                              num_elements);
 }
-
-int test_user_events(cl_device_id device, cl_context context,
-                     cl_command_queue queue, int num_elements)
-{
-    return MakeAndRunTest<UserEventTest>(device, context, queue, num_elements);
-}
-
-int test_out_of_order(cl_device_id device, cl_context context,
-                      cl_command_queue queue, int num_elements)
-{
-    return MakeAndRunTest<OutOfOrderTest>(device, context, queue, num_elements);
-}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h
new file mode 100644
index 00000000..a20229e0
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h
@@ -0,0 +1,102 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef _CL_KHR_BASIC_COMMAND_BUFFER_H
+#define _CL_KHR_BASIC_COMMAND_BUFFER_H
+
+#include "command_buffer_test_base.h"
+#include "harness/typeWrappers.h"
+
+#define ADD_PROP(prop)                                                         \
+    {                                                                          \
+        prop, #prop                                                            \
+    }
+
+#define CHECK_VERIFICATION_ERROR(reference, result, index)                     \
+    {                                                                          \
+        if (reference != result)                                               \
+        {                                                                      \
+            log_error("Expected %d was %d at index %u\n", reference, result,   \
+                      index);                                                  \
+            return TEST_FAIL;                                                  \
+        }                                                                      \
+    }
+
+// Helper test fixture for constructing OpenCL objects used in testing
+// a variety of simple command-buffer enqueue scenarios.
+struct BasicCommandBufferTest : CommandBufferTestBase
+{
+
+    BasicCommandBufferTest(cl_device_id device, cl_context context,
+                           cl_command_queue queue);
+
+    virtual bool Skip();
+    virtual cl_int SetUpKernel(void);
+    virtual cl_int SetUpKernelArgs(void);
+    virtual cl_int SetUp(int elements);
+
+    // Test body returning an OpenCL error code
+    virtual cl_int Run() = 0;
+
+protected:
+    virtual size_t data_size() const { return num_elements * sizeof(cl_int); }
+
+    cl_context context;
+    clCommandQueueWrapper queue;
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper in_mem, out_mem, off_mem;
+    size_t num_elements;
+
+    // Device support query results
+    bool simultaneous_use_support;
+    bool out_of_order_support;
+
+    // user request for simultaneous use
+    bool simultaneous_use_requested;
+    unsigned buffer_size_multiplier;
+    clCommandBufferWrapper command_buffer;
+};
+
+template <class T>
+int MakeAndRunTest(cl_device_id device, cl_context context,
+                   cl_command_queue queue, int num_elements)
+{
+    CHECK_COMMAND_BUFFER_EXTENSION_AVAILABLE(device);
+
+    try
+    {
+        auto test_fixture = T(device, context, queue);
+
+        if (test_fixture.Skip())
+        {
+            return TEST_SKIPPED_ITSELF;
+        }
+
+        cl_int error = test_fixture.SetUp(num_elements);
+        test_error_ret(error, "Error in test initialization", TEST_FAIL);
+
+        error = test_fixture.Run();
+        test_error_ret(error, "Test Failed", TEST_FAIL);
+    } catch (const std::runtime_error &e)
+    {
+        log_error("%s", e.what());
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
+
+#endif // _CL_KHR_BASIC_COMMAND_BUFFER_H
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt
new file mode 100644
index 00000000..e0625833
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(MODULE_NAME CL_KHR_MUTABLE_DISPATCH)
+
+set(${MODULE_NAME}_SOURCES
+    main.cpp
+    mutable_command_info.cpp
+    ../basic_command_buffer.cpp
+)
+
+include(../../../CMakeCommon.txt)
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp
new file mode 100644
index 00000000..97075792
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp
@@ -0,0 +1,42 @@
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "procs.h"
+#include "harness/testHarness.h"
+
+test_definition test_list[] = {
+    ADD_TEST(mutable_command_info_device_query),
+    ADD_TEST(mutable_command_info_buffer),
+    ADD_TEST(mutable_command_properties_array),
+    ADD_TEST(mutable_command_kernel),
+    ADD_TEST(mutable_command_dimensions),
+    ADD_TEST(mutable_command_info_type),
+    ADD_TEST(mutable_command_info_queue),
+    ADD_TEST(mutable_command_info_global_work_offset),
+    ADD_TEST(mutable_command_info_local_work_size),
+    ADD_TEST(mutable_command_info_global_work_size),
+};
+
+int main(int argc, const char *argv[])
+{
+    // A device may report the required properties of a queue that
+    // is compatible with command-buffers via the query
+    // CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR. We account
+    // for this in the tests themselves, rather than here, where we have a
+    // device to query.
+    const cl_command_queue_properties queue_properties = 0;
+    return runTestHarnessWithCheck(argc, argv, ARRAY_SIZE(test_list), test_list,
+                                   false, queue_properties, nullptr);
+    return 0;
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h
new file mode 100644
index 00000000..9056a00d
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h
@@ -0,0 +1,107 @@
+//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef _CL_KHR_MUTABLE_COMMAND_BASIC_H
+#define _CL_KHR_MUTABLE_COMMAND_BASIC_H
+
+#include "../basic_command_buffer.h"
+#include "../command_buffer_test_base.h"
+
+struct BasicMutableCommandBufferTest : BasicCommandBufferTest
+{
+    BasicMutableCommandBufferTest(cl_device_id device, cl_context context,
+                                  cl_command_queue queue)
+        : BasicCommandBufferTest(device, context, queue)
+    {}
+
+    virtual cl_int SetUp(int elements) override
+    {
+        BasicCommandBufferTest::SetUp(elements);
+
+        cl_int error = init_extension_functions();
+        test_error(error, "Unable to initialise extension functions");
+
+        const cl_command_buffer_properties_khr props[] = {
+            CL_COMMAND_BUFFER_FLAGS_KHR,
+            CL_COMMAND_BUFFER_MUTABLE_KHR,
+            0,
+        };
+
+        command_buffer = clCreateCommandBufferKHR(1, &queue, props, &error);
+        test_error(error, "Unable to create command buffer");
+
+        clProgramWrapper program = clCreateProgramWithSource(
+            context, 1, &kernelString, nullptr, &error);
+        test_error(error, "Unable to create program");
+
+        error = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
+        test_error(error, "Unable to build program");
+
+        kernel = clCreateKernel(program, "empty", &error);
+        test_error(error, "Unable to create kernel");
+
+        return error;
+    }
+
+    bool Skip() override
+    {
+        bool extension_avaliable =
+            is_extension_available(device,
+                                   "cl_khr_command_buffer_mutable_dispatch")
+            == true;
+
+        cl_mutable_dispatch_fields_khr mutable_capabilities;
+
+        bool mutable_support =
+            !clGetDeviceInfo(
+                device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
+                sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
+            && mutable_capabilities != 0;
+
+        return !mutable_support || !extension_avaliable
+            || BasicCommandBufferTest::Skip();
+    }
+
+    cl_int init_extension_functions()
+    {
+        BasicCommandBufferTest::init_extension_functions();
+
+        cl_platform_id platform;
+        cl_int error =
+            clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id),
+                            &platform, nullptr);
+        test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
+
+        // If it is supported get the addresses of all the APIs here.
+#define GET_EXTENSION_ADDRESS(FUNC)                                            \
+    FUNC = reinterpret_cast<FUNC##_fn>(                                        \
+        clGetExtensionFunctionAddressForPlatform(platform, #FUNC));            \
+    if (FUNC == nullptr)                                                       \
+    {                                                                          \
+        log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed"     \
+                  " with " #FUNC "\n");                                        \
+        return TEST_FAIL;                                                      \
+    }
+        GET_EXTENSION_ADDRESS(clGetMutableCommandInfoKHR);
+
+        return CL_SUCCESS;
+    }
+
+    clGetMutableCommandInfoKHR_fn clGetMutableCommandInfoKHR = nullptr;
+    const char* kernelString = "__kernel void empty() {}";
+    const size_t global_work_size = 4 * sizeof(cl_int);
+};
+
+#endif //_CL_KHR_MUTABLE_COMMAND_BASIC_H
+\ No newline at end of file
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp
new file mode 100644
index 00000000..cc425a4d
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp
@@ -0,0 +1,497 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <extensionHelpers.h>
+#include "typeWrappers.h"
+#include "procs.h"
+#include "testHarness.h"
+#include <vector>
+#include <iostream>
+#include <random>
+#include <cstring>
+#include <algorithm>
+#include <memory>
+#include "mutable_command_basic.h"
+
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+////////////////////////////////////////////////////////////////////////////////
+// mutable dispatch tests which handle following cases:
+//
+// CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR
+// CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR
+// CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR
+// CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR
+// CL_MUTABLE_DISPATCH_KERNEL_KHR
+// CL_MUTABLE_DISPATCH_DIMENSIONS_KHR
+// CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR
+// CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR
+// CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR
+// CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR
+
+struct InfoDeviceQuery : public BasicMutableCommandBufferTest
+{
+    using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+
+    InfoDeviceQuery(cl_device_id device, cl_context context,
+                    cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        cl_mutable_dispatch_fields_khr mutable_capabilities;
+
+        cl_int error = clGetDeviceInfo(
+            device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
+            sizeof(mutable_capabilities), &mutable_capabilities, nullptr);
+        test_error(error, "clGetDeviceInfo failed");
+
+        if (!mutable_capabilities)
+        {
+            log_error("Device does not support update arguments to a "
+                      "mutable-dispatch.");
+            return TEST_FAIL;
+        }
+
+        return CL_SUCCESS;
+    }
+};
+
+struct InfoBuffer : public BasicMutableCommandBufferTest
+{
+    using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+
+    InfoBuffer(cl_device_id device, cl_context context, cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        cl_int error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &global_work_size, nullptr, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clGetMutableCommandInfoKHR(
+            command, CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR,
+            sizeof(test_command_buffer), &test_command_buffer, nullptr);
+        test_error(error, "clGetMutableCommandInfoKHR failed");
+
+        if (test_command_buffer != command_buffer)
+        {
+            log_error("ERROR: Incorrect command buffer returned from "
+                      "clGetMutableCommandInfoKHR.");
+            return TEST_FAIL;
+        }
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_command_buffer_khr test_command_buffer = nullptr;
+    cl_mutable_command_khr command = nullptr;
+};
+
+struct PropertiesArray : public BasicMutableCommandBufferTest
+{
+    using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+
+    PropertiesArray(cl_device_id device, cl_context context,
+                    cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        cl_ndrange_kernel_command_properties_khr props[] = {
+            CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
+            CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
+        };
+
+        cl_int error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, props, kernel, 1, nullptr,
+            &global_work_size, nullptr, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        cl_ndrange_kernel_command_properties_khr test_props[] = { 0, 0, 0 };
+        size_t size;
+
+        error = clGetMutableCommandInfoKHR(
+            command, CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR,
+            sizeof(test_props), test_props, &size);
+        test_error(error, "clGetMutableCommandInfoKHR failed");
+
+        if (size != sizeof(props) || test_props[0] != props[0]
+            || test_props[1] != props[1])
+        {
+            log_error("ERROR: Incorrect command buffer returned from "
+                      "clGetMutableCommandInfoKHR.");
+            return TEST_FAIL;
+        }
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_mutable_command_khr command = nullptr;
+};
+
+struct Kernel : public BasicMutableCommandBufferTest
+{
+    using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+
+    Kernel(cl_device_id device, cl_context context, cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        cl_int error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &global_work_size, nullptr, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        cl_kernel test_kernel;
+        size_t size;
+
+        error = clGetMutableCommandInfoKHR(
+            command, CL_MUTABLE_DISPATCH_KERNEL_KHR, sizeof(test_kernel),
+            &test_kernel, &size);
+        test_error(error, "clGetMutableCommandInfoKHR failed");
+
+        // We can not check if this is the right kernel because this is an
+        // opaque object.
+        if (test_kernel != kernel)
+        {
+            log_error("ERROR: Incorrect command buffer returned from "
+                      "clGetMutableCommandInfoKHR.");
+            return TEST_FAIL;
+        }
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_mutable_command_khr command = nullptr;
+};
+
+struct Dimensions : public BasicMutableCommandBufferTest
+{
+    using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+
+    Dimensions(cl_device_id device, cl_context context, cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        cl_int error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, dimensions, nullptr,
+            &global_work_size, nullptr, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        size_t test_dimensions;
+
+        error = clGetMutableCommandInfoKHR(
+            command, CL_MUTABLE_DISPATCH_DIMENSIONS_KHR,
+            sizeof(test_dimensions), &test_dimensions, nullptr);
+        test_error(error, "clGetMutableCommandInfoKHR failed");
+
+        if (test_dimensions != dimensions)
+        {
+            log_error("ERROR: Incorrect command buffer returned from "
+                      "clGetMutableCommandInfoKHR.");
+            return TEST_FAIL;
+        }
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_mutable_command_khr command = nullptr;
+    const size_t dimensions = 3;
+};
+
+struct InfoType : public BasicMutableCommandBufferTest
+{
+    using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+
+    InfoType(cl_device_id device, cl_context context, cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        cl_int error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &global_work_size, nullptr, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        cl_command_type type = 0;
+        error = clGetMutableCommandInfoKHR(command,
+                                           CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR,
+                                           sizeof(type), &type, NULL);
+        test_error(error, "clGetMutableCommandInfoKHR failed");
+
+        if (type != CL_COMMAND_NDRANGE_KERNEL)
+        {
+            log_error("ERROR: Wrong type returned from "
+                      "clGetMutableCommandInfoKHR.");
+            return TEST_FAIL;
+        }
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_mutable_command_khr command = nullptr;
+};
+
+struct InfoQueue : public BasicMutableCommandBufferTest
+{
+    using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+
+    InfoQueue(cl_device_id device, cl_context context, cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        cl_int error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &global_work_size, nullptr, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        cl_command_queue test_queue = nullptr;
+        error = clGetMutableCommandInfoKHR(
+            command, CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR, sizeof(test_queue),
+            &test_queue, nullptr);
+        test_error(error, "clGetMutableCommandInfoKHR failed");
+
+        if (test_queue != queue)
+        {
+            log_error("ERROR: Incorrect queue returned from "
+                      "clGetMutableCommandInfoKHR.");
+            return TEST_FAIL;
+        }
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_mutable_command_khr command = nullptr;
+};
+
+struct InfoGlobalWorkOffset : public BasicMutableCommandBufferTest
+{
+    using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+
+    InfoGlobalWorkOffset(cl_device_id device, cl_context context,
+                         cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        cl_int error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, &global_work_offset,
+            &global_work_size, nullptr, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clGetMutableCommandInfoKHR(
+            command, CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR,
+            sizeof(test_global_work_offset), &test_global_work_offset, nullptr);
+
+        if (test_global_work_offset != global_work_offset)
+        {
+            log_error("ERROR: Wrong size returned from "
+                      "clGetMutableCommandInfoKHR.");
+            return TEST_FAIL;
+        }
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_mutable_command_khr command = nullptr;
+    const size_t global_work_offset = 4 * sizeof(cl_int);
+    size_t test_global_work_offset = 0;
+};
+
+struct InfoGlobalWorkSize : public BasicMutableCommandBufferTest
+{
+    using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+
+    InfoGlobalWorkSize(cl_device_id device, cl_context context,
+                       cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        cl_int error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &global_work_size, nullptr, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clGetMutableCommandInfoKHR(
+            command, CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR,
+            sizeof(test_global_work_size), &test_global_work_size, nullptr);
+
+        if (test_global_work_size != global_work_size)
+        {
+            log_error("ERROR: Wrong size returned from "
+                      "clGetMutableCommandInfoKHR.");
+            return TEST_FAIL;
+        }
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        return TEST_PASS;
+    }
+
+    cl_mutable_command_khr command = nullptr;
+    size_t test_global_work_size = 0;
+};
+
+struct InfoLocalWorkSize : public BasicMutableCommandBufferTest
+{
+    using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+
+    InfoLocalWorkSize(cl_device_id device, cl_context context,
+                      cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    cl_int Run() override
+    {
+        cl_int error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &global_work_size, &local_work_size, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clGetMutableCommandInfoKHR(
+            command, CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR,
+            sizeof(test_local_work_size), &test_local_work_size, nullptr);
+
+        if (test_local_work_size != local_work_size)
+        {
+            log_error("ERROR: Wrong size returned from "
+                      "clGetMutableCommandInfoKHR.");
+            return TEST_FAIL;
+        }
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    cl_mutable_command_khr command = nullptr;
+    const size_t local_work_size = 4 * sizeof(cl_int);
+    size_t test_local_work_size = 0;
+};
+
+int test_mutable_command_info_device_query(cl_device_id device,
+                                           cl_context context,
+                                           cl_command_queue queue,
+                                           int num_elements)
+{
+    return MakeAndRunTest<InfoDeviceQuery>(device, context, queue,
+                                           num_elements);
+}
+
+int test_mutable_command_info_buffer(cl_device_id device, cl_context context,
+                                     cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<InfoBuffer>(device, context, queue, num_elements);
+}
+
+int test_mutable_command_properties_array(cl_device_id device,
+                                          cl_context context,
+                                          cl_command_queue queue,
+                                          int num_elements)
+{
+    return MakeAndRunTest<PropertiesArray>(device, context, queue,
+                                           num_elements);
+}
+
+int test_mutable_command_kernel(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<Kernel>(device, context, queue, num_elements);
+}
+
+int test_mutable_command_dimensions(cl_device_id device, cl_context context,
+                                    cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<Dimensions>(device, context, queue, num_elements);
+}
+
+int test_mutable_command_info_type(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<InfoType>(device, context, queue, num_elements);
+}
+
+int test_mutable_command_info_queue(cl_device_id device, cl_context context,
+                                    cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<InfoQueue>(device, context, queue, num_elements);
+}
+
+int test_mutable_command_info_global_work_offset(cl_device_id device,
+                                                 cl_context context,
+                                                 cl_command_queue queue,
+                                                 int num_elements)
+{
+    return MakeAndRunTest<InfoGlobalWorkOffset>(device, context, queue,
+                                                num_elements);
+}
+
+int test_mutable_command_info_global_work_size(cl_device_id device,
+                                               cl_context context,
+                                               cl_command_queue queue,
+                                               int num_elements)
+{
+    return MakeAndRunTest<InfoGlobalWorkSize>(device, context, queue,
+                                              num_elements);
+}
+
+int test_mutable_command_info_local_work_size(cl_device_id device,
+                                              cl_context context,
+                                              cl_command_queue queue,
+                                              int num_elements)
+{
+    return MakeAndRunTest<InfoLocalWorkSize>(device, context, queue,
+                                             num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h
new file mode 100644
index 00000000..08512cae
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h
@@ -0,0 +1,62 @@
+//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H
+#define _CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H
+
+#include <CL/cl.h>
+
+
+// Basic mutable dispatch tests
+extern int test_mutable_command_info_device_query(cl_device_id device,
+                                                  cl_context context,
+                                                  cl_command_queue queue,
+                                                  int num_elements);
+extern int test_mutable_command_info_buffer(cl_device_id device,
+                                            cl_context context,
+                                            cl_command_queue queue,
+                                            int num_elements);
+extern int test_mutable_command_info_type(cl_device_id device,
+                                          cl_context context,
+                                          cl_command_queue queue,
+                                          int num_elements);
+extern int test_mutable_command_info_queue(cl_device_id device,
+                                           cl_context context,
+                                           cl_command_queue queue,
+                                           int num_elements);
+extern int test_mutable_command_properties_array(cl_device_id device,
+                                                 cl_context context,
+                                                 cl_command_queue queue,
+                                                 int num_elements);
+extern int test_mutable_command_kernel(cl_device_id device, cl_context context,
+                                       cl_command_queue queue,
+                                       int num_elements);
+extern int test_mutable_command_dimensions(cl_device_id device,
+                                           cl_context context,
+                                           cl_command_queue queue,
+                                           int num_elements);
+extern int test_mutable_command_info_global_work_offset(cl_device_id device,
+                                                        cl_context context,
+                                                        cl_command_queue queue,
+                                                        int num_elements);
+extern int test_mutable_command_info_local_work_size(cl_device_id device,
+                                                     cl_context context,
+                                                     cl_command_queue queue,
+                                                     int num_elements);
+extern int test_mutable_command_info_global_work_size(cl_device_id device,
+                                                      cl_context context,
+                                                      cl_command_queue queue,
+                                                      int num_elements);
+#endif /*_CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H*/
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_event_sync.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_event_sync.cpp
new file mode 100644
index 00000000..be8530b2
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_event_sync.cpp
@@ -0,0 +1,1040 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "basic_command_buffer.h"
+#include "procs.h"
+
+#include <vector>
+
+//--------------------------------------------------------------------------
+enum class EventMode
+{
+    RET_REGULAR_WAIT_FOR_COMBUF = 0,
+    RET_COMBUF_WAIT_FOR_COMBUF,
+    RET_COMBUF_WAIT_FOR_SEC_COMBUF,
+    RET_EVENT_CALLBACK,
+    RET_CLWAITFOREVENTS_SINGLE,
+    RET_CLWAITFOREVENTS,
+    RET_COMBUF_WAIT_FOR_REGULAR,
+    RET_WAIT_FOR_SEC_QUEUE_EVENT,
+    USER_EVENT_WAIT,
+    USER_EVENTS_WAIT,
+    USER_EVENT_CALLBACK
+};
+
+//--------------------------------------------------------------------------
+void CL_CALLBACK combuf_event_callback_function(cl_event event,
+                                                cl_int commandStatus,
+                                                void *userData)
+{
+    bool *pdata = static_cast<bool *>(userData);
+    log_info("\tEvent callback of clEnqueueCommandBufferKHR triggered\n");
+    *pdata = true;
+}
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+// event sync test cases for cl_khr_command_buffer which handles:
+// -test that an event returned by a command-buffer enqueue can be waited on by
+//  regular commands
+// -test that an event returned by a command-buffer enqueue can
+//  be waited on an enqueue of the same command-buffer
+// -tests that a command buffer enqueue can wait on the enqueue of a different
+//  command buffer
+// -test clSetEventCallback works correctly on an event returned by
+//  clEnqueueCommandBufferKHR
+// -test clWaitForEvents on a single event returned from a
+//  clEnqueueCommandBufferKHR
+// -test clWaitForEvents on multiple events returned from different
+//  clEnqueueCommandBufferKHR calls
+
+
+//
+//
+// -test clSetEventCallback works correctly on an user defined event waited by
+// clEnqueueCommandBufferKHR
+//
+//
+
+template <EventMode event_mode, bool out_of_order_requested>
+struct CommandBufferEventSync : public BasicCommandBufferTest
+{
+    CommandBufferEventSync(cl_device_id device, cl_context context,
+                           cl_command_queue queue)
+        : BasicCommandBufferTest(device, context, queue),
+          command_buffer_sec(this), kernel_sec(nullptr), in_mem_sec(nullptr),
+          out_mem_sec(nullptr), off_mem_sec(nullptr), test_event(nullptr)
+    {
+        simultaneous_use_requested =
+            (event_mode == EventMode::RET_COMBUF_WAIT_FOR_COMBUF) ? true
+                                                                  : false;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int SetUpKernel() override
+    {
+        cl_int error = BasicCommandBufferTest::SetUpKernel();
+        test_error(error, "BasicCommandBufferTest::SetUpKernel failed");
+
+        // due to possible out-of-order command queue copy the kernel for below
+        // case scenarios
+        if (event_mode == EventMode::RET_COMBUF_WAIT_FOR_SEC_COMBUF
+            || event_mode == EventMode::RET_CLWAITFOREVENTS)
+        {
+            kernel_sec = clCreateKernel(program, "copy", &error);
+            test_error(error, "Failed to create copy kernel");
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int SetUpKernelArgs() override
+    {
+        // due to possible out-of-order command queue it is necessary to create
+        // separate set of kernel args for below cases
+        if (event_mode == EventMode::RET_COMBUF_WAIT_FOR_SEC_COMBUF
+            || event_mode == EventMode::RET_CLWAITFOREVENTS)
+        {
+            // setup arguments for secondary kernel
+            std::swap(kernel, kernel_sec);
+
+            cl_int error = BasicCommandBufferTest::SetUpKernelArgs();
+            test_error(error, "BasicCommandBufferTest::SetUpKernel failed");
+
+            // swap arguments for base class setup
+            in_mem_sec = in_mem;
+            out_mem_sec = out_mem;
+            off_mem_sec = off_mem;
+            std::swap(kernel, kernel_sec);
+        }
+
+        cl_int error = BasicCommandBufferTest::SetUpKernelArgs();
+        test_error(error, "BasicCommandBufferTest::SetUpKernel failed");
+
+        if (out_of_order_requested && out_of_order_support)
+        {
+            queue = clCreateCommandQueue(context, device,
+                                         CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
+                                         &error);
+            test_error(error, "Unable to create command queue to test with");
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+        if (event_mode == EventMode::RET_COMBUF_WAIT_FOR_SEC_COMBUF
+            || event_mode == EventMode::RET_CLWAITFOREVENTS)
+        {
+            command_buffer_sec =
+                clCreateCommandBufferKHR(1, &queue, nullptr, &error);
+            test_error(error, "clCreateCommandBufferKHR failed");
+        }
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    bool Skip() override
+    {
+        if (BasicCommandBufferTest::Skip()) return true;
+
+        if (simultaneous_use_requested && !simultaneous_use_support)
+            return true;
+
+        if (out_of_order_requested && !out_of_order_support) return true;
+
+        return false;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int Run() override
+    {
+        cl_int error = CL_SUCCESS;
+
+        // record command buffer
+        error = RecordCommandBuffer(command_buffer, kernel);
+        test_error(error, "RecordCommandBuffer failed");
+
+        switch (event_mode)
+        {
+            case EventMode::RET_REGULAR_WAIT_FOR_COMBUF:
+                error = RunRegularWaitForCombuf();
+                test_error(error, "RunRegularWaitForCombuf failed");
+                break;
+            case EventMode::RET_COMBUF_WAIT_FOR_COMBUF:
+                error = RunCombufWaitForCombuf();
+                test_error(error, "RunCombufWaitForCombuf failed");
+                break;
+            case EventMode::RET_COMBUF_WAIT_FOR_SEC_COMBUF:
+                error = RunCombufWaitForSecCombuf();
+                test_error(error, "RunCombufWaitForSecCombuf failed");
+                break;
+            case EventMode::RET_EVENT_CALLBACK:
+                error = RunReturnEventCallback();
+                test_error(error, "RunReturnEventCallback failed");
+                break;
+            case EventMode::RET_CLWAITFOREVENTS_SINGLE:
+                error = RunWaitForEvent();
+                test_error(error, "RunWaitForEvent failed");
+                break;
+            case EventMode::RET_CLWAITFOREVENTS:
+                error = RunWaitForEvents();
+                test_error(error, "RunWaitForEvents failed");
+                break;
+            case EventMode::RET_COMBUF_WAIT_FOR_REGULAR:
+                error = RunCombufWaitForRegular();
+                test_error(error, "RunCombufWaitForRegular failed");
+                break;
+            case EventMode::RET_WAIT_FOR_SEC_QUEUE_EVENT:
+                error = RunCombufWaitForSecQueueCombuf();
+                test_error(error, "RunCombufWaitForSecQueueCombuf failed");
+                break;
+            case EventMode::USER_EVENT_WAIT:
+                error = RunUserEventWait();
+                test_error(error, "RunUserEventWait failed");
+                break;
+            case EventMode::USER_EVENTS_WAIT:
+                error = RunUserEventsWait();
+                test_error(error, "RunUserEventsWait failed");
+                break;
+            case EventMode::USER_EVENT_CALLBACK:
+                error = RunUserEventCallback();
+                test_error(error, "RunUserEventCallback failed");
+                break;
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RecordCommandBuffer(clCommandBufferWrapper &combuf,
+                               clKernelWrapper &kern)
+    {
+        cl_int error = clCommandNDRangeKernelKHR(
+            combuf, nullptr, nullptr, kern, 1, nullptr, &num_elements, nullptr,
+            0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(combuf);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    void InitInOrderEvents(std::vector<cl_event *> &event_ptrs)
+    {
+        if (out_of_order_requested)
+        {
+            in_order_events.resize(event_ptrs.size());
+            for (size_t i = 0; i < in_order_events.size(); i++)
+            {
+                event_ptrs[i] = &in_order_events[i];
+            }
+        }
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunRegularWaitForCombuf()
+    {
+        std::vector<cl_int> output_data(num_elements);
+
+        // if out-of-order queue requested it is necessary to secure proper
+        // order of commands
+        std::vector<cl_event *> event_ptrs = { nullptr };
+        InitInOrderEvents(event_ptrs);
+
+        cl_int error =
+            clEnqueueFillBuffer(queue, in_mem, &pattern_pri, sizeof(cl_int), 0,
+                                data_size(), 0, nullptr, event_ptrs[0]);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        error = clEnqueueCommandBufferKHR(
+            0, nullptr, command_buffer, wait_count, event_ptrs[0], &test_event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error =
+            clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
+                                output_data.data(), 1, &test_event, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+
+        // verify the result - result buffer must contain initial pattern
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunCombufWaitForCombuf()
+    {
+        std::vector<cl_int> output_data(num_elements);
+
+        // if out-of-order queue requested it is necessary to secure proper
+        // order of all commands
+        std::vector<cl_event *> event_ptrs = { nullptr, nullptr };
+        InitInOrderEvents(event_ptrs);
+
+        cl_int error =
+            clEnqueueFillBuffer(queue, in_mem, &pattern_pri, sizeof(cl_int), 0,
+                                data_size(), 0, nullptr, event_ptrs[0]);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        error = clEnqueueCommandBufferKHR(
+            0, nullptr, command_buffer, wait_count, event_ptrs[0], &test_event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
+                                          &test_event, event_ptrs[1]);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
+                                    output_data.data(), wait_count,
+                                    event_ptrs[1], nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+
+        // verify the result - result buffer must contain initial pattern
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
+        }
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunCombufWaitForSecCombuf()
+    {
+        std::vector<cl_int> output_data(num_elements);
+
+        // if out-of-order queue requested it is necessary to secure proper
+        // order of all commands
+        std::vector<cl_event *> event_ptrs = { nullptr, nullptr, nullptr };
+        InitInOrderEvents(event_ptrs);
+
+        // record other command buffer
+        cl_int error = RecordCommandBuffer(command_buffer_sec, kernel_sec);
+        test_error(error, "RecordCommandBuffer failed");
+
+        error =
+            clEnqueueFillBuffer(queue, in_mem_sec, &pattern_pri, sizeof(cl_int),
+                                0, data_size(), 0, nullptr, event_ptrs[0]);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        error =
+            clEnqueueCommandBufferKHR(0, nullptr, command_buffer_sec,
+                                      wait_count, event_ptrs[0], &test_event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clEnqueueFillBuffer(queue, in_mem, &pattern_sec, sizeof(cl_int),
+                                    0, data_size(), 0, nullptr, event_ptrs[1]);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        cl_event wait_list[] = { test_event,
+                                 event_ptrs[1] != nullptr ? *event_ptrs[1]
+                                                          : nullptr };
+        error =
+            clEnqueueCommandBufferKHR(0, nullptr, command_buffer,
+                                      1 + wait_count, wait_list, event_ptrs[2]);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
+                                    output_data.data(), wait_count,
+                                    event_ptrs[2], nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+
+        // verify the result - result buffer must contain initial pattern
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern_sec, output_data[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunReturnEventCallback()
+    {
+        std::vector<cl_int> output_data(num_elements);
+
+        // if out-of-order queue requested it is necessary to secure proper
+        // order of all commands
+        std::vector<cl_event *> event_ptrs = { nullptr };
+        InitInOrderEvents(event_ptrs);
+
+        cl_int error =
+            clEnqueueFillBuffer(queue, in_mem, &pattern_pri, sizeof(cl_int), 0,
+                                data_size(), 0, nullptr, event_ptrs[0]);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        error = clEnqueueCommandBufferKHR(
+            0, nullptr, command_buffer, wait_count, event_ptrs[0], &test_event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        bool confirmation = false;
+        error =
+            clSetEventCallback(test_event, CL_COMPLETE,
+                               combuf_event_callback_function, &confirmation);
+        test_error(error, "clSetEventCallback failed");
+
+        error = clWaitForEvents(1, &test_event);
+        test_error(error, "clWaitForEvents failed");
+
+        error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
+                                    output_data.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+
+        // verify the result
+        if (!confirmation)
+        {
+            log_error("combuf_event_callback_function invocation failure\n");
+            return TEST_FAIL;
+        }
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunWaitForEvent()
+    {
+        std::vector<cl_int> output_data(num_elements);
+
+        // if out-of-order queue requested it is necessary to secure proper
+        // order of all commands
+        std::vector<cl_event *> event_ptrs = { nullptr };
+        InitInOrderEvents(event_ptrs);
+
+        cl_int error =
+            clEnqueueFillBuffer(queue, in_mem, &pattern_pri, sizeof(cl_int), 0,
+                                data_size(), 0, nullptr, event_ptrs[0]);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        error = clEnqueueCommandBufferKHR(
+            0, nullptr, command_buffer, wait_count, event_ptrs[0], &test_event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clWaitForEvents(1, &test_event);
+        test_error(error, "clWaitForEvents failed");
+
+        error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
+                                    output_data.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+
+        // verify the result - result buffer must contain initial pattern
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
+        }
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunWaitForEvents()
+    {
+        std::vector<cl_int> output_data(num_elements);
+        clEventWrapper test_events[2];
+
+        // if out-of-order queue requested it is necessary to secure proper
+        // order of all commands
+        std::vector<cl_event *> event_ptrs = { nullptr, nullptr };
+        InitInOrderEvents(event_ptrs);
+
+        // record other command buffer
+        cl_int error = RecordCommandBuffer(command_buffer_sec, kernel_sec);
+        test_error(error, "RecordCommandBuffer failed");
+
+        error =
+            clEnqueueFillBuffer(queue, in_mem_sec, &pattern_pri, sizeof(cl_int),
+                                0, data_size(), 0, nullptr, event_ptrs[0]);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer_sec,
+                                          wait_count, event_ptrs[0],
+                                          &test_events[0]);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clEnqueueFillBuffer(queue, in_mem, &pattern_sec, sizeof(cl_int),
+                                    0, data_size(), 0, nullptr, event_ptrs[1]);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        error =
+            clEnqueueCommandBufferKHR(0, nullptr, command_buffer, wait_count,
+                                      event_ptrs[1], &test_events[1]);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        cl_event wait_list[] = { test_events[0], test_events[1] };
+        error = clWaitForEvents(2, wait_list);
+        test_error(error, "clWaitForEvents failed");
+
+        error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
+                                    output_data.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+
+        // verify the result - result buffer must contain initial pattern
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern_sec, output_data[i], i);
+        }
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunCombufWaitForRegular()
+    {
+        // if out-of-order queue requested it is necessary to secure proper
+        // order of commands
+        std::vector<cl_event *> event_ptrs = { nullptr };
+        InitInOrderEvents(event_ptrs);
+
+        cl_int error =
+            clEnqueueFillBuffer(queue, in_mem, &pattern_pri, sizeof(cl_int), 0,
+                                data_size(), 0, nullptr, &test_event);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
+                                          &test_event, event_ptrs[0]);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        std::vector<cl_int> output_data(num_elements);
+        error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
+                                    output_data.data(), wait_count,
+                                    event_ptrs[0], nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+
+        // verify the result - result buffer must contain initial pattern
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunCombufWaitForSecQueueCombuf()
+    {
+        // if out-of-order queue requested it is necessary to secure proper
+        // order of all commands
+        std::vector<cl_event *> event_ptrs = { nullptr, nullptr };
+        InitInOrderEvents(event_ptrs);
+
+        cl_int error = CL_SUCCESS;
+
+        // create secondary command queue and command buffer
+        clCommandQueueWrapper queue_sec =
+            clCreateCommandQueue(context, device, 0, &error);
+        test_error(error, "Unable to create command queue to test with");
+
+        command_buffer_sec =
+            clCreateCommandBufferKHR(1, &queue_sec, nullptr, &error);
+        test_error(error, "clCreateCommandBufferKHR failed");
+
+        // record secondary command buffer
+        error = RecordCommandBuffer(command_buffer_sec, kernel);
+        test_error(error, "RecordCommandBuffer failed");
+
+        // process secondary queue
+        error =
+            clEnqueueFillBuffer(queue_sec, in_mem, &pattern_pri, sizeof(cl_int),
+                                0, data_size(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer_sec, 0,
+                                          nullptr, &test_event);
+        test_error(error,
+                   "clEnqueueCommandBufferKHR in secondary queue failed");
+
+        // process primary queue
+        error = clEnqueueFillBuffer(queue, in_mem, &pattern_pri, sizeof(cl_int),
+                                    0, data_size(), 0, nullptr, event_ptrs[0]);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        cl_event wait_list[] = { test_event,
+                                 event_ptrs[0] != nullptr ? *event_ptrs[0]
+                                                          : nullptr };
+        error =
+            clEnqueueCommandBufferKHR(0, nullptr, command_buffer,
+                                      1 + wait_count, wait_list, event_ptrs[1]);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        std::vector<cl_int> output_data(num_elements);
+        error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
+                                    output_data.data(), wait_count,
+                                    event_ptrs[1], nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+
+        error = clFinish(queue_sec);
+        test_error(error, "clFinish failed");
+
+        // verify the result - result buffer must contain initial pattern
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
+        }
+        return CL_SUCCESS;
+    }
+
+
+    //--------------------------------------------------------------------------
+    cl_int RunUserEventWait()
+    {
+        // if out-of-order queue requested it is necessary to secure proper
+        // order of all commands
+        std::vector<cl_event *> event_ptrs = { nullptr, nullptr };
+        InitInOrderEvents(event_ptrs);
+
+        cl_int error = CL_SUCCESS;
+        clEventWrapper user_event = clCreateUserEvent(context, &error);
+        test_error(error, "clCreateUserEvent failed");
+
+        const cl_int pattern = 42;
+        error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0,
+                                    data_size(), 0, nullptr, event_ptrs[0]);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        cl_event wait_list[] = { user_event,
+                                 event_ptrs[0] != nullptr ? *event_ptrs[0]
+                                                          : nullptr };
+        error =
+            clEnqueueCommandBufferKHR(0, nullptr, command_buffer,
+                                      wait_count + 1, wait_list, event_ptrs[1]);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        std::vector<cl_int> output_data(num_elements);
+        error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
+                                    output_data.data(), wait_count,
+                                    event_ptrs[1], nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        error = clSetUserEventStatus(user_event, CL_COMPLETE);
+        test_error(error, "clSetUserEventStatus failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunUserEventsWait()
+    {
+        // if out-of-order queue requested it is necessary to secure proper
+        // order of all commands
+        std::vector<cl_event *> event_ptrs = { nullptr, nullptr };
+        InitInOrderEvents(event_ptrs);
+
+        cl_int error = CL_SUCCESS;
+        std::vector<clEventWrapper> user_events(user_event_num);
+
+        for (size_t i = 0; i < user_event_num; i++)
+        {
+            user_events[i] = clCreateUserEvent(context, &error);
+            test_error(error, "clCreateUserEvent failed");
+        }
+
+        error = clEnqueueFillBuffer(queue, in_mem, &pattern_pri, sizeof(cl_int),
+                                    0, data_size(), 0, nullptr, event_ptrs[0]);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        std::vector<cl_event> wait_list(user_event_num + wait_count);
+        for (size_t i = 0; i < user_event_num; i++)
+        {
+            wait_list[i] = user_events[i];
+        }
+        if (out_of_order_requested) wait_list[user_event_num] = *event_ptrs[0];
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer,
+                                          user_event_num + wait_count,
+                                          &wait_list.front(), event_ptrs[1]);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        std::vector<cl_int> output_data(num_elements);
+        error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
+                                    output_data.data(), wait_count,
+                                    event_ptrs[1], nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        for (size_t i = 0; i < user_event_num; i++)
+        {
+            error = clSetUserEventStatus(user_events[i], CL_COMPLETE);
+            test_error(error, "clSetUserEventStatus failed");
+        }
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
+        }
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunUserEventCallback()
+    {
+        // if out-of-order queue requested it is necessary to secure proper
+        // order of all commands
+        std::vector<cl_event *> event_ptrs = { nullptr, nullptr };
+        InitInOrderEvents(event_ptrs);
+
+        cl_int error = CL_SUCCESS;
+        clEventWrapper user_event = clCreateUserEvent(context, &error);
+        test_error(error, "clCreateUserEvent failed");
+
+        error = clEnqueueFillBuffer(queue, in_mem, &pattern_pri, sizeof(cl_int),
+                                    0, data_size(), 0, nullptr, event_ptrs[0]);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        cl_event wait_list[] = { user_event,
+                                 event_ptrs[0] != nullptr ? *event_ptrs[0]
+                                                          : nullptr };
+        error =
+            clEnqueueCommandBufferKHR(0, nullptr, command_buffer,
+                                      wait_count + 1, wait_list, event_ptrs[1]);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        bool confirmation = false;
+        error =
+            clSetEventCallback(user_event, CL_COMPLETE,
+                               combuf_event_callback_function, &confirmation);
+        test_error(error, "clSetEventCallback failed");
+
+        error = clSetUserEventStatus(user_event, CL_COMPLETE);
+        test_error(error, "clSetUserEventStatus failed");
+
+        std::vector<cl_int> output_data(num_elements);
+        error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
+                                    output_data.data(), wait_count,
+                                    event_ptrs[1], nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+
+        // verify the result
+        if (!confirmation)
+        {
+            log_error("combuf_event_callback_function invocation failure\n");
+            return TEST_FAIL;
+        }
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+
+    clCommandBufferWrapper command_buffer_sec;
+    clKernelWrapper kernel_sec;
+    clMemWrapper in_mem_sec, out_mem_sec, off_mem_sec;
+    clEventWrapper test_event;
+
+    std::vector<clEventWrapper> in_order_events;
+
+    const cl_int pattern_pri = 0xA;
+    const cl_int pattern_sec = 0xB;
+    const cl_int wait_count = out_of_order_requested ? 1 : 0;
+
+    const cl_int user_event_num = 3;
+};
+
+} // anonymous namespace
+
+// helper macros
+#define IN_ORDER_MSG(name) #name " test with in-order command queue"
+#define OUT_OF_ORDER_MSG(name) #name " test with out-of-order command queue"
+#define test_status_val(code, msg)                                             \
+    {                                                                          \
+        if (code == TEST_FAIL)                                                 \
+        {                                                                      \
+            print_failure_error(code, TEST_PASS, msg " failed\n");             \
+            return TEST_FAIL;                                                  \
+        }                                                                      \
+        else if (code == TEST_SKIP)                                            \
+        {                                                                      \
+            log_info(msg " skipped\n");                                        \
+        }                                                                      \
+    }
+
+//--------------------------------------------------------------------------
+// return-events test cases for regular queue
+int test_regular_wait_for_command_buffer(cl_device_id device,
+                                         cl_context context,
+                                         cl_command_queue queue,
+                                         int num_elements)
+{
+    int status = TEST_PASS;
+    // The approach here is that test scenario which involves out-of-order
+    // command queue may be skipped without breaking in-order queue test.
+    // out-of-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::RET_REGULAR_WAIT_FOR_COMBUF, true>>(
+        device, context, queue, num_elements);
+    test_status_val(status,
+                    OUT_OF_ORDER_MSG(EventMode::RET_REGULAR_WAIT_FOR_COMBUF));
+
+    // in-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::RET_REGULAR_WAIT_FOR_COMBUF, false>>(
+        device, context, queue, num_elements);
+    test_status_val(status,
+                    IN_ORDER_MSG(EventMode::RET_REGULAR_WAIT_FOR_COMBUF));
+
+    return status;
+}
+
+int test_command_buffer_wait_for_command_buffer(cl_device_id device,
+                                                cl_context context,
+                                                cl_command_queue queue,
+                                                int num_elements)
+{
+    int status = TEST_PASS;
+    // out-of-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::RET_COMBUF_WAIT_FOR_COMBUF, true>>(
+        device, context, queue, num_elements);
+    test_status_val(status,
+                    OUT_OF_ORDER_MSG(EventMode::RET_COMBUF_WAIT_FOR_COMBUF));
+
+    // in-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::RET_COMBUF_WAIT_FOR_COMBUF, false>>(
+        device, context, queue, num_elements);
+    test_status_val(status,
+                    IN_ORDER_MSG(EventMode::RET_COMBUF_WAIT_FOR_COMBUF));
+
+    return status;
+}
+
+int test_command_buffer_wait_for_sec_command_buffer(cl_device_id device,
+                                                    cl_context context,
+                                                    cl_command_queue queue,
+                                                    int num_elements)
+{
+    int status = TEST_PASS;
+    // out-of-order command queue test
+    status = MakeAndRunTest<CommandBufferEventSync<
+        EventMode::RET_COMBUF_WAIT_FOR_SEC_COMBUF, true>>(device, context,
+                                                          queue, num_elements);
+    test_status_val(
+        status, OUT_OF_ORDER_MSG(EventMode::RET_COMBUF_WAIT_FOR_SEC_COMBUF));
+
+    // in-order command queue test
+    status = MakeAndRunTest<CommandBufferEventSync<
+        EventMode::RET_COMBUF_WAIT_FOR_SEC_COMBUF, false>>(device, context,
+                                                           queue, num_elements);
+    test_status_val(status,
+                    IN_ORDER_MSG(EventMode::RET_COMBUF_WAIT_FOR_SEC_COMBUF));
+
+    return status;
+}
+
+int test_return_event_callback(cl_device_id device, cl_context context,
+                               cl_command_queue queue, int num_elements)
+{
+    int status = TEST_PASS;
+    // out-of-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::RET_EVENT_CALLBACK, true>>(
+        device, context, queue, num_elements);
+    test_status_val(status, OUT_OF_ORDER_MSG(EventMode::RET_EVENT_CALLBACK));
+
+    // in-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::RET_EVENT_CALLBACK, false>>(
+        device, context, queue, num_elements);
+    test_status_val(status, IN_ORDER_MSG(EventMode::RET_EVENT_CALLBACK));
+
+    return status;
+}
+
+int test_clwaitforevents_single(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int num_elements)
+{
+    int status = TEST_PASS;
+    // out-of-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::RET_CLWAITFOREVENTS_SINGLE, true>>(
+        device, context, queue, num_elements);
+    test_status_val(status,
+                    OUT_OF_ORDER_MSG(EventMode::RET_CLWAITFOREVENTS_SINGLE));
+
+    // in-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::RET_CLWAITFOREVENTS_SINGLE, false>>(
+        device, context, queue, num_elements);
+    test_status_val(status,
+                    IN_ORDER_MSG(EventMode::RET_CLWAITFOREVENTS_SINGLE));
+
+    return status;
+}
+
+int test_clwaitforevents(cl_device_id device, cl_context context,
+                         cl_command_queue queue, int num_elements)
+{
+    int status = TEST_PASS;
+    // out-of-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::RET_CLWAITFOREVENTS, true>>(
+        device, context, queue, num_elements);
+    test_status_val(status, OUT_OF_ORDER_MSG(EventMode::RET_CLWAITFOREVENTS));
+
+    // in-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::RET_CLWAITFOREVENTS, false>>(
+        device, context, queue, num_elements);
+    test_status_val(status, IN_ORDER_MSG(EventMode::RET_CLWAITFOREVENTS));
+
+    return status;
+}
+
+int test_command_buffer_wait_for_regular(cl_device_id device,
+                                         cl_context context,
+                                         cl_command_queue queue,
+                                         int num_elements)
+{
+    int status = TEST_PASS;
+    // out-of-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::RET_COMBUF_WAIT_FOR_REGULAR, true>>(
+        device, context, queue, num_elements);
+    test_status_val(status,
+                    OUT_OF_ORDER_MSG(EventMode::RET_COMBUF_WAIT_FOR_REGULAR));
+
+    // in-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::RET_COMBUF_WAIT_FOR_REGULAR, false>>(
+        device, context, queue, num_elements);
+    test_status_val(status,
+                    IN_ORDER_MSG(EventMode::RET_COMBUF_WAIT_FOR_REGULAR));
+
+    return status;
+}
+
+int test_wait_for_sec_queue_event(cl_device_id device, cl_context context,
+                                  cl_command_queue queue, int num_elements)
+{
+    int status = TEST_PASS;
+    // out-of-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::RET_WAIT_FOR_SEC_QUEUE_EVENT, true>>(
+        device, context, queue, num_elements);
+    test_status_val(status,
+                    OUT_OF_ORDER_MSG(EventMode::RET_WAIT_FOR_SEC_QUEUE_EVENT));
+
+    // in-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::RET_WAIT_FOR_SEC_QUEUE_EVENT, false>>(
+        device, context, queue, num_elements);
+    test_status_val(status,
+                    IN_ORDER_MSG(EventMode::RET_WAIT_FOR_SEC_QUEUE_EVENT));
+
+    return status;
+}
+
+//--------------------------------------------------------------------------
+// user-events test cases
+
+int test_user_event_wait(cl_device_id device, cl_context context,
+                         cl_command_queue queue, int num_elements)
+{
+    int status = TEST_PASS;
+    // out-of-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::USER_EVENT_WAIT, true>>(
+        device, context, queue, num_elements);
+    test_status_val(status, OUT_OF_ORDER_MSG(EventMode::USER_EVENT_WAIT));
+
+    // in-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::USER_EVENT_WAIT, false>>(
+        device, context, queue, num_elements);
+    test_status_val(status, IN_ORDER_MSG(EventMode::USER_EVENT_WAIT));
+
+    return status;
+}
+
+int test_user_events_wait(cl_device_id device, cl_context context,
+                          cl_command_queue queue, int num_elements)
+{
+    int status = TEST_PASS;
+    // out-of-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::USER_EVENTS_WAIT, true>>(
+        device, context, queue, num_elements);
+    test_status_val(status, OUT_OF_ORDER_MSG(EventMode::USER_EVENTS_WAIT));
+
+    // in-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::USER_EVENTS_WAIT, false>>(
+        device, context, queue, num_elements);
+    test_status_val(status, IN_ORDER_MSG(EventMode::USER_EVENTS_WAIT));
+
+    return status;
+}
+
+int test_user_event_callback(cl_device_id device, cl_context context,
+                             cl_command_queue queue, int num_elements)
+{
+    int status = TEST_PASS;
+    // out-of-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::USER_EVENT_CALLBACK, true>>(
+        device, context, queue, num_elements);
+    test_status_val(status, OUT_OF_ORDER_MSG(EventMode::USER_EVENT_CALLBACK));
+
+    // in-order command queue test
+    status = MakeAndRunTest<
+        CommandBufferEventSync<EventMode::USER_EVENT_CALLBACK, false>>(
+        device, context, queue, num_elements);
+    test_status_val(status, IN_ORDER_MSG(EventMode::USER_EVENT_CALLBACK));
+
+    return status;
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp
new file mode 100644
index 00000000..3ce410c0
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp
@@ -0,0 +1,354 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "basic_command_buffer.h"
+#include "procs.h"
+
+#include <vector>
+
+//--------------------------------------------------------------------------
+enum class CombufInfoTestMode
+{
+    CITM_QUEUES = 0,
+    CITM_REF_COUNT,
+    CITM_STATE,
+    CITM_PROP_ARRAY,
+};
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+// clGetCommandBufferInfoKHR tests for cl_khr_command_buffer which handles below
+// cases:
+// -test case for CL_COMMAND_BUFFER_NUM_QUEUES_KHR &
+//  CL_COMMAND_BUFFER_QUEUES_KHR queries
+// -test case for CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR query
+// -test case for CL_COMMAND_BUFFER_STATE_KHR query
+// -test case for CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR query
+
+template <CombufInfoTestMode test_mode>
+struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
+{
+    CommandBufferGetCommandBufferInfo(cl_device_id device, cl_context context,
+                                      cl_command_queue queue)
+        : BasicCommandBufferTest(device, context, queue)
+    {}
+
+    //--------------------------------------------------------------------------
+    cl_int Run() override
+    {
+        cl_int error = CL_SUCCESS;
+
+        switch (test_mode)
+        {
+            case CombufInfoTestMode::CITM_QUEUES:
+                error = RunQueuesInfoTest();
+                test_error(error, "RunQueuesInfoTest failed");
+                break;
+            case CombufInfoTestMode::CITM_REF_COUNT:
+                error = RunRefCountInfoTest();
+                test_error(error, "RunRefCountInfoTest failed");
+                break;
+            case CombufInfoTestMode::CITM_STATE:
+                error = RunStateInfoTest();
+                test_error(error, "RunStateInfoTest failed");
+                break;
+            case CombufInfoTestMode::CITM_PROP_ARRAY:
+                error = RunPropArrayInfoTest();
+                test_error(error, "RunPropArrayInfoTest failed");
+                break;
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RecordCommandBuffer()
+    {
+        cl_int error = CL_SUCCESS;
+
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunQueuesInfoTest()
+    {
+        cl_int error = TEST_PASS;
+
+        // record command buffers
+        error = RecordCommandBuffer();
+        test_error(error, "RecordCommandBuffer failed");
+
+        // vector containter added due to potential future growth, at the moment
+        // spec of cl_khr_command_buffer says command-buffer accepts only 1
+        // queue
+        std::vector<cl_command_queue> expect_queue_list = { queue };
+        cl_uint num_queues = 0;
+        size_t ret_value_size = 0;
+        error = clGetCommandBufferInfoKHR(
+            command_buffer, CL_COMMAND_BUFFER_NUM_QUEUES_KHR, sizeof(cl_uint),
+            &num_queues, &ret_value_size);
+        test_error(error, "clGetCommandBufferInfoKHR failed");
+
+        test_assert_error(
+            ret_value_size == sizeof(cl_int),
+            "Unexpected result of CL_COMMAND_BUFFER_NUM_QUEUES_KHR query!");
+
+        test_assert_error(num_queues == expect_queue_list.size(),
+                          "Unexpected queue list size!");
+
+        std::vector<cl_command_queue> queue_list(num_queues);
+        size_t expect_size = queue_list.size() * sizeof(cl_command_queue);
+        error = clGetCommandBufferInfoKHR(
+            command_buffer, CL_COMMAND_BUFFER_QUEUES_KHR, expect_size,
+            &queue_list.front(), &ret_value_size);
+        test_error(error, "clGetCommandBufferInfoKHR failed");
+
+        test_assert_error(
+            ret_value_size == expect_size,
+            "Unexpected result of CL_COMMAND_BUFFER_NUM_QUEUES_KHR query!");
+
+        // We can not check if this is the right queue because this is an opaque
+        // object, test against NULL.
+        for (int i = 0; i < queue_list.size(); i++)
+        {
+            test_assert_error(
+                queue_list[i] == queue,
+                "clGetCommandBufferInfoKHR return values not as expected\n");
+        }
+        return TEST_PASS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunRefCountInfoTest()
+    {
+        cl_int error = CL_SUCCESS;
+
+        // record command buffer
+        error = RecordCommandBuffer();
+        test_error(error, "RecordCommandBuffer failed");
+
+        // collect initial reference count
+        cl_uint init_ref_count = 0;
+        error = clGetCommandBufferInfoKHR(
+            command_buffer, CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR,
+            sizeof(cl_uint), &init_ref_count, nullptr);
+        test_error(error, "clGetCommandBufferInfoKHR failed");
+
+        // increase reference count through clRetainCommandBufferKHR calls
+        const cl_int min_retain_count = 2;
+        const cl_int max_retain_count = 6;
+        cl_int retain_count = std::max(
+            min_retain_count, min_retain_count + rand() % max_retain_count);
+
+        for (int i = 0; i < retain_count; i++)
+        {
+            error = clRetainCommandBufferKHR(command_buffer);
+            test_error(error, "clRetainCommandBufferKHR failed");
+        }
+
+        // verify new reference count value
+        cl_uint new_ref_count = 0;
+        error = clGetCommandBufferInfoKHR(
+            command_buffer, CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR,
+            sizeof(cl_uint), &new_ref_count, nullptr);
+        test_error(error, "clGetCommandBufferInfoKHR failed");
+
+        test_assert_error(new_ref_count == (retain_count + init_ref_count),
+                          "Unexpected result of "
+                          "CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR query!");
+
+        // decrease reference count through clReleaseCommandBufferKHR calls
+        for (int i = 0; i < retain_count; i++)
+        {
+            error = clReleaseCommandBufferKHR(command_buffer);
+            test_error(error, "clReleaseCommandBufferKHR failed");
+        }
+
+        // verify new reference count value
+        error = clGetCommandBufferInfoKHR(
+            command_buffer, CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR,
+            sizeof(cl_uint), &new_ref_count, nullptr);
+        test_error(error, "clGetCommandBufferInfoKHR failed");
+
+        test_assert_error(new_ref_count == init_ref_count,
+                          "Unexpected result of "
+                          "CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR query!");
+
+        return TEST_PASS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunStateInfoTest()
+    {
+        cl_int error = CL_SUCCESS;
+
+        // lambda to verify given state
+        auto verify_state = [&](const cl_command_buffer_state_khr &expected) {
+            cl_command_buffer_state_khr state =
+                CL_COMMAND_BUFFER_STATE_INVALID_KHR;
+
+            cl_int error = clGetCommandBufferInfoKHR(
+                command_buffer, CL_COMMAND_BUFFER_STATE_KHR, sizeof(state),
+                &state, nullptr);
+            test_error_ret(error, "clGetCommandBufferInfoKHR failed",
+                           TEST_FAIL);
+
+            test_assert_error(
+                state == expected,
+                "Unexpected result of CL_COMMAND_BUFFER_STATE_KHR query!");
+
+            return TEST_PASS;
+        };
+
+        // verify recording state
+        error = verify_state(CL_COMMAND_BUFFER_STATE_RECORDING_KHR);
+        test_error(error, "verify_state failed");
+
+        // record command buffer
+        error = RecordCommandBuffer();
+        test_error(error, "RecordCommandBuffer failed");
+
+        // verify executable state
+        error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR);
+        test_error(error, "verify_state failed");
+
+        error = clEnqueueFillBuffer(queue, out_mem, &pattern, sizeof(cl_int), 0,
+                                    data_size(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        clEventWrapper trigger_event = clCreateUserEvent(context, &error);
+        test_error(error, "clCreateUserEvent failed");
+
+        // enqueued command buffer blocked on user event
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
+                                          &trigger_event, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        // verify pending state
+        error = verify_state(CL_COMMAND_BUFFER_STATE_PENDING_KHR);
+
+        // execute command buffer
+        cl_int signal_error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
+
+        test_error(error, "verify_state failed");
+
+        test_error(signal_error, "clSetUserEventStatus failed");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunPropArrayInfoTest()
+    {
+        cl_int error = CL_SUCCESS;
+
+        // record command buffer
+        error = RecordCommandBuffer();
+        test_error(error, "RecordCommandBuffer failed");
+
+        size_t ret_value_size = 0;
+        std::vector<cl_command_buffer_properties_khr> combuf_props;
+        error = clGetCommandBufferInfoKHR(
+            command_buffer, CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR, 0, nullptr,
+            &ret_value_size);
+        test_error_ret(error, "clGetCommandBufferInfoKHR failed", TEST_FAIL);
+
+        // command buffer created without sumultaneous use ? 0 size possible
+        if (!simultaneous_use_support && ret_value_size == 0) return TEST_PASS;
+
+        // ... otherwise 0 size prop array is not an acceptable value
+        test_assert_error(ret_value_size != 0,
+                          "Unexpected result of "
+                          "CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR query!");
+
+        cl_uint num_ret_props =
+            ret_value_size / sizeof(cl_command_buffer_properties_khr);
+        test_assert_error(num_ret_props != 0,
+                          "Unexpected result of "
+                          "CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR query!");
+
+        combuf_props.resize(num_ret_props);
+        error = clGetCommandBufferInfoKHR(
+            command_buffer, CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR,
+            num_ret_props * sizeof(cl_command_buffer_properties_khr),
+            combuf_props.data(), nullptr);
+        test_error_ret(error, "clGetCommandBufferInfoKHR failed", TEST_FAIL);
+
+        if (simultaneous_use_support)
+        {
+            // in simultaneous use case at least 3 elements in array expected
+            test_assert_error(num_ret_props >= 3,
+                              "Unexpected result of "
+                              "CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR query!");
+
+            if (combuf_props[0] == CL_COMMAND_BUFFER_FLAGS_KHR
+                && combuf_props[1] == CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR
+                && combuf_props.back() == 0)
+                return TEST_PASS;
+        }
+        else
+        {
+            if (combuf_props.back() == 0) return TEST_PASS;
+        }
+
+        return TEST_FAIL;
+    }
+
+    const cl_int pattern = 0xE;
+};
+
+} // anonymous namespace
+
+
+int test_info_queues(cl_device_id device, cl_context context,
+                     cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<
+        CommandBufferGetCommandBufferInfo<CombufInfoTestMode::CITM_QUEUES>>(
+        device, context, queue, num_elements);
+}
+
+int test_info_ref_count(cl_device_id device, cl_context context,
+                        cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<
+        CommandBufferGetCommandBufferInfo<CombufInfoTestMode::CITM_REF_COUNT>>(
+        device, context, queue, num_elements);
+}
+
+int test_info_state(cl_device_id device, cl_context context,
+                    cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<
+        CommandBufferGetCommandBufferInfo<CombufInfoTestMode::CITM_STATE>>(
+        device, context, queue, num_elements);
+}
+
+int test_info_prop_array(cl_device_id device, cl_context context,
+                         cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<
+        CommandBufferGetCommandBufferInfo<CombufInfoTestMode::CITM_PROP_ARRAY>>(
+        device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_out_of_order.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_out_of_order.cpp
new file mode 100644
index 00000000..9e142bf2
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_out_of_order.cpp
@@ -0,0 +1,352 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "basic_command_buffer.h"
+#include "procs.h"
+
+#include <vector>
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+// out-of-order tests for cl_khr_command_buffer which handles below cases:
+// -test case for out-of-order command-buffer
+// -test an out-of-order command-buffer with simultaneous use
+
+template <bool simultaneous_request>
+struct OutOfOrderTest : public BasicCommandBufferTest
+{
+    OutOfOrderTest(cl_device_id device, cl_context context,
+                   cl_command_queue queue)
+        : BasicCommandBufferTest(device, context, queue),
+          out_of_order_queue(nullptr), out_of_order_command_buffer(this),
+          user_event(nullptr), wait_pass_event(nullptr), kernel_fill(nullptr),
+          program_fill(nullptr)
+    {
+        simultaneous_use_requested = simultaneous_request;
+        if (simultaneous_request) buffer_size_multiplier = 2;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int SetUpKernel() override
+    {
+        // if device doesn't support simultaneous use which was requested
+        // we can skip creation of OCL resources
+        if (simultaneous_use_requested && !simultaneous_use_support)
+            return CL_SUCCESS;
+
+        cl_int error = BasicCommandBufferTest::SetUpKernel();
+        test_error(error, "BasicCommandBufferTest::SetUpKernel failed");
+
+        // create additional kernel to properly prepare output buffer for test
+        const char* kernel_str =
+            R"(
+          __kernel void fill(int pattern, __global int* out, __global int* offset)
+          {
+              size_t id = get_global_id(0);
+              size_t ind = offset[0] + id;
+              out[ind] = pattern;
+          })";
+
+        error = create_single_kernel_helper_create_program(
+            context, &program_fill, 1, &kernel_str);
+        test_error(error, "Failed to create program with source");
+
+        error =
+            clBuildProgram(program_fill, 1, &device, nullptr, nullptr, nullptr);
+        test_error(error, "Failed to build program");
+
+        kernel_fill = clCreateKernel(program_fill, "fill", &error);
+        test_error(error, "Failed to create copy kernel");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int SetUpKernelArgs() override
+    {
+        // if device doesn't support simultaneous use which was requested
+        // we can skip creation of OCL resources
+        if (simultaneous_use_requested && !simultaneous_use_support)
+            return CL_SUCCESS;
+
+        cl_int error = BasicCommandBufferTest::SetUpKernelArgs();
+        test_error(error, "BasicCommandBufferTest::SetUpKernelArgs failed");
+
+        error = clSetKernelArg(kernel_fill, 0, sizeof(cl_int),
+                               &overwritten_pattern);
+        test_error(error, "clSetKernelArg failed");
+
+        error = clSetKernelArg(kernel_fill, 1, sizeof(out_mem), &out_mem);
+        test_error(error, "clSetKernelArg failed");
+
+        error = clSetKernelArg(kernel_fill, 2, sizeof(off_mem), &off_mem);
+        test_error(error, "clSetKernelArg failed");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+        out_of_order_queue = clCreateCommandQueue(
+            context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
+        test_error(error, "Unable to create command queue to test with");
+
+        cl_command_buffer_properties_khr properties[3] = {
+            CL_COMMAND_BUFFER_FLAGS_KHR, 0, 0
+        };
+
+        if (simultaneous_use_requested && simultaneous_use_support)
+            properties[1] = CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
+
+        out_of_order_command_buffer = clCreateCommandBufferKHR(
+            1, &out_of_order_queue, properties, &error);
+        test_error(error, "clCreateCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    bool Skip() override
+    {
+        if (BasicCommandBufferTest::Skip()) return true;
+
+        if (!out_of_order_support
+            || (simultaneous_use_requested && !simultaneous_use_support))
+            return true;
+
+        return false;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int Run() override
+    {
+        cl_int error = CL_SUCCESS;
+
+        if (simultaneous_use_support)
+        {
+            // enqueue simultaneous command-buffers with out-of-order calls
+            error = RunSimultaneous();
+            test_error(error, "RunSimultaneous failed");
+        }
+        else
+        {
+            // enqueue single command-buffer with out-of-order calls
+            error = RunSingle();
+            test_error(error, "RunSingle failed");
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RecordCommandBuffer()
+    {
+        cl_sync_point_khr sync_points[2];
+        const cl_int pattern = pattern_pri;
+        cl_int error =
+            clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, in_mem,
+                                   &pattern, sizeof(cl_int), 0, data_size(), 0,
+                                   nullptr, &sync_points[0], nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr,
+                                       out_mem, &overwritten_pattern,
+                                       sizeof(cl_int), 0, data_size(), 0,
+                                       nullptr, &sync_points[1], nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        error = clCommandNDRangeKernelKHR(
+            out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &num_elements, nullptr, 2, sync_points, nullptr, nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(out_of_order_command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunSingle()
+    {
+        cl_int error = RecordCommandBuffer();
+        test_error(error, "RecordCommandBuffer failed");
+
+        error = clEnqueueCommandBufferKHR(
+            0, nullptr, out_of_order_command_buffer, 0, nullptr, &user_event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        std::vector<cl_int> output_data(num_elements);
+        error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
+                                    data_size(), output_data.data(), 1,
+                                    &user_event, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RecordSimultaneousCommandBuffer() const
+    {
+        cl_sync_point_khr sync_points[2];
+        // for both simultaneous passes this call will fill entire in_mem buffer
+        cl_int error = clCommandFillBufferKHR(
+            out_of_order_command_buffer, nullptr, in_mem, &pattern_pri,
+            sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr,
+            &sync_points[0], nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        // to avoid overwriting the entire result buffer instead of filling only
+        // relevant part this additional kernel was introduced
+        error = clCommandNDRangeKernelKHR(out_of_order_command_buffer, nullptr,
+                                          nullptr, kernel_fill, 1, nullptr,
+                                          &num_elements, nullptr, 0, nullptr,
+                                          &sync_points[1], nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clCommandNDRangeKernelKHR(
+            out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &num_elements, nullptr, 2, sync_points, nullptr, nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(out_of_order_command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    struct SimulPassData
+    {
+        cl_int offset;
+        std::vector<cl_int> output_buffer;
+        // 0:user event, 1:offset-buffer fill event, 2:kernel done event
+        clEventWrapper wait_events[3];
+    };
+
+    //--------------------------------------------------------------------------
+    cl_int EnqueueSimultaneousPass(SimulPassData& pd)
+    {
+        cl_int error = CL_SUCCESS;
+        if (!user_event)
+        {
+            user_event = clCreateUserEvent(context, &error);
+            test_error(error, "clCreateUserEvent failed");
+        }
+
+        pd.wait_events[0] = user_event;
+
+        // filling offset buffer must wait for previous pass completeness
+        error = clEnqueueFillBuffer(
+            out_of_order_queue, off_mem, &pd.offset, sizeof(cl_int), 0,
+            sizeof(cl_int), (wait_pass_event != nullptr ? 1 : 0),
+            (wait_pass_event != nullptr ? &wait_pass_event : nullptr),
+            &pd.wait_events[1]);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        // command buffer execution must wait for two wait-events
+        error = clEnqueueCommandBufferKHR(
+            0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0],
+            &pd.wait_events[2]);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_FALSE,
+                                    pd.offset * sizeof(cl_int), data_size(),
+                                    pd.output_buffer.data(), 1,
+                                    &pd.wait_events[2], nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunSimultaneous()
+    {
+        cl_int error = RecordSimultaneousCommandBuffer();
+        test_error(error, "RecordSimultaneousCommandBuffer failed");
+
+        cl_int offset = static_cast<cl_int>(num_elements);
+
+        std::vector<SimulPassData> simul_passes = {
+            { 0, std::vector<cl_int>(num_elements) },
+            { offset, std::vector<cl_int>(num_elements) }
+        };
+
+        for (auto&& pass : simul_passes)
+        {
+            error = EnqueueSimultaneousPass(pass);
+            test_error(error, "EnqueueSimultaneousPass failed");
+
+            wait_pass_event = pass.wait_events[2];
+        }
+
+        error = clSetUserEventStatus(user_event, CL_COMPLETE);
+        test_error(error, "clSetUserEventStatus failed");
+
+        error = clFinish(out_of_order_queue);
+        test_error(error, "clFinish failed");
+
+        // verify the result buffers
+        for (auto&& pass : simul_passes)
+        {
+            auto& res_data = pass.output_buffer;
+            for (size_t i = 0; i < num_elements; i++)
+            {
+                CHECK_VERIFICATION_ERROR(pattern_pri, res_data[i], i);
+            }
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    clCommandQueueWrapper out_of_order_queue;
+    clCommandBufferWrapper out_of_order_command_buffer;
+
+    clEventWrapper user_event;
+    clEventWrapper wait_pass_event;
+
+    clKernelWrapper kernel_fill;
+    clProgramWrapper program_fill;
+
+    const cl_int overwritten_pattern = 0xACDC;
+    const cl_int pattern_pri = 42;
+};
+
+} // anonymous namespace
+
+int test_out_of_order(cl_device_id device, cl_context context,
+                      cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<OutOfOrderTest<false>>(device, context, queue,
+                                                 num_elements);
+}
+
+int test_simultaneous_out_of_order(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<OutOfOrderTest<true>>(device, context, queue,
+                                                num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_printf.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_printf.cpp
new file mode 100644
index 00000000..eef3e355
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_printf.cpp
@@ -0,0 +1,537 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <harness/os_helpers.h>
+
+#include "basic_command_buffer.h"
+#include "procs.h"
+
+#if !defined(_WIN32)
+#if defined(__APPLE__)
+#include <sys/sysctl.h>
+#endif
+#include <unistd.h>
+#define streamDup(fd1) dup(fd1)
+#define streamDup2(fd1, fd2) dup2(fd1, fd2)
+#endif
+#include <limits.h>
+#include <time.h>
+
+#if defined(_WIN32)
+#include <io.h>
+#define streamDup(fd1) _dup(fd1)
+#define streamDup2(fd1, fd2) _dup2(fd1, fd2)
+#endif
+
+#include <vector>
+#include <list>
+#include <map>
+#include <fstream>
+#include <stdio.h>
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+// printf tests for cl_khr_command_buffer which handles below cases:
+// -test cases for device side printf
+// -test cases for device side printf with a simultaneous use command-buffer
+
+template <bool simul_use>
+struct CommandBufferPrintfTest : public BasicCommandBufferTest
+{
+    CommandBufferPrintfTest(cl_device_id device, cl_context context,
+                            cl_command_queue queue)
+        : BasicCommandBufferTest(device, context, queue),
+          trigger_event(nullptr), wait_event(nullptr), file_descriptor(0),
+          printf_use_support(false)
+    {
+        simultaneous_use_requested = simul_use;
+        if (simul_use)
+        {
+            buffer_size_multiplier = num_test_iters;
+        }
+    }
+
+    //--------------------------------------------------------------------------
+    void ReleaseOutputStream(int fd)
+    {
+        fflush(stdout);
+        streamDup2(fd, fileno(stdout));
+        close(fd);
+    }
+
+    //--------------------------------------------------------------------------
+    int AcquireOutputStream(int* error)
+    {
+        int fd = streamDup(fileno(stdout));
+        *error = 0;
+        if (!freopen(temp_filename.c_str(), "wt", stdout))
+        {
+            ReleaseOutputStream(fd);
+            *error = -1;
+        }
+        return fd;
+    }
+
+    //--------------------------------------------------------------------------
+    void GetAnalysisBuffer(std::stringstream& buffer)
+    {
+        std::ifstream fp(temp_filename, std::ios::in);
+        if (fp.is_open())
+        {
+            buffer << fp.rdbuf();
+        }
+    }
+
+    //--------------------------------------------------------------------------
+    void PurgeTempFile()
+    {
+        std::ofstream ofs(temp_filename,
+                          std::ofstream::out | std::ofstream::trunc);
+        ofs.close();
+    }
+
+    //--------------------------------------------------------------------------
+    bool Skip() override
+    {
+        // Query if device supports kernel printf use
+        cl_device_command_buffer_capabilities_khr capabilities;
+        cl_int error =
+            clGetDeviceInfo(device, CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR,
+                            sizeof(capabilities), &capabilities, NULL);
+        test_error(error,
+                   "Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR");
+
+        printf_use_support =
+            (capabilities & CL_COMMAND_BUFFER_CAPABILITY_KERNEL_PRINTF_KHR)
+            != 0;
+
+        if (!printf_use_support) return true;
+        return BasicCommandBufferTest::Skip()
+            || (simultaneous_use_requested && !simultaneous_use_support);
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int SetUpKernel() override
+    {
+        cl_int error = CL_SUCCESS;
+
+        const char* kernel_str =
+            R"(
+      __kernel void print(__global char* in, __global char* out, __global int* offset)
+      {
+          size_t id = get_global_id(0);
+          int ind = offset[0] + offset[1] * id;
+          for(int i=0; i<offset[1]; i++) {
+              out[ind+i] = in[i];
+              printf("%c", in[i]);
+          }
+      })";
+
+        error = create_single_kernel_helper_create_program(context, &program, 1,
+                                                           &kernel_str);
+        test_error(error, "Failed to create program with source");
+
+        error = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
+        test_error(error, "Failed to build program");
+
+        kernel = clCreateKernel(program, "print", &error);
+        test_error(error, "Failed to create print kernel");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    size_t data_size() const override
+    {
+        return sizeof(cl_char) * num_elements * buffer_size_multiplier
+            * max_pattern_length;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int SetUpKernelArgs() override
+    {
+        cl_int error = CL_SUCCESS;
+
+        in_mem = clCreateBuffer(context, CL_MEM_READ_ONLY,
+                                sizeof(cl_char) * (max_pattern_length + 1),
+                                nullptr, &error);
+        test_error(error, "clCreateBuffer failed");
+
+        out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, data_size(),
+                                 nullptr, &error);
+        test_error(error, "clCreateBuffer failed");
+
+        cl_int offset[] = { 0, max_pattern_length };
+        off_mem =
+            clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+                           sizeof(offset), offset, &error);
+        test_error(error, "clCreateBuffer failed");
+
+        error = clSetKernelArg(kernel, 0, sizeof(in_mem), &in_mem);
+        test_error(error, "clSetKernelArg failed");
+
+        error = clSetKernelArg(kernel, 1, sizeof(out_mem), &out_mem);
+        test_error(error, "clSetKernelArg failed");
+
+        error = clSetKernelArg(kernel, 2, sizeof(off_mem), &off_mem);
+        test_error(error, "clSetKernelArg failed");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int SetUp(int elements) override
+    {
+        auto pcFname = get_temp_filename();
+        temp_filename = pcFname;
+
+        if (pcFname != nullptr) free(pcFname);
+
+        if (temp_filename.empty())
+        {
+            log_error("get_temp_filename failed\n");
+            return -1;
+        }
+
+        return BasicCommandBufferTest::SetUp(elements);
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int Run() override
+    {
+        cl_int error = CL_SUCCESS;
+
+        // record command buffer with primary queue
+        error = RecordCommandBuffer();
+        test_error(error, "RecordCommandBuffer failed");
+
+        if (simultaneous_use_support)
+        {
+            // enqueue simultaneous command-buffers with printf calls
+            error = RunSimultaneous();
+            test_error(error, "RunSimultaneous failed");
+        }
+        else
+        {
+            // enqueue single command-buffer with printf calls
+            error = RunSingle();
+            test_error(error, "RunSingle failed");
+        }
+
+        std::remove(temp_filename.c_str());
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RecordCommandBuffer()
+    {
+        cl_int error = CL_SUCCESS;
+
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+#define test_error_release_stdout(errCode, msg)                                \
+    {                                                                          \
+        auto errCodeResult = errCode;                                          \
+        if (errCodeResult != CL_SUCCESS)                                       \
+        {                                                                      \
+            ReleaseOutputStream(file_descriptor);                              \
+            print_error(errCodeResult, msg);                                   \
+            return errCode;                                                    \
+        }                                                                      \
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int EnqueueSinglePass(const std::vector<cl_char>& pattern,
+                             std::vector<cl_char>& output_data)
+    {
+        cl_int error = CL_SUCCESS;
+        auto in_mem_size = sizeof(cl_char) * pattern.size();
+        error = clEnqueueWriteBuffer(queue, in_mem, CL_TRUE, 0, in_mem_size,
+                                     &pattern[0], 0, nullptr, nullptr);
+        test_error(error, "clEnqueueWriteBuffer failed");
+
+        cl_int offset[] = { 0, pattern.size() - 1 };
+        error = clEnqueueWriteBuffer(queue, off_mem, CL_TRUE, 0, sizeof(offset),
+                                     offset, 0, nullptr, nullptr);
+        test_error(error, "clEnqueueWriteBuffer failed");
+
+        // redirect output stream to temporary file
+        file_descriptor = AcquireOutputStream(&error);
+        if (error != 0)
+        {
+            log_error("Error while redirection stdout to file");
+            return TEST_FAIL;
+        }
+
+        // enqueue command buffer with kernel containing printf command
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, &wait_event);
+        test_error_release_stdout(error, "clEnqueueCommandBufferKHR failed");
+
+        fflush(stdout);
+
+        // Wait until kernel finishes its execution and (thus) the output
+        // printed from the kernel is immediately printed
+        error = clWaitForEvents(1, &wait_event);
+        test_error(error, "clWaitForEvents failed");
+
+        // output buffer contains pattern to be compared with printout
+        error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(),
+                                    output_data.data(), 0, nullptr, nullptr);
+        test_error_release_stdout(error, "clEnqueueReadBuffer failed");
+
+        error = clFinish(queue);
+        test_error_release_stdout(error, "clFinish failed");
+
+        ReleaseOutputStream(file_descriptor);
+
+        // copy content of temporary file into string stream
+        std::stringstream sstr;
+        GetAnalysisBuffer(sstr);
+        if (sstr.str().size() != num_elements * offset[1])
+        {
+            log_error("GetAnalysisBuffer failed\n");
+            return TEST_FAIL;
+        }
+
+        // verify the result - compare printout and output buffer
+        for (size_t i = 0; i < num_elements * offset[1]; i++)
+        {
+            CHECK_VERIFICATION_ERROR(sstr.str().at(i), output_data[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunSingle()
+    {
+        cl_int error = CL_SUCCESS;
+        std::vector<cl_char> output_data(num_elements * max_pattern_length);
+
+        for (unsigned i = 0; i < num_test_iters; i++)
+        {
+            unsigned pattern_length =
+                std::max(min_pattern_length, rand() % max_pattern_length);
+            char pattern_character = 'a' + rand() % 26;
+            std::vector<cl_char> pattern(pattern_length + 1, pattern_character);
+            pattern[pattern_length] = '\0';
+            error = EnqueueSinglePass(pattern, output_data);
+            test_error(error, "EnqueueSinglePass failed");
+
+            output_data.assign(output_data.size(), 0);
+            PurgeTempFile();
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    struct SimulPassData
+    {
+        // null terminated character buffer
+        std::vector<cl_char> pattern;
+        // 0-command buffer offset, 1-pattern offset
+        cl_int offset[2];
+        std::vector<cl_char> output_buffer;
+    };
+
+    //--------------------------------------------------------------------------
+    cl_int EnqueueSimultaneousPass(SimulPassData& pd)
+    {
+        // write current pattern to device memory
+        auto in_mem_size = sizeof(cl_char) * pd.pattern.size();
+        cl_int error =
+            clEnqueueWriteBuffer(queue, in_mem, CL_FALSE, 0, in_mem_size,
+                                 &pd.pattern[0], 0, nullptr, nullptr);
+        test_error_release_stdout(error, "clEnqueueWriteBuffer failed");
+
+        // refresh offsets for current enqueuing
+        error =
+            clEnqueueWriteBuffer(queue, off_mem, CL_FALSE, 0, sizeof(pd.offset),
+                                 pd.offset, 0, nullptr, nullptr);
+        test_error_release_stdout(error, "clEnqueueWriteBuffer failed");
+
+        // create user event to block simultaneous command buffers
+        if (!trigger_event)
+        {
+            trigger_event = clCreateUserEvent(context, &error);
+            test_error_release_stdout(error, "clCreateUserEvent failed");
+        }
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
+                                          &trigger_event, nullptr);
+        test_error_release_stdout(error, "clEnqueueCommandBufferKHR failed");
+
+        // output buffer contains pattern to be compared with printout
+        error = clEnqueueReadBuffer(
+            queue, out_mem, CL_FALSE, pd.offset[0] * sizeof(cl_char),
+            pd.output_buffer.size() * sizeof(cl_char), pd.output_buffer.data(),
+            0, nullptr, nullptr);
+        test_error_release_stdout(error, "clEnqueueReadBuffer failed");
+
+        return CL_SUCCESS;
+    }
+
+
+    //--------------------------------------------------------------------------
+    cl_int RunSimultaneous()
+    {
+        cl_int error = CL_SUCCESS;
+        cl_int offset = static_cast<cl_int>(num_elements * max_pattern_length);
+
+        std::vector<SimulPassData> simul_passes(num_test_iters);
+
+        const int pattern_chars_range = 26;
+        std::list<cl_char> pattern_chars;
+        for (size_t i = 0; i < pattern_chars_range; i++)
+            pattern_chars.push_back(cl_char('a' + i));
+
+        test_assert_error(pattern_chars.size() >= num_test_iters,
+                          "Number of simultaneous launches must be lower than "
+                          "size of characters container");
+
+        cl_int total_pattern_coverage = 0;
+        for (unsigned i = 0; i < num_test_iters; i++)
+        {
+            // random character pattern unique for each iteration
+            auto it = pattern_chars.begin();
+            std::advance(it, rand() % pattern_chars.size());
+            char pattern_character = *it;
+            unsigned pattern_length =
+                std::max(min_pattern_length, rand() % max_pattern_length);
+
+            std::vector<cl_char> pattern(pattern_length + 1, pattern_character);
+            pattern[pattern_length] = '\0';
+            simul_passes[i] = { pattern,
+                                { cl_int(i * offset), cl_int(pattern_length) },
+                                std::vector<cl_char>(num_elements
+                                                     * pattern_length) };
+            total_pattern_coverage += simul_passes[i].output_buffer.size();
+            pattern_chars.erase(it);
+        };
+
+        // takeover stdout stream
+        file_descriptor = AcquireOutputStream(&error);
+        if (error != 0)
+        {
+            log_error("Error while redirection stdout to file");
+            return TEST_FAIL;
+        }
+
+        // enqueue read/write and command buffer operations
+        for (auto&& pass : simul_passes)
+        {
+            error = EnqueueSimultaneousPass(pass);
+            test_error_release_stdout(error, "EnqueueSimultaneousPass failed");
+        }
+
+        // execute command buffers
+        error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
+        test_error_release_stdout(error, "clSetUserEventStatus failed");
+
+        // flush streams
+        fflush(stdout);
+
+        // finish command queue
+        error = clFinish(queue);
+        test_error_release_stdout(error, "clFinish failed\n");
+
+        ReleaseOutputStream(file_descriptor);
+
+        std::stringstream sstr;
+        GetAnalysisBuffer(sstr);
+        if (sstr.str().size() != total_pattern_coverage)
+        {
+            log_error("GetAnalysisBuffer failed\n");
+            return TEST_FAIL;
+        }
+
+        // verify the result - compare printout and output buffer
+        std::map<cl_char, size_t> counters_map;
+        for (int i = 0; i < total_pattern_coverage; i++)
+            counters_map[sstr.str().at(i)]++;
+
+        if (counters_map.size() != simul_passes.size())
+        {
+            log_error("printout inconsistent with input data\n");
+            return TEST_FAIL;
+        }
+
+        for (auto&& pass : simul_passes)
+        {
+            auto& res_data = pass.output_buffer;
+
+            if (res_data.empty()
+                || res_data.size() != counters_map[res_data.front()])
+            {
+                log_error("output buffer inconsistent with printout\n");
+                return TEST_FAIL;
+            }
+
+            // verify consistency of output buffer
+            for (size_t i = 0; i < res_data.size(); i++)
+            {
+                CHECK_VERIFICATION_ERROR(res_data.front(), res_data[i], i);
+            }
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    clEventWrapper trigger_event = nullptr;
+    clEventWrapper wait_event = nullptr;
+
+    std::string temp_filename;
+    int file_descriptor;
+
+    bool printf_use_support;
+
+    // specifies max test length for printf pattern
+    const unsigned max_pattern_length = 6;
+    // specifies min test length for printf pattern
+    const unsigned min_pattern_length = 1;
+    // specifies number of command-buffer enqueue iterations
+    const unsigned num_test_iters = 3;
+};
+
+} // anonymous namespace
+
+int test_basic_printf(cl_device_id device, cl_context context,
+                      cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<CommandBufferPrintfTest<false>>(device, context,
+                                                          queue, num_elements);
+}
+
+int test_simultaneous_printf(cl_device_id device, cl_context context,
+                             cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<CommandBufferPrintfTest<true>>(device, context, queue,
+                                                         num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp
new file mode 100644
index 00000000..28d80450
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp
@@ -0,0 +1,303 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "basic_command_buffer.h"
+#include "procs.h"
+
+#include <vector>
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+// Command-buffer profiling test cases:
+// -all commands are recorded to a single command-queue
+// -profiling a command-buffer with simultaneous use
+
+template <bool simultaneous_request>
+struct CommandBufferProfiling : public BasicCommandBufferTest
+{
+    CommandBufferProfiling(cl_device_id device, cl_context context,
+                           cl_command_queue queue)
+        : BasicCommandBufferTest(device, context, queue), wait_event(nullptr)
+    {
+        simultaneous_use_requested = simultaneous_request;
+        if (simultaneous_request) buffer_size_multiplier = 2;
+    }
+
+    //--------------------------------------------------------------------------
+    bool Skip() override
+    {
+        if (BasicCommandBufferTest::Skip()) return true;
+
+        Version version = get_device_cl_version(device);
+        const cl_device_info host_queue_query = version >= Version(2, 0)
+            ? CL_DEVICE_QUEUE_ON_HOST_PROPERTIES
+            : CL_DEVICE_QUEUE_PROPERTIES;
+
+        cl_command_queue_properties host_queue_props = 0;
+        int error =
+            clGetDeviceInfo(device, host_queue_query, sizeof(host_queue_props),
+                            &host_queue_props, NULL);
+        if (error != CL_SUCCESS)
+        {
+            print_error(
+                error, "clGetDeviceInfo for CL_DEVICE_QUEUE_PROPERTIES failed");
+            return true;
+        }
+
+        if ((host_queue_props & CL_QUEUE_PROFILING_ENABLE) == 0)
+        {
+            log_info(
+                "Queue property CL_QUEUE_PROFILING_ENABLE not supported \n");
+            return true;
+        }
+        return (simultaneous_use_requested && !simultaneous_use_support);
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = CL_SUCCESS;
+        queue = clCreateCommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE,
+                                     &error);
+        test_error(error, "clCreateCommandQueue failed");
+
+        return BasicCommandBufferTest::SetUp(elements);
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int Run() override
+    {
+        cl_int error = CL_SUCCESS;
+
+        // record command buffer
+        error = RecordCommandBuffer();
+        test_error(error, "RecordCommandBuffer failed");
+
+        if (simultaneous_use_requested)
+        {
+            // enqueue simultaneous command-buffers with profiling command queue
+            error = RunSimultaneous();
+            test_error(error, "RunSimultaneous failed");
+        }
+        else
+        {
+            // enqueue single command-buffer with profiling command queue
+            error = RunSingle();
+            test_error(error, "RunSingle failed");
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RecordCommandBuffer()
+    {
+        cl_int error = CL_SUCCESS;
+
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+#define ADD_PROF_PARAM(prop)                                                   \
+    {                                                                          \
+        prop, #prop, 0                                                         \
+    }
+    struct ProfilingParam
+    {
+        cl_profiling_info param;
+        std::string name;
+        cl_ulong value;
+    };
+
+    //--------------------------------------------------------------------------
+    cl_int VerifyResult(const clEventWrapper& event)
+    {
+        cl_int error = CL_SUCCESS;
+        cl_int status;
+        error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+                               sizeof(status), &status, NULL);
+        test_error(error, "clGetEventInfo() failed");
+
+        if (status != CL_SUCCESS)
+            test_fail("Kernel execution status %d! (%s:%d)\n", status, __FILE__,
+                      __LINE__);
+
+        std::vector<ProfilingParam> prof_params = {
+            ADD_PROF_PARAM(CL_PROFILING_COMMAND_QUEUED),
+            ADD_PROF_PARAM(CL_PROFILING_COMMAND_SUBMIT),
+            ADD_PROF_PARAM(CL_PROFILING_COMMAND_START),
+            ADD_PROF_PARAM(CL_PROFILING_COMMAND_END),
+        };
+
+        // gather profiling timestamps
+        for (auto&& p : prof_params)
+        {
+            error = clGetEventProfilingInfo(event, p.param, sizeof(p.value),
+                                            &p.value, NULL);
+            test_error(error, "clGetEventProfilingInfo() failed");
+        }
+
+        // verify the results by comparing timestamps
+        bool all_vals_0 = prof_params.front().value != 0;
+        for (int i = 1; i < prof_params.size(); i++)
+        {
+            all_vals_0 = (prof_params[i].value != 0) ? false : all_vals_0;
+            if (prof_params[i - 1].value > prof_params[i].value)
+            {
+                log_error("Profiling %s=0x%x should be smaller than or equal "
+                          "to %s=0x%x for "
+                          "kernels that use the on-device queue",
+                          prof_params[i - 1].name.c_str(),
+                          prof_params[i - 1].param, prof_params[i].name.c_str(),
+                          prof_params[i].param);
+                return TEST_FAIL;
+            }
+        }
+
+        if (all_vals_0)
+        {
+            log_error("All values are 0. This is exceedingly unlikely.\n");
+            return TEST_FAIL;
+        }
+
+        log_info("Profiling info for command-buffer kernel succeeded.\n");
+        return TEST_PASS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunSingle()
+    {
+        cl_int error = CL_SUCCESS;
+        std::vector<cl_int> output_data(num_elements);
+
+        error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0,
+                                    data_size(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        clEventWrapper query_event;
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, &query_event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
+                                    output_data.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        error = VerifyResult(query_event);
+        test_error(error, "VerifyResult failed");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    struct SimulPassData
+    {
+        cl_int offset;
+        std::vector<cl_int> output_buffer;
+        clEventWrapper query_event;
+    };
+
+    //--------------------------------------------------------------------------
+    cl_int EnqueueSimultaneousPass(SimulPassData& pd)
+    {
+        cl_int error = clEnqueueFillBuffer(
+            queue, out_mem, &pattern, sizeof(cl_int),
+            pd.offset * sizeof(cl_int), data_size(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        error = clEnqueueFillBuffer(queue, off_mem, &pd.offset, sizeof(cl_int),
+                                    0, sizeof(cl_int), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        if (!wait_event)
+        {
+            wait_event = clCreateUserEvent(context, &error);
+            test_error(error, "clCreateUserEvent failed");
+        }
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
+                                          &wait_event, &pd.query_event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clEnqueueReadBuffer(
+            queue, out_mem, CL_FALSE, pd.offset * sizeof(cl_int), data_size(),
+            pd.output_buffer.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunSimultaneous()
+    {
+        cl_int error = CL_SUCCESS;
+        cl_int offset = static_cast<cl_int>(num_elements);
+
+        std::vector<SimulPassData> simul_passes = {
+            { 0, std::vector<cl_int>(num_elements) },
+            { offset, std::vector<cl_int>(num_elements) }
+        };
+
+        for (auto&& pass : simul_passes)
+        {
+            error = EnqueueSimultaneousPass(pass);
+            test_error(error, "EnqueueSimultaneousPass failed");
+        }
+
+        error = clSetUserEventStatus(wait_event, CL_COMPLETE);
+        test_error(error, "clSetUserEventStatus failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+
+        for (auto&& pass : simul_passes)
+        {
+            error = VerifyResult(pass.query_event);
+            test_error(error, "VerifyResult failed");
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    clEventWrapper wait_event;
+
+    const cl_int pattern = 0xA;
+};
+
+} // anonymous namespace
+
+int test_basic_profiling(cl_device_id device, cl_context context,
+                         cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<CommandBufferProfiling<false>>(device, context, queue,
+                                                         num_elements);
+}
+
+int test_simultaneous_profiling(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<CommandBufferProfiling<true>>(device, context, queue,
+                                                        num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_queue_substitution.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_queue_substitution.cpp
new file mode 100644
index 00000000..7aa262aa
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_queue_substitution.cpp
@@ -0,0 +1,278 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "basic_command_buffer.h"
+#include "procs.h"
+
+#include <vector>
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+// Command-queue substitution tests which handles below cases:
+// -substitution on queue without properties
+// -substitution on queue with properties
+// -simultaneous use queue substitution
+
+template <bool prop_use, bool simul_use>
+struct SubstituteQueueTest : public BasicCommandBufferTest
+{
+    SubstituteQueueTest(cl_device_id device, cl_context context,
+                        cl_command_queue queue)
+        : BasicCommandBufferTest(device, context, queue),
+          properties_use_requested(prop_use), user_event(nullptr)
+    {
+        simultaneous_use_requested = simul_use;
+        if (simul_use) buffer_size_multiplier = 2;
+    }
+
+    //--------------------------------------------------------------------------
+    bool Skip() override
+    {
+        if (properties_use_requested)
+        {
+            Version version = get_device_cl_version(device);
+            const cl_device_info host_queue_query = version >= Version(2, 0)
+                ? CL_DEVICE_QUEUE_ON_HOST_PROPERTIES
+                : CL_DEVICE_QUEUE_PROPERTIES;
+
+            cl_queue_properties host_queue_props = 0;
+            int error = clGetDeviceInfo(device, host_queue_query,
+                                        sizeof(host_queue_props),
+                                        &host_queue_props, NULL);
+            test_error(error, "clGetDeviceInfo failed");
+
+            if ((host_queue_props & CL_QUEUE_PROFILING_ENABLE) == 0)
+                return true;
+        }
+
+        return BasicCommandBufferTest::Skip()
+            || (simultaneous_use_requested && !simultaneous_use_support);
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int SetUp(int elements) override
+    {
+        // By default command queue is created without properties,
+        // if test requires queue with properties default queue must be
+        // replaced.
+        if (properties_use_requested)
+        {
+            // due to the skip condition
+            cl_int error = CL_SUCCESS;
+            queue = clCreateCommandQueue(context, device,
+                                         CL_QUEUE_PROFILING_ENABLE, &error);
+            test_error(
+                error,
+                "clCreateCommandQueue with CL_QUEUE_PROFILING_ENABLE failed");
+        }
+
+        return BasicCommandBufferTest::SetUp(elements);
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int Run() override
+    {
+        // record command buffer with primary queue
+        cl_int error = RecordCommandBuffer();
+        test_error(error, "RecordCommandBuffer failed");
+
+        // create substitute queue
+        clCommandQueueWrapper new_queue;
+        if (properties_use_requested)
+        {
+            new_queue = clCreateCommandQueue(context, device,
+                                             CL_QUEUE_PROFILING_ENABLE, &error);
+            test_error(
+                error,
+                "clCreateCommandQueue with CL_QUEUE_PROFILING_ENABLE failed");
+        }
+        else
+        {
+            const cl_command_queue_properties queue_properties = 0;
+            new_queue =
+                clCreateCommandQueue(context, device, queue_properties, &error);
+            test_error(error, "clCreateCommandQueue failed");
+        }
+
+        if (simultaneous_use_support)
+        {
+            // enque simultaneous command-buffers with substitute queue
+            error = RunSimultaneous(new_queue);
+            test_error(error, "RunSimultaneous failed");
+        }
+        else
+        {
+            // enque single command-buffer with substitute queue
+            error = RunSingle(new_queue);
+            test_error(error, "RunSingle failed");
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RecordCommandBuffer()
+    {
+        cl_int error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunSingle(const cl_command_queue& q)
+    {
+        cl_int error = CL_SUCCESS;
+        std::vector<cl_int> output_data(num_elements);
+
+        error = clEnqueueFillBuffer(q, in_mem, &pattern_pri, sizeof(cl_int), 0,
+                                    data_size(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        cl_command_queue queues[] = { q };
+        error = clEnqueueCommandBufferKHR(1, queues, command_buffer, 0, nullptr,
+                                          nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clEnqueueReadBuffer(q, out_mem, CL_TRUE, 0, data_size(),
+                                    output_data.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        error = clFinish(q);
+        test_error(error, "clFinish failed");
+
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    struct SimulPassData
+    {
+        cl_int pattern;
+        cl_int offset;
+        cl_command_queue queue;
+        std::vector<cl_int> output_buffer;
+    };
+
+    //--------------------------------------------------------------------------
+    cl_int EnqueueSimultaneousPass(SimulPassData& pd)
+    {
+        cl_int error = clEnqueueFillBuffer(
+            pd.queue, in_mem, &pd.pattern, sizeof(cl_int),
+            pd.offset * sizeof(cl_int), data_size(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        error =
+            clEnqueueFillBuffer(pd.queue, off_mem, &pd.offset, sizeof(cl_int),
+                                0, sizeof(cl_int), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        if (!user_event)
+        {
+            user_event = clCreateUserEvent(context, &error);
+            test_error(error, "clCreateUserEvent failed");
+        }
+
+        cl_command_queue queues[] = { pd.queue };
+        error = clEnqueueCommandBufferKHR(1, queues, command_buffer, 1,
+                                          &user_event, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clEnqueueReadBuffer(
+            pd.queue, out_mem, CL_FALSE, pd.offset * sizeof(cl_int),
+            data_size(), pd.output_buffer.data(), 0, nullptr, nullptr);
+
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunSimultaneous(const cl_command_queue& q)
+    {
+        cl_int error = CL_SUCCESS;
+        cl_int offset = static_cast<cl_int>(num_elements);
+
+        std::vector<SimulPassData> simul_passes = {
+            { pattern_pri, 0, q, std::vector<cl_int>(num_elements) },
+            { pattern_sec, offset, q, std::vector<cl_int>(num_elements) }
+        };
+
+        for (auto&& pass : simul_passes)
+        {
+            error = EnqueueSimultaneousPass(pass);
+            test_error(error, "EnqueuePass failed");
+        }
+
+        error = clSetUserEventStatus(user_event, CL_COMPLETE);
+        test_error(error, "clSetUserEventStatus failed");
+
+        for (auto&& pass : simul_passes)
+        {
+            error = clFinish(pass.queue);
+            test_error(error, "clFinish failed");
+
+            auto& res_data = pass.output_buffer;
+
+            for (size_t i = 0; i < num_elements; i++)
+            {
+                CHECK_VERIFICATION_ERROR(pass.pattern, res_data[i], i);
+            }
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    const cl_int pattern_pri = 0xB;
+    const cl_int pattern_sec = 0xC;
+
+    bool properties_use_requested;
+    clEventWrapper user_event;
+};
+
+} // anonymous namespace
+
+int test_queue_substitution(cl_device_id device, cl_context context,
+                            cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<SubstituteQueueTest<false, false>>(
+        device, context, queue, num_elements);
+}
+
+int test_properties_queue_substitution(cl_device_id device, cl_context context,
+                                       cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<SubstituteQueueTest<true, false>>(
+        device, context, queue, num_elements);
+}
+
+int test_simultaneous_queue_substitution(cl_device_id device,
+                                         cl_context context,
+                                         cl_command_queue queue,
+                                         int num_elements)
+{
+    return MakeAndRunTest<SubstituteQueueTest<false, true>>(
+        device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_set_kernel_arg.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_set_kernel_arg.cpp
new file mode 100644
index 00000000..ef938421
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_set_kernel_arg.cpp
@@ -0,0 +1,321 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "basic_command_buffer.h"
+#include "procs.h"
+
+#include <vector>
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+// clSetKernelArg tests for cl_khr_command_buffer which handles below cases:
+// -test interactions of clSetKernelArg with command-buffers
+// -test interactions of clSetKernelArg on a command-buffer pending execution
+
+template <bool simul_use>
+struct CommandBufferSetKernelArg : public BasicCommandBufferTest
+{
+    CommandBufferSetKernelArg(cl_device_id device, cl_context context,
+                              cl_command_queue queue)
+        : BasicCommandBufferTest(device, context, queue), trigger_event(nullptr)
+    {
+        simultaneous_use_requested = simul_use;
+        if (simul_use) buffer_size_multiplier = 2;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int SetUpKernel() override
+    {
+        cl_int error = CL_SUCCESS;
+
+        const char* kernel_str =
+            R"(
+            __kernel void copy(int in, __global int* out, __global int* offset)
+            {
+                size_t id = get_global_id(0);
+                size_t ind = offset[0] + id;
+                out[ind] = in;
+            })";
+
+        error = create_single_kernel_helper_create_program(context, &program, 1,
+                                                           &kernel_str);
+        test_error(error, "Failed to create program with source");
+
+        error = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
+        test_error(error, "Failed to build program");
+
+        kernel = clCreateKernel(program, "copy", &error);
+        test_error(error, "Failed to create copy kernel");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int SetUpKernelArgs() override
+    {
+        cl_int error = CL_SUCCESS;
+        out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+                                 num_elements * buffer_size_multiplier
+                                     * sizeof(cl_int),
+                                 nullptr, &error);
+        test_error(error, "clCreateBuffer failed");
+
+        // create secondary output buffer to test kernel args substitution
+        out_mem_k2 = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+                                    num_elements * buffer_size_multiplier
+                                        * sizeof(cl_int),
+                                    nullptr, &error);
+        test_error(error, "clCreateBuffer failed");
+
+        cl_int offset = 0;
+        off_mem =
+            clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+                           sizeof(cl_int), &offset, &error);
+        test_error(error, "clCreateBuffer failed");
+
+        cl_int in_arg = pattern_pri;
+        error = clSetKernelArg(kernel, 0, sizeof(cl_int), &in_arg);
+        test_error(error, "clSetKernelArg failed");
+
+        error = clSetKernelArg(kernel, 1, sizeof(out_mem), &out_mem);
+        test_error(error, "clSetKernelArg failed");
+
+        error = clSetKernelArg(kernel, 2, sizeof(off_mem), &off_mem);
+        test_error(error, "clSetKernelArg failed");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int Run() override
+    {
+        cl_int error = CL_SUCCESS;
+        if (simultaneous_use_requested)
+        {
+            // enqueue simultaneous command-buffers with clSetKernelArg calls
+            error = RunSimultaneous();
+            test_error(error, "RunSimultaneous failed");
+        }
+        else
+        {
+            // enqueue single command-buffer with  clSetKernelArg calls
+            error = RunSingle();
+            test_error(error, "RunSingle failed");
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RecordCommandBuffer()
+    {
+        cl_int error = CL_SUCCESS;
+
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        // changing kernel args at this point should have no effect,
+        // test will verify if clSetKernelArg didn't affect the first command
+        cl_int in_arg = pattern_sec;
+        error = clSetKernelArg(kernel, 0, sizeof(cl_int), &in_arg);
+        test_error(error, "clSetKernelArg failed");
+
+        error = clSetKernelArg(kernel, 1, sizeof(out_mem_k2), &out_mem_k2);
+        test_error(error, "clSetKernelArg failed");
+
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunSingle()
+    {
+        cl_int error = CL_SUCCESS;
+        std::vector<cl_int> output_data(num_elements);
+
+        // record command buffer
+        error = RecordCommandBuffer();
+        test_error(error, "RecordCommandBuffer failed");
+
+        const cl_int pattern_base = 0;
+        error =
+            clEnqueueFillBuffer(queue, out_mem, &pattern_base, sizeof(cl_int),
+                                0, data_size(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        const cl_int pattern_base_k2 = 1;
+        error = clEnqueueFillBuffer(queue, out_mem_k2, &pattern_base_k2,
+                                    sizeof(cl_int), 0, data_size(), 0, nullptr,
+                                    nullptr);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
+                                    output_data.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        // verify the result - result buffer must contain initial pattern
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    struct SimulPassData
+    {
+        cl_int pattern;
+        cl_int offset;
+        std::vector<cl_int> output_buffer;
+    };
+
+    //--------------------------------------------------------------------------
+    cl_int RecordSimultaneousCommandBuffer() const
+    {
+        cl_int error = CL_SUCCESS;
+
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int EnqueueSimultaneousPass(SimulPassData& pd)
+    {
+        cl_int error = clEnqueueFillBuffer(
+            queue, out_mem, &pd.pattern, sizeof(cl_int),
+            pd.offset * sizeof(cl_int), data_size(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        error = clEnqueueFillBuffer(queue, off_mem, &pd.offset, sizeof(cl_int),
+                                    0, sizeof(cl_int), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        if (!trigger_event)
+        {
+            trigger_event = clCreateUserEvent(context, &error);
+            test_error(error, "clCreateUserEvent failed");
+        }
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
+                                          &trigger_event, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clEnqueueReadBuffer(
+            queue, out_mem, CL_FALSE, pd.offset * sizeof(cl_int), data_size(),
+            pd.output_buffer.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    cl_int RunSimultaneous()
+    {
+        cl_int error = CL_SUCCESS;
+
+        // record command buffer with primary queue
+        error = RecordSimultaneousCommandBuffer();
+        test_error(error, "RecordSimultaneousCommandBuffer failed");
+
+        std::vector<SimulPassData> simul_passes = {
+            { 0, 0, std::vector<cl_int>(num_elements) }
+        };
+
+        error = EnqueueSimultaneousPass(simul_passes.front());
+        test_error(error, "EnqueueSimultaneousPass 1 failed");
+
+        // changing kernel args at this point should have no effect,
+        // test will verify if clSetKernelArg didn't affect command-buffer
+        cl_int in_arg = pattern_sec;
+        error = clSetKernelArg(kernel, 0, sizeof(cl_int), &in_arg);
+        test_error(error, "clSetKernelArg failed");
+
+        error = clSetKernelArg(kernel, 1, sizeof(out_mem_k2), &out_mem_k2);
+        test_error(error, "clSetKernelArg failed");
+
+        if (simultaneous_use_support)
+        {
+            cl_int offset = static_cast<cl_int>(num_elements);
+            simul_passes.push_back(
+                { 1, offset, std::vector<cl_int>(num_elements) });
+
+            error = EnqueueSimultaneousPass(simul_passes.back());
+            test_error(error, "EnqueueSimultaneousPass 2 failed");
+        }
+
+        error = clSetUserEventStatus(trigger_event, CL_COMPLETE);
+        test_error(error, "clSetUserEventStatus failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+
+        // verify the result buffer
+        for (auto&& pass : simul_passes)
+        {
+            auto& res_data = pass.output_buffer;
+            for (size_t i = 0; i < num_elements; i++)
+            {
+                CHECK_VERIFICATION_ERROR(pattern_pri, res_data[i], i);
+            }
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    clEventWrapper trigger_event = nullptr;
+
+    const cl_int pattern_pri = 2;
+    const cl_int pattern_sec = 3;
+
+    clMemWrapper out_mem_k2 = nullptr;
+};
+
+} // anonymous namespace
+
+int test_basic_set_kernel_arg(cl_device_id device, cl_context context,
+                              cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<CommandBufferSetKernelArg<false>>(
+        device, context, queue, num_elements);
+}
+
+int test_pending_set_kernel_arg(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<CommandBufferSetKernelArg<true>>(device, context,
+                                                           queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp
new file mode 100644
index 00000000..d73fc9ce
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp
@@ -0,0 +1,123 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "basic_command_buffer.h"
+#include "procs.h"
+
+#include <vector>
+
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+// Command-bufer barrier tests which handles below cases:
+//
+// - barrier wait list
+
+struct BarrierWithWaitListKHR : public BasicCommandBufferTest
+{
+
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    BarrierWithWaitListKHR(cl_device_id device, cl_context context,
+                           cl_command_queue queue)
+        : BasicCommandBufferTest(device, context, queue),
+          out_of_order_queue(nullptr), out_of_order_command_buffer(this),
+          event(nullptr)
+    {}
+
+    cl_int Run() override
+    {
+        cl_int error =
+            clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, in_mem,
+                                   &pattern, sizeof(cl_int), 0, data_size(), 0,
+                                   nullptr, &sync_points[0], nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        const cl_int overwritten_pattern = 0xACDC;
+        error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr,
+                                       out_mem, &overwritten_pattern,
+                                       sizeof(cl_int), 0, data_size(), 0,
+                                       nullptr, &sync_points[1], nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        error = clCommandBarrierWithWaitListKHR(out_of_order_command_buffer,
+                                                nullptr, 2, sync_points,
+                                                nullptr, nullptr);
+        test_error(error, "clCommandBarrierWithWaitListKHR failed");
+
+        error = clCommandNDRangeKernelKHR(
+            out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &num_elements, nullptr, 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(out_of_order_command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(
+            0, nullptr, out_of_order_command_buffer, 0, nullptr, &event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        std::vector<cl_int> output_data(num_elements);
+        error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
+                                    data_size(), output_data.data(), 1, &event,
+                                    nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+        out_of_order_queue = clCreateCommandQueue(
+            context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
+        test_error(error, "Unable to create command queue to test with");
+
+        out_of_order_command_buffer =
+            clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error);
+        test_error(error, "clCreateCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    bool Skip() override
+    {
+        return BasicCommandBufferTest::Skip() || !out_of_order_support;
+    }
+
+    const cl_int pattern = 0x16;
+    clCommandQueueWrapper out_of_order_queue;
+    clCommandBufferWrapper out_of_order_command_buffer;
+    clEventWrapper event;
+    cl_sync_point_khr sync_points[2];
+    clEventWrapper user_event;
+};
+};
+
+
+int test_barrier_wait_list(cl_device_id device, cl_context context,
+                           cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<BarrierWithWaitListKHR>(device, context, queue,
+                                                  num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp
new file mode 100644
index 00000000..102ae761
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp
@@ -0,0 +1,394 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "basic_command_buffer.h"
+#include "harness/typeWrappers.h"
+#include "procs.h"
+
+#include <vector>
+
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+// Command-buffer copy tests which handles below cases:
+//
+// -copy image
+// -copy buffer
+// -copy buffer to image
+// -copy image to buffer
+// -copy buffer rect
+
+struct CopyImageKHR : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clCommandFillImageKHR(command_buffer, nullptr, src_image,
+                                             fill_color, origin, region, 0,
+                                             nullptr, nullptr, nullptr);
+
+        test_error(error, "clCommandFillImageKHR failed");
+
+        error = clCommandCopyImageKHR(command_buffer, nullptr, src_image,
+                                      dst_image, origin, origin, region, 0, 0,
+                                      nullptr, nullptr);
+
+        test_error(error, "clCommandCopyImageKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        std::vector<cl_char> output_data(data_size);
+        error = clEnqueueReadImage(queue, dst_image, CL_TRUE, origin, region, 0,
+                                   0, output_data.data(), 0, nullptr, nullptr);
+
+        for (size_t i = 0; i < data_size; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+        src_image = create_image_2d(context, CL_MEM_READ_ONLY, &formats,
+                                    img_width, img_height, 0, NULL, &error);
+        test_error(error, "create_image_2d failed");
+
+        dst_image = create_image_2d(context, CL_MEM_WRITE_ONLY, &formats,
+                                    img_width, img_height, 0, NULL, &error);
+        test_error(error, "create_image_2d failed");
+
+        return CL_SUCCESS;
+    }
+
+    bool Skip() override
+    {
+        bool imageSupport =
+            checkForImageSupport(device) == CL_IMAGE_FORMAT_NOT_SUPPORTED;
+
+        return imageSupport || BasicCommandBufferTest::Skip();
+    }
+
+    const size_t img_width = 512;
+    const size_t img_height = 512;
+    const size_t data_size = img_width * img_height * 4 * sizeof(cl_char);
+    const size_t origin[3] = { 0, 0, 0 },
+                 region[3] = { img_width, img_height, 1 };
+    const cl_uint pattern = 0x05;
+    const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern };
+    const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 };
+    clMemWrapper src_image;
+    clMemWrapper dst_image;
+};
+
+struct CopyBufferKHR : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clCommandFillBufferKHR(
+            command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0,
+            data_size(), 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        error = clCommandCopyBufferKHR(command_buffer, nullptr, in_mem, out_mem,
+                                       0, 0, data_size(), 0, nullptr, nullptr,
+                                       nullptr);
+        test_error(error, "clCommandCopyBufferKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        std::vector<cl_char> output_data(data_size());
+        error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
+                                    output_data.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        for (size_t i = 0; i < data_size(); i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    const cl_char pattern = 0x14;
+};
+
+struct CopyBufferToImageKHR : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clCommandFillBufferKHR(
+            command_buffer, nullptr, buffer, &pattern, sizeof(cl_char), 0,
+            data_size, 0, nullptr, nullptr, nullptr);
+
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        error = clCommandCopyBufferToImageKHR(command_buffer, nullptr, buffer,
+                                              image, 0, origin, region, 0, 0,
+                                              nullptr, nullptr);
+
+        test_error(error, "clCommandCopyBufferToImageKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        std::vector<cl_char> output_data(data_size);
+
+        error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0,
+                                   output_data.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadImage failed");
+
+        for (size_t i = 0; i < data_size; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+        image = create_image_2d(context, CL_MEM_READ_WRITE, &formats, img_width,
+                                img_height, 0, NULL, &error);
+        test_error(error, "create_image_2d failed");
+
+        buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, data_size, nullptr,
+                                &error);
+        test_error(error, "Unable to create buffer");
+
+        return CL_SUCCESS;
+    }
+
+    bool Skip() override
+    {
+        bool imageSupport =
+            checkForImageSupport(device) == CL_IMAGE_FORMAT_NOT_SUPPORTED;
+
+        return imageSupport || BasicCommandBufferTest::Skip();
+    }
+
+    const size_t img_width = 512;
+    const size_t img_height = 512;
+    const size_t data_size = img_width * img_height * 4 * sizeof(cl_char);
+    const size_t origin[3] = { 0, 0, 0 },
+                 region[3] = { img_width, img_height, 1 };
+    const cl_char pattern = 0x11;
+    const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 };
+
+    clMemWrapper buffer;
+    clMemWrapper image;
+};
+
+struct CopyImageToBufferKHR : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error =
+            clCommandFillImageKHR(command_buffer, nullptr, image, fill_color,
+                                  origin, region, 0, nullptr, nullptr, nullptr);
+
+        test_error(error, "clCommandFillImageKHR failed");
+
+        error = clCommandCopyImageToBufferKHR(command_buffer, nullptr, image,
+                                              buffer, origin, region, 0, 0,
+                                              nullptr, nullptr, nullptr);
+
+        test_error(error, "clCommandCopyImageToBufferKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        std::vector<cl_char> output_data(data_size);
+
+        error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size,
+                                    output_data.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        for (size_t i = 0; i < data_size; i++)
+        {
+            CHECK_VERIFICATION_ERROR(static_cast<cl_char>(pattern),
+                                     output_data[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+        image = create_image_2d(context, CL_MEM_READ_WRITE, &formats, img_width,
+                                img_height, 0, NULL, &error);
+        test_error(error, "create_image_2d failed");
+
+        buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, data_size, nullptr,
+                                &error);
+        test_error(error, "Unable to create buffer");
+
+        return CL_SUCCESS;
+    }
+
+    bool Skip() override
+    {
+        bool imageSupport =
+            checkForImageSupport(device) == CL_IMAGE_FORMAT_NOT_SUPPORTED;
+
+        return imageSupport || BasicCommandBufferTest::Skip();
+    }
+
+    const size_t img_width = 512;
+    const size_t img_height = 512;
+    const size_t data_size = img_width * img_height * 4 * sizeof(cl_char);
+    const size_t origin[3] = { 0, 0, 0 },
+                 region[3] = { img_width, img_height, 1 };
+    const cl_uint pattern = 0x12;
+    const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern };
+    const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 };
+
+    clMemWrapper image;
+    clMemWrapper buffer;
+};
+
+struct CopyBufferRectKHR : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clCommandFillBufferKHR(
+            command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0,
+            data_size, 0, nullptr, nullptr, nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        error = clCommandCopyBufferRectKHR(
+            command_buffer, nullptr, in_mem, out_mem, origin, origin, region, 0,
+            0, 0, 0, 0, nullptr, nullptr, nullptr);
+
+        test_error(error, "clCommandCopyBufferRectKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        std::vector<cl_char> output_data(data_size);
+        error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size,
+                                    output_data.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        for (size_t i = 0; i < data_size; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+        in_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, data_size, nullptr,
+                                &error);
+        test_error(error, "clCreateBuffer failed");
+
+        out_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, data_size, nullptr,
+                                 &error);
+        test_error(error, "Unable to create buffer");
+
+        return CL_SUCCESS;
+    }
+
+    const size_t img_width = 512;
+    const size_t img_height = 512;
+    const size_t data_size = img_width * img_height * sizeof(cl_char);
+    const size_t origin[3] = { 0, 0, 0 },
+                 region[3] = { img_width, img_height, 1 };
+    const cl_char pattern = 0x13;
+
+    clMemWrapper in_mem;
+    clMemWrapper out_mem;
+};
+};
+
+int test_copy_image(cl_device_id device, cl_context context,
+                    cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<CopyImageKHR>(device, context, queue, num_elements);
+}
+
+int test_copy_buffer(cl_device_id device, cl_context context,
+                     cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<CopyBufferKHR>(device, context, queue, num_elements);
+}
+
+int test_copy_buffer_to_image(cl_device_id device, cl_context context,
+                              cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<CopyBufferToImageKHR>(device, context, queue,
+                                                num_elements);
+}
+
+int test_copy_image_to_buffer(cl_device_id device, cl_context context,
+                              cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<CopyImageToBufferKHR>(device, context, queue,
+                                                num_elements);
+}
+
+int test_copy_buffer_rect(cl_device_id device, cl_context context,
+                          cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<CopyBufferRectKHR>(device, context, queue,
+                                             num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_event_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_event_info.cpp
new file mode 100644
index 00000000..19026ffe
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_event_info.cpp
@@ -0,0 +1,254 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "basic_command_buffer.h"
+#include "procs.h"
+#include <vector>
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+// get event info tests which handles below cases:
+//
+// -command type
+// -command queue
+// -context
+// -execution status
+// -reference count
+
+struct CommandType : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        clEventWrapper event;
+        cl_int status;
+
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, &event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clWaitForEvents(1, &event);
+        test_error(error, "Unable to wait for event");
+
+        error = clGetEventInfo(event, CL_EVENT_COMMAND_TYPE, sizeof(status),
+                               &status, NULL);
+        test_error(error, "clGetEventInfo failed");
+
+        if (status != CL_COMMAND_COMMAND_BUFFER_KHR)
+        {
+            log_error(
+                "ERROR: Incorrect status returned from clGetEventInfo (%d)\n",
+                status);
+
+            return TEST_FAIL;
+        }
+
+        return CL_SUCCESS;
+    }
+};
+
+struct CommandQueue : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        clEventWrapper event;
+        size_t size;
+
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, &event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        cl_command_queue otherQueue;
+        error = clGetEventInfo(event, CL_EVENT_COMMAND_QUEUE,
+                               sizeof(otherQueue), &otherQueue, &size);
+        test_error(error, "Unable to get event info!");
+
+        // We can not check if this is the right queue because this is an opaque
+        // object.
+        if (size != sizeof(queue) || otherQueue == NULL)
+        {
+            log_error("ERROR: Returned command queue size does not validate "
+                      "(expected %zu, got %zu)\n",
+                      sizeof(queue), size);
+            return TEST_FAIL;
+        }
+
+        return CL_SUCCESS;
+    }
+};
+
+struct Context : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        clEventWrapper event;
+        size_t size;
+
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, &event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        cl_context testCtx;
+        error = clGetEventInfo(event, CL_EVENT_CONTEXT, sizeof(testCtx),
+                               &testCtx, &size);
+        test_error(error, "Unable to get event context info!");
+        if (size != sizeof(context))
+        {
+            log_error(
+                "ERROR: Returned context size does not validate (expected "
+                "%zu, got %zu)\n",
+                sizeof(context), size);
+            return TEST_FAIL;
+        }
+        if (testCtx != context)
+        {
+            log_error("ERROR: Returned context does not match (expected %p, "
+                      "got %p)\n",
+                      (void *)context, (void *)testCtx);
+            return TEST_FAIL;
+        }
+
+        return CL_SUCCESS;
+    }
+};
+
+struct ExecutionStatus : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        clEventWrapper event;
+        cl_int status;
+
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, &event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+                               sizeof(status), &status, NULL);
+        test_error(error, "clGetEventInfo failed");
+
+        if (!(status == CL_QUEUED || status == CL_SUBMITTED
+              || status == CL_RUNNING || status == CL_COMPLETE))
+        {
+            log_error(
+                "ERROR: Incorrect status returned from clGetEventInfo (%d)\n",
+                status);
+            return TEST_FAIL;
+        }
+
+        error = clWaitForEvents(1, &event);
+        test_error(error, "clWaitForEvents failed");
+
+        error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS,
+                               sizeof(status), &status, NULL);
+        test_error(error, "clGetEventInfo failed");
+
+        if (status != CL_COMPLETE)
+        {
+            log_error(
+                "ERROR: Incorrect status returned from clGetEventInfo (%d)\n",
+                status);
+            return TEST_FAIL;
+        }
+
+        return CL_SUCCESS;
+    }
+};
+
+struct ReferenceCount : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        clEventWrapper event;
+        size_t size;
+        cl_uint count;
+
+        cl_int error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, &event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clGetEventInfo(event, CL_EVENT_REFERENCE_COUNT, sizeof(count),
+                               &count, &size);
+        test_error(error, "clGetEventInfo failed");
+
+        if (size != sizeof(count) || count == 0)
+        {
+            log_error(
+                "ERROR: Wrong command reference count (expected return value 1 "
+                "of size %zu, returned size %zu, returned value %u)\n",
+                sizeof(count), size, count);
+            return TEST_FAIL;
+        }
+
+        return CL_SUCCESS;
+    }
+};
+};
+
+int test_event_info_command_type(cl_device_id device, cl_context context,
+                                 cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<CommandType>(device, context, queue, num_elements);
+}
+
+int test_event_info_command_queue(cl_device_id device, cl_context context,
+                                  cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<CommandQueue>(device, context, queue, num_elements);
+}
+
+int test_event_info_context(cl_device_id device, cl_context context,
+                            cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<Context>(device, context, queue, num_elements);
+}
+
+int test_event_info_execution_status(cl_device_id device, cl_context context,
+                                     cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<ExecutionStatus>(device, context, queue,
+                                           num_elements);
+}
+
+int test_event_info_reference_count(cl_device_id device, cl_context context,
+                                    cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<ReferenceCount>(device, context, queue, num_elements);
+}
+\ No newline at end of file
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp
new file mode 100644
index 00000000..88e97a27
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp
@@ -0,0 +1,142 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "basic_command_buffer.h"
+#include "harness/typeWrappers.h"
+#include "procs.h"
+
+#include <vector>
+
+
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+// Command-buffer fill tests which handles below cases:
+//
+// -fill image
+// -fill buffer
+
+struct FillImageKHR : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error =
+            clCommandFillImageKHR(command_buffer, nullptr, image, fill_color,
+                                  origin, region, 0, nullptr, nullptr, nullptr);
+
+        test_error(error, "clCommandFillImageKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        std::vector<cl_char> output_data(data_size);
+        error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0,
+                                   output_data.data(), 0, nullptr, nullptr);
+
+        for (size_t i = 0; i < data_size; i++)
+        {
+            CHECK_VERIFICATION_ERROR(static_cast<cl_char>(pattern),
+                                     output_data[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicCommandBufferTest::SetUp failed");
+
+        image = create_image_2d(context, CL_MEM_READ_WRITE, &formats, img_width,
+                                img_height, 0, NULL, &error);
+        test_error(error, "create_image_2d failed");
+
+        return CL_SUCCESS;
+    }
+
+    bool Skip() override
+    {
+        bool imageSupport =
+            checkForImageSupport(device) == CL_IMAGE_FORMAT_NOT_SUPPORTED;
+
+        return imageSupport || BasicCommandBufferTest::Skip();
+    }
+
+    const size_t img_width = 512;
+    const size_t img_height = 512;
+    const size_t data_size = img_width * img_height * 4 * sizeof(cl_char);
+    const size_t origin[3] = { 0, 0, 0 },
+                 region[3] = { img_width, img_height, 1 };
+    const cl_uint pattern = 0x10;
+    const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern };
+    const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 };
+
+    clMemWrapper image;
+};
+
+struct FillBufferKHR : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int error = clCommandFillBufferKHR(
+            command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0,
+            data_size(), 0, nullptr, nullptr, nullptr);
+
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        std::vector<cl_char> output_data(data_size());
+        error = clEnqueueReadBuffer(queue, in_mem, CL_TRUE, 0, data_size(),
+                                    output_data.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        for (size_t i = 0; i < data_size(); i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    const char pattern = 0x15;
+};
+
+};
+
+int test_fill_buffer(cl_device_id device, cl_context context,
+                     cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<FillBufferKHR>(device, context, queue, num_elements);
+}
+
+int test_fill_image(cl_device_id device, cl_context context,
+                    cl_command_queue queue, int num_elements)
+{
+    return MakeAndRunTest<FillImageKHR>(device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/main.cpp
index 4dece455..4eefc8ab 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/main.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/main.cpp
@@ -16,12 +16,51 @@
 #include "harness/testHarness.h"
 
 test_definition test_list[] = {
-    ADD_TEST(single_ndrange), ADD_TEST(interleaved_enqueue),
-    ADD_TEST(mixed_commands), ADD_TEST(explicit_flush),
-    ADD_TEST(user_events),    ADD_TEST(out_of_order)
+    ADD_TEST(single_ndrange),
+    ADD_TEST(interleaved_enqueue),
+    ADD_TEST(mixed_commands),
+    ADD_TEST(explicit_flush),
+    ADD_TEST(out_of_order),
+    ADD_TEST(simultaneous_out_of_order),
+    ADD_TEST(info_queues),
+    ADD_TEST(info_ref_count),
+    ADD_TEST(info_state),
+    ADD_TEST(info_prop_array),
+    ADD_TEST(basic_profiling),
+    ADD_TEST(simultaneous_profiling),
+    ADD_TEST(regular_wait_for_command_buffer),
+    ADD_TEST(command_buffer_wait_for_command_buffer),
+    ADD_TEST(command_buffer_wait_for_sec_command_buffer),
+    ADD_TEST(return_event_callback),
+    ADD_TEST(clwaitforevents_single),
+    ADD_TEST(clwaitforevents),
+    ADD_TEST(command_buffer_wait_for_regular),
+    ADD_TEST(wait_for_sec_queue_event),
+    ADD_TEST(user_event_wait),
+    ADD_TEST(user_events_wait),
+    ADD_TEST(user_event_callback),
+    ADD_TEST(queue_substitution),
+    ADD_TEST(properties_queue_substitution),
+    ADD_TEST(simultaneous_queue_substitution),
+    ADD_TEST(fill_image),
+    ADD_TEST(fill_buffer),
+    ADD_TEST(copy_image),
+    ADD_TEST(copy_buffer),
+    ADD_TEST(copy_buffer_to_image),
+    ADD_TEST(copy_image_to_buffer),
+    ADD_TEST(copy_buffer_rect),
+    ADD_TEST(barrier_wait_list),
+    ADD_TEST(basic_printf),
+    ADD_TEST(simultaneous_printf),
+    ADD_TEST(basic_set_kernel_arg),
+    ADD_TEST(pending_set_kernel_arg),
+    ADD_TEST(event_info_command_type),
+    ADD_TEST(event_info_command_queue),
+    ADD_TEST(event_info_execution_status),
+    ADD_TEST(event_info_context),
+    ADD_TEST(event_info_reference_count)
 };
 
-
 int main(int argc, const char *argv[])
 {
     // A device may report the required properties of a queue that
diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h
index 58fd228f..63e004a7 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/procs.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h
@@ -27,9 +27,108 @@ extern int test_mixed_commands(cl_device_id device, cl_context context,
                                cl_command_queue queue, int num_elements);
 extern int test_explicit_flush(cl_device_id device, cl_context context,
                                cl_command_queue queue, int num_elements);
-extern int test_user_events(cl_device_id device, cl_context context,
-                            cl_command_queue queue, int num_elements);
 extern int test_out_of_order(cl_device_id device, cl_context context,
                              cl_command_queue queue, int num_elements);
+extern int test_basic_printf(cl_device_id device, cl_context context,
+                             cl_command_queue queue, int num_elements);
+extern int test_simultaneous_printf(cl_device_id device, cl_context context,
+                                    cl_command_queue queue, int num_elements);
+extern int test_info_queues(cl_device_id device, cl_context context,
+                            cl_command_queue queue, int num_elements);
+extern int test_info_ref_count(cl_device_id device, cl_context context,
+                               cl_command_queue queue, int num_elements);
+extern int test_info_state(cl_device_id device, cl_context context,
+                           cl_command_queue queue, int num_elements);
+extern int test_info_prop_array(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int num_elements);
+extern int test_basic_set_kernel_arg(cl_device_id device, cl_context context,
+                                     cl_command_queue queue, int num_elements);
+extern int test_pending_set_kernel_arg(cl_device_id device, cl_context context,
+                                       cl_command_queue queue,
+                                       int num_elements);
+extern int test_regular_wait_for_command_buffer(cl_device_id device,
+                                                cl_context context,
+                                                cl_command_queue queue,
+                                                int num_elements);
+extern int test_command_buffer_wait_for_command_buffer(cl_device_id device,
+                                                       cl_context context,
+                                                       cl_command_queue queue,
+                                                       int num_elements);
+extern int test_command_buffer_wait_for_sec_command_buffer(
+    cl_device_id device, cl_context context, cl_command_queue queue,
+    int num_elements);
+extern int test_return_event_callback(cl_device_id device, cl_context context,
+                                      cl_command_queue queue, int num_elements);
+extern int test_clwaitforevents_single(cl_device_id device, cl_context context,
+                                       cl_command_queue queue,
+                                       int num_elements);
+extern int test_clwaitforevents(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int num_elements);
+extern int test_command_buffer_wait_for_regular(cl_device_id device,
+                                                cl_context context,
+                                                cl_command_queue queue,
+                                                int num_elements);
+extern int test_wait_for_sec_queue_event(cl_device_id device,
+                                         cl_context context,
+                                         cl_command_queue queue,
+                                         int num_elements);
+extern int test_user_event_wait(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int num_elements);
+extern int test_user_events_wait(cl_device_id device, cl_context context,
+                                 cl_command_queue queue, int num_elements);
+extern int test_user_event_callback(cl_device_id device, cl_context context,
+                                    cl_command_queue queue, int num_elements);
+extern int test_simultaneous_out_of_order(cl_device_id device,
+                                          cl_context context,
+                                          cl_command_queue queue,
+                                          int num_elements);
+extern int test_basic_profiling(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int num_elements);
+extern int test_simultaneous_profiling(cl_device_id device, cl_context context,
+                                       cl_command_queue queue,
+                                       int num_elements);
+extern int test_queue_substitution(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int num_elements);
+extern int test_properties_queue_substitution(cl_device_id device,
+                                              cl_context context,
+                                              cl_command_queue queue,
+                                              int num_elements);
+extern int test_simultaneous_queue_substitution(cl_device_id device,
+                                                cl_context context,
+                                                cl_command_queue queue,
+                                                int num_elements);
+extern int test_fill_image(cl_device_id device, cl_context context,
+                           cl_command_queue queue, int num_elements);
+extern int test_fill_buffer(cl_device_id device, cl_context context,
+                            cl_command_queue queue, int num_elements);
+extern int test_copy_image(cl_device_id device, cl_context context,
+                           cl_command_queue queue, int num_elements);
+extern int test_copy_buffer(cl_device_id device, cl_context context,
+                            cl_command_queue queue, int num_elements);
+extern int test_copy_buffer_to_image(cl_device_id device, cl_context context,
+                                     cl_command_queue queue, int num_elements);
+extern int test_copy_image_to_buffer(cl_device_id device, cl_context context,
+                                     cl_command_queue queue, int num_elements);
+extern int test_copy_buffer_rect(cl_device_id device, cl_context context,
+                                 cl_command_queue queue, int num_elements);
+extern int test_barrier_wait_list(cl_device_id device, cl_context context,
+                                  cl_command_queue queue, int num_elements);
+extern int test_event_info_command_type(cl_device_id device, cl_context context,
+                                        cl_command_queue queue,
+                                        int num_elements);
+extern int test_event_info_command_queue(cl_device_id device,
+                                         cl_context context,
+                                         cl_command_queue queue,
+                                         int num_elements);
+extern int test_event_info_context(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int num_elements);
+extern int test_event_info_execution_status(cl_device_id device,
+                                            cl_context context,
+                                            cl_command_queue queue,
+                                            int num_elements);
+extern int test_event_info_reference_count(cl_device_id device,
+                                           cl_context context,
+                                           cl_command_queue queue,
+                                           int num_elements);
 
 #endif /*_CL_KHR_COMMAND_BUFFER_PROCS_H*/
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_api.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_api.cpp
index ab92cb89..68db364c 100644
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_api.cpp
+++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_api.cpp
@@ -175,7 +175,7 @@ int api_functions(cl_device_id deviceID, cl_context context,
 
             error = clEnqueueAcquireDX9MediaSurfacesKHR(
                 cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
+                &memObjList[0], 0, NULL, NULL);
             if (error != CL_SUCCESS)
             {
                 log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
@@ -627,7 +627,7 @@ int api_functions(cl_device_id deviceID, cl_context context,
 
             error = clEnqueueReleaseDX9MediaSurfacesKHR(
                 cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
+                &memObjList[0], 0, NULL, NULL);
             if (error != CL_SUCCESS)
             {
                 log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_kernel.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_kernel.cpp
index a204440d..84761223 100644
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_kernel.cpp
+++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_functions_kernel.cpp
@@ -224,7 +224,7 @@ int kernel_functions(cl_device_id deviceID, cl_context context,
 
             error = clEnqueueAcquireDX9MediaSurfacesKHR(
                 cmdQueue, static_cast<cl_uint>(memObjSrcList.size()),
-                &memObjSrcList[0], 0, 0, 0);
+                &memObjSrcList[0], 0, NULL, NULL);
             if (error != CL_SUCCESS)
             {
                 log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
@@ -235,7 +235,7 @@ int kernel_functions(cl_device_id deviceID, cl_context context,
 
             error = clEnqueueAcquireDX9MediaSurfacesKHR(
                 cmdQueue, static_cast<cl_uint>(memObjDstList.size()),
-                &memObjDstList[0], 0, 0, 0);
+                &memObjDstList[0], 0, NULL, NULL);
             if (error != CL_SUCCESS)
             {
                 log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
@@ -377,7 +377,7 @@ int kernel_functions(cl_device_id deviceID, cl_context context,
 
             error = clEnqueueReleaseDX9MediaSurfacesKHR(
                 cmdQueue, static_cast<cl_uint>(memObjSrcList.size()),
-                &memObjSrcList[0], 0, 0, 0);
+                &memObjSrcList[0], 0, NULL, NULL);
             if (error != CL_SUCCESS)
             {
                 log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
@@ -387,7 +387,7 @@ int kernel_functions(cl_device_id deviceID, cl_context context,
 
             error = clEnqueueReleaseDX9MediaSurfacesKHR(
                 cmdQueue, static_cast<cl_uint>(memObjDstList.size()),
-                &memObjDstList[0], 0, 0, 0);
+                &memObjDstList[0], 0, NULL, NULL);
             if (error != CL_SUCCESS)
             {
                 log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_interop_sync.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_interop_sync.cpp
index fbc616e2..280f47b5 100644
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_interop_sync.cpp
+++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_interop_sync.cpp
@@ -233,7 +233,7 @@ int interop_user_sync(cl_device_id deviceID, cl_context context,
 
         error = clEnqueueAcquireDX9MediaSurfacesKHR(
             cmdQueue, static_cast<cl_uint>(memObjList.size()),
-            &memObjList.at(0), 0, 0, 0);
+            &memObjList.at(0), 0, NULL, NULL);
         if (error != CL_SUCCESS)
         {
             log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
@@ -273,7 +273,7 @@ int interop_user_sync(cl_device_id deviceID, cl_context context,
 
         error = clEnqueueReleaseDX9MediaSurfacesKHR(
             cmdQueue, static_cast<cl_uint>(memObjList.size()),
-            &memObjList.at(0), 0, 0, 0);
+            &memObjList.at(0), 0, NULL, NULL);
         if (error != CL_SUCCESS)
         {
             log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_memory_access.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_memory_access.cpp
index 1e4e2c4e..c7242422 100644
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_memory_access.cpp
+++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_memory_access.cpp
@@ -153,7 +153,7 @@ int memory_access(cl_device_id deviceID, cl_context context,
 
             error = clEnqueueAcquireDX9MediaSurfacesKHR(
                 cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
+                &memObjList[0], 0, NULL, NULL);
             if (error != CL_SUCCESS)
             {
                 log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
@@ -185,7 +185,7 @@ int memory_access(cl_device_id deviceID, cl_context context,
 
             error = clEnqueueReleaseDX9MediaSurfacesKHR(
                 cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
+                &memObjList[0], 0, NULL, NULL);
             if (error != CL_SUCCESS)
             {
                 log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
@@ -239,7 +239,7 @@ int memory_access(cl_device_id deviceID, cl_context context,
 
             error = clEnqueueAcquireDX9MediaSurfacesKHR(
                 cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
+                &memObjList[0], 0, NULL, NULL);
             if (error != CL_SUCCESS)
             {
                 log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
@@ -279,7 +279,7 @@ int memory_access(cl_device_id deviceID, cl_context context,
 
             error = clEnqueueReleaseDX9MediaSurfacesKHR(
                 cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
+                &memObjList[0], 0, NULL, NULL);
             if (error != CL_SUCCESS)
             {
                 log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
@@ -333,7 +333,7 @@ int memory_access(cl_device_id deviceID, cl_context context,
 
             error = clEnqueueAcquireDX9MediaSurfacesKHR(
                 cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
+                &memObjList[0], 0, NULL, NULL);
             if (error != CL_SUCCESS)
             {
                 log_error("clEnqueueAcquireDX9MediaSurfacesKHR failed: %s\n",
@@ -399,7 +399,7 @@ int memory_access(cl_device_id deviceID, cl_context context,
 
             error = clEnqueueReleaseDX9MediaSurfacesKHR(
                 cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
+                &memObjList[0], 0, NULL, NULL);
             if (error != CL_SUCCESS)
             {
                 log_error("clEnqueueReleaseDX9MediaSurfacesKHR failed: %s\n",
diff --git a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_other_data_types.cpp b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_other_data_types.cpp
index 0e5d1d12..8ff71279 100644
--- a/test_conformance/extensions/cl_khr_dx9_media_sharing/test_other_data_types.cpp
+++ b/test_conformance/extensions/cl_khr_dx9_media_sharing/test_other_data_types.cpp
@@ -271,7 +271,7 @@ int other_data_types(cl_device_id deviceID, cl_context context,
 
             error = clEnqueueAcquireDX9MediaSurfacesKHR(
                 cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
+                &memObjList[0], 0, NULL, NULL);
             if (error != CL_SUCCESS)
             {
                 log_error("clEnqueueAcquireMediaSurfaceKHR failed: %s\n",
@@ -466,7 +466,7 @@ int other_data_types(cl_device_id deviceID, cl_context context,
 
             error = clEnqueueReleaseDX9MediaSurfacesKHR(
                 cmdQueue, static_cast<cl_uint>(memObjList.size()),
-                &memObjList[0], 0, 0, 0);
+                &memObjList[0], 0, NULL, NULL);
             if (error != CL_SUCCESS)
             {
                 log_error("clEnqueueReleaseMediaSurfaceKHR failed: %s\n",
diff --git a/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt b/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt
new file mode 100644
index 00000000..6e02ba97
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_external_semaphore/CMakeLists.txt
@@ -0,0 +1,28 @@
+set(MODULE_NAME CL_KHR_EXTERNAL_SEMAPHORE)
+
+set(${MODULE_NAME}_SOURCES
+    main.cpp
+    test_external_semaphore.cpp
+)
+
+set (CLConform_VULKAN_LIBRARIES_DIR "${VULKAN_LIB_DIR}")
+
+link_directories(${CLConform_VULKAN_LIBRARIES_DIR})
+
+list(APPEND CLConform_INCLUDE_DIR ${VULKAN_INCLUDE_DIR})
+
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+
+include_directories (${CLConform_INCLUDE_DIR})
+
+# needed by Vulkan wrapper to link
+if(WIN32)
+    list(APPEND CLConform_LIBRARIES vulkan-1 vulkan_wrapper)
+else(WIN32)
+    list(APPEND CLConform_LIBRARIES vulkan dl vulkan_wrapper)
+endif(WIN32)
+set(CMAKE_CXX_FLAGS "-fpermissive")
+
+include_directories("../../common/vulkan_wrapper")
+
+include(../../CMakeCommon.txt)
diff --git a/test_conformance/extensions/cl_khr_external_semaphore/main.cpp b/test_conformance/extensions/cl_khr_external_semaphore/main.cpp
new file mode 100644
index 00000000..193714fc
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_external_semaphore/main.cpp
@@ -0,0 +1,46 @@
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "procs.h"
+#include "harness/testHarness.h"
+
+test_definition test_list[] = {
+    ADD_TEST(external_semaphores_queries),
+    ADD_TEST(external_semaphores_multi_context),
+    ADD_TEST(external_semaphores_simple_1),
+    // ADD_TEST(external_semaphores_simple_2),
+    ADD_TEST(external_semaphores_reuse),
+    ADD_TEST(external_semaphores_cross_queues_ooo),
+    ADD_TEST(external_semaphores_cross_queues_io),
+    ADD_TEST(external_semaphores_cross_queues_io2),
+    ADD_TEST(external_semaphores_multi_signal),
+    ADD_TEST(external_semaphores_multi_wait),
+    // ADD_TEST(external_semaphores_order_1),
+    // ADD_TEST(external_semaphores_order_2),
+    // ADD_TEST(external_semaphores_order_3),
+    // ADD_TEST(external_semaphores_invalid_command)
+};
+
+
+int main(int argc, const char *argv[])
+{
+    // A device may report the required properties of a queue that
+    // is compatible with command-buffers via the query
+    // CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR. We account
+    // for this in the tests themselves, rather than here, where we have a
+    // device to query.
+    const cl_command_queue_properties queue_properties = 0;
+    return runTestHarnessWithCheck(argc, argv, ARRAY_SIZE(test_list), test_list,
+                                   false, queue_properties, nullptr);
+}
diff --git a/test_conformance/extensions/cl_khr_external_semaphore/procs.h b/test_conformance/extensions/cl_khr_external_semaphore/procs.h
new file mode 100644
index 00000000..753c8fe2
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_external_semaphore/procs.h
@@ -0,0 +1,82 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef _CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H
+#define _CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H
+
+#include <CL/cl.h>
+
+// Basic command-buffer tests
+
+extern int test_external_semaphores_queries(cl_device_id deviceID,
+                                            cl_context context,
+                                            cl_command_queue defaultQueue,
+                                            int num_elements);
+extern int test_external_semaphores_multi_context(cl_device_id deviceID,
+                                                  cl_context context,
+                                                  cl_command_queue defaultQueue,
+                                                  int num_elements);
+extern int test_external_semaphores_simple_1(cl_device_id deviceID,
+                                             cl_context context,
+                                             cl_command_queue queue,
+                                             int num_elements);
+extern int test_external_semaphores_simple_2(cl_device_id deviceID,
+                                             cl_context context,
+                                             cl_command_queue queue,
+                                             int num_elements);
+extern int test_external_semaphores_reuse(cl_device_id deviceID,
+                                          cl_context context,
+                                          cl_command_queue queue,
+                                          int num_elements);
+extern int test_external_semaphores_cross_queues_ooo(cl_device_id deviceID,
+                                                     cl_context context,
+                                                     cl_command_queue queue,
+                                                     int num_elements);
+extern int test_external_semaphores_cross_queues_io(cl_device_id deviceID,
+                                                    cl_context context,
+                                                    cl_command_queue queue,
+                                                    int num_elements);
+extern int test_external_semaphores_cross_queues_io2(
+    cl_device_id deviceID, cl_context context, cl_command_queue defaultQueue,
+    int num_elements);
+extern int test_external_semaphores_multi_signal(cl_device_id deviceID,
+                                                 cl_context context,
+                                                 cl_command_queue queue,
+                                                 int num_elements);
+extern int test_external_semaphores_multi_wait(cl_device_id deviceID,
+                                               cl_context context,
+                                               cl_command_queue queue,
+                                               int num_elements);
+extern int test_external_semaphores_order_1(cl_device_id deviceID,
+                                            cl_context context,
+                                            cl_command_queue queue,
+                                            int num_elements);
+extern int test_external_semaphores_order_2(cl_device_id deviceID,
+                                            cl_context context,
+                                            cl_command_queue queue,
+                                            int num_elements);
+extern int test_external_semaphores_order_3(cl_device_id deviceID,
+                                            cl_context context,
+                                            cl_command_queue queue,
+                                            int num_elements);
+extern int test_external_semaphores_import_export_fd(cl_device_id deviceID,
+                                                     cl_context context,
+                                                     cl_command_queue queue,
+                                                     int num_elements);
+extern int test_external_semaphores_invalid_command(cl_device_id deviceID,
+                                                    cl_context context,
+                                                    cl_command_queue queue,
+                                                    int num_elements);
+#endif /* CL_KHR_EXTERNAL_SEMAPHORE */
diff --git a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp
new file mode 100644
index 00000000..a7ed307e
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp
@@ -0,0 +1,1350 @@
+#include "harness/typeWrappers.h"
+#include "harness/extensionHelpers.h"
+#include "harness/errorHelpers.h"
+#include "opencl_vulkan_wrapper.hpp"
+#include <thread>
+#include <chrono>
+
+#define FLUSH_DELAY_S 5
+
+#define SEMAPHORE_PARAM_TEST(param_name, param_type, expected)                 \
+    do                                                                         \
+    {                                                                          \
+        param_type value;                                                      \
+        size_t size;                                                           \
+        cl_int error = clGetSemaphoreInfoKHR(sema, param_name, sizeof(value),  \
+                                             &value, &size);                   \
+        test_error(error, "Unable to get " #param_name " from semaphore");     \
+        if (value != expected)                                                 \
+        {                                                                      \
+            test_fail("ERROR: Parameter %s did not validate! (expected %d, "   \
+                      "got %d)\n",                                             \
+                      #param_name, expected, value);                           \
+        }                                                                      \
+        if (size != sizeof(value))                                             \
+        {                                                                      \
+            test_fail(                                                         \
+                "ERROR: Returned size of parameter %s does not validate! "     \
+                "(expected %d, got %d)\n",                                     \
+                #param_name, (int)sizeof(value), (int)size);                   \
+        }                                                                      \
+    } while (false)
+
+#define SEMAPHORE_PARAM_TEST_ARRAY(param_name, param_type, num_params,         \
+                                   expected)                                   \
+    do                                                                         \
+    {                                                                          \
+        param_type value[num_params];                                          \
+        size_t size;                                                           \
+        cl_int error = clGetSemaphoreInfoKHR(sema, param_name, sizeof(value),  \
+                                             &value, &size);                   \
+        test_error(error, "Unable to get " #param_name " from semaphore");     \
+        if (size != sizeof(value))                                             \
+        {                                                                      \
+            test_fail(                                                         \
+                "ERROR: Returned size of parameter %s does not validate! "     \
+                "(expected %d, got %d)\n",                                     \
+                #param_name, (int)sizeof(value), (int)size);                   \
+        }                                                                      \
+        if (memcmp(value, expected, size) != 0)                                \
+        {                                                                      \
+            test_fail("ERROR: Parameter %s did not validate!\n", #param_name); \
+        }                                                                      \
+    } while (false)
+
+static const char* source = "__kernel void empty() {}";
+
+static int init_vuikan_device()
+{
+    cl_platform_id platform = nullptr;
+
+    cl_int err = CL_SUCCESS;
+
+    err = clGetPlatformIDs(1, &platform, NULL);
+    if (err != CL_SUCCESS)
+    {
+        print_error(err, "Error: Failed to get platform\n");
+        return err;
+    }
+
+    init_cl_vk_ext(platform);
+
+    return CL_SUCCESS;
+}
+
+// Confirm the semaphores can be successfully queried
+int test_external_semaphores_queries(cl_device_id deviceID, cl_context context,
+                                     cl_command_queue defaultQueue,
+                                     int num_elements)
+{
+    if (!is_extension_available(deviceID, "cl_khr_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    if (init_vuikan_device())
+    {
+        log_info("Cannot initialise Vulkan. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    VulkanDevice vkDevice;
+
+    GET_PFN(deviceID, clGetSemaphoreInfoKHR);
+    GET_PFN(deviceID, clReleaseSemaphoreKHR);
+    GET_PFN(deviceID, clRetainSemaphoreKHR);
+
+    const std::vector<VulkanExternalMemoryHandleType>
+        vkExternalMemoryHandleTypeList =
+            getSupportedVulkanExternalMemoryHandleTypeList();
+    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+    VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+
+    clExternalSemaphore sema_ext(vkVk2CLSemaphore, context,
+                                 vkExternalSemaphoreHandleType, deviceID);
+
+    // Needed by the macro
+    cl_semaphore_khr sema = sema_ext.getCLSemaphore();
+
+    SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_TYPE_KHR, cl_semaphore_type_khr,
+                         CL_SEMAPHORE_TYPE_BINARY_KHR);
+
+    SEMAPHORE_PARAM_TEST(CL_DEVICE_HANDLE_LIST_KHR, cl_uint, 1);
+
+    SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, cl_uint, 1);
+
+    // Confirm that querying CL_SEMAPHORE_CONTEXT_KHR returns the right context
+    SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_CONTEXT_KHR, cl_context, context);
+
+    // Confirm that querying CL_SEMAPHORE_REFERENCE_COUNT_KHR returns the right
+    // value
+    SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_REFERENCE_COUNT_KHR, cl_uint, 1);
+
+    cl_int err = CL_SUCCESS;
+
+    err = clRetainSemaphoreKHR(sema);
+    test_error(err, "Could not retain semaphore");
+    SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_REFERENCE_COUNT_KHR, cl_uint, 2);
+
+    err = clReleaseSemaphoreKHR(sema);
+    test_error(err, "Could not release semaphore");
+    SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_REFERENCE_COUNT_KHR, cl_uint, 1);
+
+    // Confirm that querying CL_SEMAPHORE_PAYLOAD_KHR returns the unsignaled
+    // state
+    SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_PAYLOAD_KHR, cl_semaphore_payload_khr, 0);
+
+    return TEST_PASS;
+}
+
+int test_external_semaphores_multi_context(cl_device_id deviceID,
+                                           cl_context context,
+                                           cl_command_queue defaultQueue,
+                                           int num_elements)
+{
+    if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    if (init_vuikan_device())
+    {
+        log_info("Cannot initialise Vulkan. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    VulkanDevice vkDevice;
+
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+
+    const std::vector<VulkanExternalMemoryHandleType>
+        vkExternalMemoryHandleTypeList =
+            getSupportedVulkanExternalMemoryHandleTypeList();
+    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+    VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+
+    cl_int err = CL_SUCCESS;
+
+    cl_context context2 =
+        clCreateContext(NULL, 1, &deviceID, notify_callback, NULL, &err);
+    if (!context2)
+    {
+        print_error(err, "Unable to create testing context");
+        return TEST_FAIL;
+    }
+
+    clExternalSemaphore sema_ext_1(vkVk2CLSemaphore, context,
+                                   vkExternalSemaphoreHandleType, deviceID);
+    clExternalSemaphore sema_ext_2(vkVk2CLSemaphore, context2,
+                                   vkExternalSemaphoreHandleType, deviceID);
+
+    clCommandQueueWrapper queue1 =
+        clCreateCommandQueue(context, deviceID, 0, &err);
+    test_error(err, "Could not create command queue");
+
+    clCommandQueueWrapper queue2 =
+        clCreateCommandQueue(context2, deviceID, 0, &err);
+    test_error(err, "Could not create command queue");
+
+    // Signal semaphore 1 and 2
+    clEventWrapper signal_event;
+    err = clEnqueueSignalSemaphoresKHR(queue1, 1, &sema_ext_1.getCLSemaphore(),
+                                       nullptr, 0, nullptr, &signal_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore 1
+    clEventWrapper wait_1_event;
+    err = clEnqueueWaitSemaphoresKHR(queue1, 1, &sema_ext_1.getCLSemaphore(),
+                                     nullptr, 0, nullptr, &wait_1_event);
+    test_error(err, "Could not wait semaphore");
+
+    err = clEnqueueSignalSemaphoresKHR(queue2, 1, &sema_ext_2.getCLSemaphore(),
+                                       nullptr, 0, nullptr, &signal_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore 2
+    clEventWrapper wait_2_event;
+    err = clEnqueueWaitSemaphoresKHR(queue2, 1, &sema_ext_2.getCLSemaphore(),
+                                     nullptr, 0, nullptr, &wait_2_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Finish
+    err = clFinish(queue1);
+    test_error(err, "Could not finish queue");
+
+    err = clFinish(queue2);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(signal_event);
+    test_assert_event_complete(wait_1_event);
+    test_assert_event_complete(wait_2_event);
+
+    return TEST_PASS;
+}
+
+// Helper function that signals and waits on semaphore across two different
+// queues.
+static int semaphore_external_cross_queue_helper(cl_device_id deviceID,
+                                                 cl_context context,
+                                                 cl_command_queue queue_1,
+                                                 cl_command_queue queue_2)
+{
+    if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    if (init_vuikan_device())
+    {
+        log_info("Cannot initialise Vulkan. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    VulkanDevice vkDevice;
+
+    const std::vector<VulkanExternalMemoryHandleType>
+        vkExternalMemoryHandleTypeList =
+            getSupportedVulkanExternalMemoryHandleTypeList();
+    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+    VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+
+    clExternalSemaphore sema_ext(vkVk2CLSemaphore, context,
+                                 vkExternalSemaphoreHandleType, deviceID);
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+
+    cl_int err = CL_SUCCESS;
+
+    // Signal semaphore on queue_1
+    clEventWrapper signal_event;
+    err = clEnqueueSignalSemaphoresKHR(queue_1, 1, &sema_ext.getCLSemaphore(),
+                                       nullptr, 0, nullptr, &signal_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore on queue_2
+    clEventWrapper wait_event;
+    err = clEnqueueWaitSemaphoresKHR(queue_2, 1, &sema_ext.getCLSemaphore(),
+                                     nullptr, 0, nullptr, &wait_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Finish queue_1 and queue_2
+    err = clFinish(queue_1);
+    test_error(err, "Could not finish queue");
+
+    err = clFinish(queue_2);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(signal_event);
+    test_assert_event_complete(wait_event);
+
+    return TEST_PASS;
+}
+
+// Confirm that a signal followed by a wait will complete successfully
+int test_external_semaphores_simple_1(cl_device_id deviceID, cl_context context,
+                                      cl_command_queue defaultQueue,
+                                      int num_elements)
+{
+    if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    if (init_vuikan_device())
+    {
+        log_info("Cannot initialise Vulkan. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    VulkanDevice vkDevice;
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+
+    const std::vector<VulkanExternalMemoryHandleType>
+        vkExternalMemoryHandleTypeList =
+            getSupportedVulkanExternalMemoryHandleTypeList();
+    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+    VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+
+    clExternalSemaphore sema_ext(vkVk2CLSemaphore, context,
+                                 vkExternalSemaphoreHandleType, deviceID);
+
+    cl_int err = CL_SUCCESS;
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Signal semaphore
+    clEventWrapper signal_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
+                                       nullptr, 0, nullptr, &signal_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore
+    clEventWrapper wait_event;
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
+                                     nullptr, 0, nullptr, &wait_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Finish
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(signal_event);
+    test_assert_event_complete(wait_event);
+
+    return TEST_PASS;
+}
+
+// Confirm that signal a semaphore with no event dependencies will not result
+// in an implicit dependency on everything previously submitted
+int test_external_semaphores_simple_2(cl_device_id deviceID, cl_context context,
+                                      cl_command_queue defaultQueue,
+                                      int num_elements)
+{
+    if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    if (init_vuikan_device())
+    {
+        log_info("Cannot initialise Vulkan. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    VulkanDevice vkDevice;
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+
+    const std::vector<VulkanExternalMemoryHandleType>
+        vkExternalMemoryHandleTypeList =
+            getSupportedVulkanExternalMemoryHandleTypeList();
+    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+    VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+
+    clExternalSemaphore sema_ext(vkVk2CLSemaphore, context,
+                                 vkExternalSemaphoreHandleType, deviceID);
+
+    cl_int err = CL_SUCCESS;
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Create user event
+    clEventWrapper user_event = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    // Create Kernel
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    err = create_single_kernel_helper(context, &program, &kernel, 1, &source,
+                                      "empty");
+    test_error(err, "Could not create kernel");
+
+    // Enqueue task_1 (dependency on user_event)
+    clEventWrapper task_1_event;
+    err = clEnqueueTask(queue, kernel, 1, &user_event, &task_1_event);
+    test_error(err, "Could not enqueue task 1");
+
+    // Signal semaphore
+    clEventWrapper signal_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
+                                       nullptr, 0, nullptr, &signal_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore
+    clEventWrapper wait_event;
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
+                                     nullptr, 0, nullptr, &wait_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Flush and delay
+    err = clFlush(queue);
+    test_error(err, "Could not flush queue");
+    std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
+
+    // Ensure all events are completed except for task_1
+    test_assert_event_inprogress(task_1_event);
+    test_assert_event_complete(signal_event);
+    test_assert_event_complete(wait_event);
+
+    // Complete user_event
+    err = clSetUserEventStatus(user_event, CL_COMPLETE);
+    test_error(err, "Could not set user event to CL_COMPLETE");
+
+    // Finish
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(task_1_event);
+    test_assert_event_complete(signal_event);
+    test_assert_event_complete(wait_event);
+
+    return TEST_PASS;
+}
+
+// Confirm that a semaphore can be reused multiple times
+int test_external_semaphores_reuse(cl_device_id deviceID, cl_context context,
+                                   cl_command_queue defaultQueue,
+                                   int num_elements)
+{
+    if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    if (init_vuikan_device())
+    {
+        log_info("Cannot initialise Vulkan. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    VulkanDevice vkDevice;
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+
+    const std::vector<VulkanExternalMemoryHandleType>
+        vkExternalMemoryHandleTypeList =
+            getSupportedVulkanExternalMemoryHandleTypeList();
+    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+    VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+
+    clExternalSemaphore sema_ext(vkVk2CLSemaphore, context,
+                                 vkExternalSemaphoreHandleType, deviceID);
+
+    cl_int err = CL_SUCCESS;
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Create Kernel
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    err = create_single_kernel_helper(context, &program, &kernel, 1, &source,
+                                      "empty");
+    test_error(err, "Could not create kernel");
+
+    constexpr size_t loop_count = 10;
+    clEventWrapper signal_events[loop_count];
+    clEventWrapper wait_events[loop_count];
+    clEventWrapper task_events[loop_count];
+
+    // Enqueue task_1
+    err = clEnqueueTask(queue, kernel, 0, nullptr, &task_events[0]);
+    test_error(err, "Unable to enqueue task_1");
+
+    // Signal semaphore (dependency on task_1)
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
+                                       nullptr, 1, &task_events[0],
+                                       &signal_events[0]);
+    test_error(err, "Could not signal semaphore");
+
+    // In a loop
+    size_t loop;
+    for (loop = 1; loop < loop_count; ++loop)
+    {
+        // Wait semaphore
+        err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
+                                         nullptr, 0, nullptr,
+                                         &wait_events[loop - 1]);
+        test_error(err, "Could not wait semaphore");
+
+        // Enqueue task_loop (dependency on wait)
+        err = clEnqueueTask(queue, kernel, 1, &wait_events[loop - 1],
+                            &task_events[loop]);
+        test_error(err, "Unable to enqueue task_loop");
+
+        // Wait for the "wait semaphore" to complete
+        err = clWaitForEvents(1, &wait_events[loop - 1]);
+        test_error(err, "Unable to wait for wait semaphore to complete");
+
+        // Signal semaphore (dependency on task_loop)
+        err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
+                                           nullptr, 1, &task_events[loop],
+                                           &signal_events[loop]);
+        test_error(err, "Could not signal semaphore");
+    }
+
+    // Wait semaphore
+    err =
+        clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
+                                   nullptr, 0, nullptr, &wait_events[loop - 1]);
+    test_error(err, "Could not wait semaphore");
+
+    // Finish
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    for (loop = 0; loop < loop_count; ++loop)
+    {
+        test_assert_event_complete(wait_events[loop]);
+        test_assert_event_complete(signal_events[loop]);
+        test_assert_event_complete(task_events[loop]);
+    }
+
+    return TEST_PASS;
+}
+
+// Helper function that signals and waits on semaphore across two different
+// queues.
+static int external_semaphore_cross_queue_helper(cl_device_id deviceID,
+                                                 cl_context context,
+                                                 cl_command_queue queue_1,
+                                                 cl_command_queue queue_2)
+{
+    if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    if (init_vuikan_device())
+    {
+        log_info("Cannot initialise Vulkan. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    VulkanDevice vkDevice;
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+
+    const std::vector<VulkanExternalMemoryHandleType>
+        vkExternalMemoryHandleTypeList =
+            getSupportedVulkanExternalMemoryHandleTypeList();
+    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+    VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+
+    clExternalSemaphore sema_ext(vkVk2CLSemaphore, context,
+                                 vkExternalSemaphoreHandleType, deviceID);
+
+    cl_int err = CL_SUCCESS;
+
+    // Signal semaphore on queue_1
+    clEventWrapper signal_event;
+    err = clEnqueueSignalSemaphoresKHR(queue_1, 1, &sema_ext.getCLSemaphore(),
+                                       nullptr, 0, nullptr, &signal_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore on queue_2
+    clEventWrapper wait_event;
+    err = clEnqueueWaitSemaphoresKHR(queue_2, 1, &sema_ext.getCLSemaphore(),
+                                     nullptr, 0, nullptr, &wait_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Finish queue_1 and queue_2
+    err = clFinish(queue_1);
+    test_error(err, "Could not finish queue");
+
+    err = clFinish(queue_2);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(signal_event);
+    test_assert_event_complete(wait_event);
+
+    return TEST_PASS;
+}
+
+
+// Confirm that a semaphore works across different ooo queues
+int test_external_semaphores_cross_queues_ooo(cl_device_id deviceID,
+                                              cl_context context,
+                                              cl_command_queue defaultQueue,
+                                              int num_elements)
+{
+    cl_int err;
+
+    // Create ooo queues
+    clCommandQueueWrapper queue_1 = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    clCommandQueueWrapper queue_2 = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    return external_semaphore_cross_queue_helper(deviceID, context, queue_1,
+                                                 queue_2);
+}
+
+// Confirm that a semaphore works across different in-order queues
+int test_external_semaphores_cross_queues_io(cl_device_id deviceID,
+                                             cl_context context,
+                                             cl_command_queue defaultQueue,
+                                             int num_elements)
+{
+    cl_int err;
+
+    // Create in-order queues
+    clCommandQueueWrapper queue_1 =
+        clCreateCommandQueue(context, deviceID, 0, &err);
+    test_error(err, "Could not create command queue");
+
+    clCommandQueueWrapper queue_2 =
+        clCreateCommandQueue(context, deviceID, 0, &err);
+    test_error(err, "Could not create command queue");
+
+    return external_semaphore_cross_queue_helper(deviceID, context, queue_1,
+                                                 queue_2);
+}
+
+int test_external_semaphores_cross_queues_io2(cl_device_id deviceID,
+                                              cl_context context,
+                                              cl_command_queue defaultQueue,
+                                              int num_elements)
+{
+    if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    if (init_vuikan_device())
+    {
+        log_info("Cannot initialise Vulkan. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    VulkanDevice vkDevice;
+
+    cl_int err = CL_SUCCESS;
+
+    cl_context context2 =
+        clCreateContext(NULL, 1, &deviceID, notify_callback, NULL, &err);
+    if (!context2)
+    {
+        print_error(err, "Unable to create testing context");
+        return TEST_FAIL;
+    }
+
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+
+    const std::vector<VulkanExternalMemoryHandleType>
+        vkExternalMemoryHandleTypeList =
+            getSupportedVulkanExternalMemoryHandleTypeList();
+    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+    VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+
+    clExternalSemaphore sema_ext_1(vkVk2CLSemaphore, context,
+                                   vkExternalSemaphoreHandleType, deviceID);
+    clExternalSemaphore sema_ext_2(vkVk2CLSemaphore, context2,
+                                   vkExternalSemaphoreHandleType, deviceID);
+
+    clCommandQueueWrapper queue1 =
+        clCreateCommandQueue(context, deviceID, 0, &err);
+    test_error(err, "Could not create command queue");
+
+    clCommandQueueWrapper queue2 =
+        clCreateCommandQueue(context2, deviceID, 0, &err);
+    test_error(err, "Could not create command queue");
+
+    // Signal semaphore 1 and 2
+    clEventWrapper signal_event;
+    err = clEnqueueSignalSemaphoresKHR(queue1, 1, &sema_ext_1.getCLSemaphore(),
+                                       nullptr, 0, nullptr, &signal_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore 1
+    clEventWrapper wait_1_event;
+    err = clEnqueueWaitSemaphoresKHR(queue1, 1, &sema_ext_1.getCLSemaphore(),
+                                     nullptr, 0, nullptr, &wait_1_event);
+    test_error(err, "Could not wait semaphore");
+
+    err = clEnqueueSignalSemaphoresKHR(queue2, 1, &sema_ext_2.getCLSemaphore(),
+                                       nullptr, 0, nullptr, &signal_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore 2
+    clEventWrapper wait_2_event;
+    err = clEnqueueWaitSemaphoresKHR(queue2, 1, &sema_ext_2.getCLSemaphore(),
+                                     nullptr, 0, nullptr, &wait_2_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Finish
+    err = clFinish(queue1);
+    test_error(err, "Could not finish queue");
+
+    err = clFinish(queue2);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(signal_event);
+    test_assert_event_complete(wait_1_event);
+    test_assert_event_complete(wait_2_event);
+
+    return TEST_PASS;
+}
+
+// Confirm that we can signal multiple semaphores with one command
+int test_external_semaphores_multi_signal(cl_device_id deviceID,
+                                          cl_context context,
+                                          cl_command_queue defaultQueue,
+                                          int num_elements)
+{
+    if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    if (init_vuikan_device())
+    {
+        log_info("Cannot initialise Vulkan. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    VulkanDevice vkDevice;
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+
+    const std::vector<VulkanExternalMemoryHandleType>
+        vkExternalMemoryHandleTypeList =
+            getSupportedVulkanExternalMemoryHandleTypeList();
+    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+    VulkanSemaphore vkVk2CLSemaphore1(vkDevice, vkExternalSemaphoreHandleType);
+    VulkanSemaphore vkVk2CLSemaphore2(vkDevice, vkExternalSemaphoreHandleType);
+
+    clExternalSemaphore sema_ext_1(vkVk2CLSemaphore1, context,
+                                   vkExternalSemaphoreHandleType, deviceID);
+    clExternalSemaphore sema_ext_2(vkVk2CLSemaphore2, context,
+                                   vkExternalSemaphoreHandleType, deviceID);
+
+    cl_int err = CL_SUCCESS;
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Signal semaphore 1 and 2
+    clEventWrapper signal_event;
+    cl_semaphore_khr sema_list[] = { sema_ext_1.getCLSemaphore(),
+                                     sema_ext_2.getCLSemaphore() };
+    err = clEnqueueSignalSemaphoresKHR(queue, 2, sema_list, nullptr, 0, nullptr,
+                                       &signal_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore 1
+    clEventWrapper wait_1_event;
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext_1.getCLSemaphore(),
+                                     nullptr, 0, nullptr, &wait_1_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Wait semaphore 2
+    clEventWrapper wait_2_event;
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext_2.getCLSemaphore(),
+                                     nullptr, 0, nullptr, &wait_2_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Finish
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(signal_event);
+    test_assert_event_complete(wait_1_event);
+    test_assert_event_complete(wait_2_event);
+
+    return TEST_PASS;
+}
+
+// Confirm that we can wait for multiple semaphores with one command
+int test_external_semaphores_multi_wait(cl_device_id deviceID,
+                                        cl_context context,
+                                        cl_command_queue defaultQueue,
+                                        int num_elements)
+{
+    if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    if (init_vuikan_device())
+    {
+        log_info("Cannot initialise Vulkan. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    VulkanDevice vkDevice;
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+
+    const std::vector<VulkanExternalMemoryHandleType>
+        vkExternalMemoryHandleTypeList =
+            getSupportedVulkanExternalMemoryHandleTypeList();
+    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+    VulkanSemaphore vkVk2CLSemaphore1(vkDevice, vkExternalSemaphoreHandleType);
+    VulkanSemaphore vkVk2CLSemaphore2(vkDevice, vkExternalSemaphoreHandleType);
+
+    clExternalSemaphore sema_ext_1(vkVk2CLSemaphore1, context,
+                                   vkExternalSemaphoreHandleType, deviceID);
+    clExternalSemaphore sema_ext_2(vkVk2CLSemaphore2, context,
+                                   vkExternalSemaphoreHandleType, deviceID);
+
+    cl_int err = CL_SUCCESS;
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Signal semaphore 1
+    clEventWrapper signal_1_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_1.getCLSemaphore(),
+                                       nullptr, 0, nullptr, &signal_1_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Signal semaphore 2
+    clEventWrapper signal_2_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_2.getCLSemaphore(),
+                                       nullptr, 0, nullptr, &signal_2_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore 1 and 2
+    clEventWrapper wait_event;
+    cl_semaphore_khr sema_list[] = { sema_ext_1.getCLSemaphore(),
+                                     sema_ext_2.getCLSemaphore() };
+    err = clEnqueueWaitSemaphoresKHR(queue, 2, sema_list, nullptr, 0, nullptr,
+                                     &wait_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Finish
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(signal_1_event);
+    test_assert_event_complete(signal_2_event);
+    test_assert_event_complete(wait_event);
+
+    return TEST_PASS;
+}
+
+// Confirm that it is possible to enqueue a signal of wait and signal in any
+// order as soon as the submission order (after deferred dependencies) is
+// correct. Case: first one deferred wait, then one non deferred signal.
+int test_external_semaphores_order_1(cl_device_id deviceID, cl_context context,
+                                     cl_command_queue defaultQueue,
+                                     int num_elements)
+{
+    if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    if (init_vuikan_device())
+    {
+        log_info("Cannot initialise Vulkan. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    VulkanDevice vkDevice;
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+
+    const std::vector<VulkanExternalMemoryHandleType>
+        vkExternalMemoryHandleTypeList =
+            getSupportedVulkanExternalMemoryHandleTypeList();
+    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+    VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+
+    clExternalSemaphore sema_ext(vkVk2CLSemaphore, context,
+                                 vkExternalSemaphoreHandleType, deviceID);
+
+    cl_int err = CL_SUCCESS;
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Create user event
+    clEventWrapper user_event = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    // Wait semaphore (dependency on user_event)
+    clEventWrapper wait_event;
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
+                                     nullptr, 1, &user_event, &wait_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Signal semaphore
+    clEventWrapper signal_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
+                                       nullptr, 0, nullptr, &signal_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Flush and delay
+    err = clFlush(queue);
+    test_error(err, "Could not flush queue");
+    std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
+
+    // Ensure signal event is completed while wait event is not
+    test_assert_event_complete(signal_event);
+    test_assert_event_inprogress(wait_event);
+
+    // Complete user_event
+    err = clSetUserEventStatus(user_event, CL_COMPLETE);
+    test_error(err, "Could not set user event to CL_COMPLETE");
+
+    // Finish
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(signal_event);
+    test_assert_event_complete(wait_event);
+
+    return TEST_PASS;
+}
+
+// Confirm that it is possible to enqueue a signal of wait and signal in any
+// order as soon as the submission order (after deferred dependencies) is
+// correct. Case: first two deferred signals, then one deferred wait. Unblock
+// signal, then unblock wait. When wait completes, unblock the other signal.
+int test_external_semaphores_order_2(cl_device_id deviceID, cl_context context,
+                                     cl_command_queue defaultQueue,
+                                     int num_elements)
+{
+    if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    if (init_vuikan_device())
+    {
+        log_info("Cannot initialise Vulkan. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    VulkanDevice vkDevice;
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+
+    const std::vector<VulkanExternalMemoryHandleType>
+        vkExternalMemoryHandleTypeList =
+            getSupportedVulkanExternalMemoryHandleTypeList();
+    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+    VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+
+    clExternalSemaphore sema_ext(vkVk2CLSemaphore, context,
+                                 vkExternalSemaphoreHandleType, deviceID);
+
+    cl_int err = CL_SUCCESS;
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Create user events
+    clEventWrapper user_event_1 = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    clEventWrapper user_event_2 = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    clEventWrapper user_event_3 = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    // Signal semaphore (dependency on user_event_1)
+    clEventWrapper signal_1_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
+                                       nullptr, 1, &user_event_1,
+                                       &signal_1_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Signal semaphore (dependency on user_event_2)
+    clEventWrapper signal_2_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
+                                       nullptr, 1, &user_event_2,
+                                       &signal_2_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore (dependency on user_event_3)
+    clEventWrapper wait_event;
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
+                                     nullptr, 1, &user_event_3, &wait_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Complete user_event_1
+    err = clSetUserEventStatus(user_event_1, CL_COMPLETE);
+    test_error(err, "Could not set user event to CL_COMPLETE");
+
+    // Complete user_event_3
+    err = clSetUserEventStatus(user_event_3, CL_COMPLETE);
+    test_error(err, "Could not set user event to CL_COMPLETE");
+
+    // Flush and delay
+    err = clFlush(queue);
+    test_error(err, "Could not flush queue");
+    std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
+
+    // Ensure all events are completed except for second signal
+    test_assert_event_complete(signal_1_event);
+    test_assert_event_inprogress(signal_2_event);
+    test_assert_event_complete(wait_event);
+
+    // Complete user_event_2
+    err = clSetUserEventStatus(user_event_2, CL_COMPLETE);
+    test_error(err, "Could not set user event to CL_COMPLETE");
+
+    // Finish
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(signal_1_event);
+    test_assert_event_complete(signal_2_event);
+    test_assert_event_complete(wait_event);
+
+    return TEST_PASS;
+}
+
+// Confirm that it is possible to enqueue a signal of wait and signal in any
+// order as soon as the submission order (after deferred dependencies) is
+// correct. Case: first two deferred signals, then two deferred waits. Unblock
+// one signal and one wait (both blocked by the same user event). When wait
+// completes, unblock the other signal. Then unblock the other wait.
+int test_external_semaphores_order_3(cl_device_id deviceID, cl_context context,
+                                     cl_command_queue defaultQueue,
+                                     int num_elements)
+{
+    if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    if (init_vuikan_device())
+    {
+        log_info("Cannot initialise Vulkan. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    VulkanDevice vkDevice;
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+
+    const std::vector<VulkanExternalMemoryHandleType>
+        vkExternalMemoryHandleTypeList =
+            getSupportedVulkanExternalMemoryHandleTypeList();
+    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+    VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+
+    clExternalSemaphore sema_ext(vkVk2CLSemaphore, context,
+                                 vkExternalSemaphoreHandleType, deviceID);
+
+    cl_int err = CL_SUCCESS;
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Create user events
+    clEventWrapper user_event_1 = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    clEventWrapper user_event_2 = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    clEventWrapper user_event_3 = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    // Signal semaphore (dependency on user_event_1)
+    clEventWrapper signal_1_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
+                                       nullptr, 1, &user_event_1,
+                                       &signal_1_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Signal semaphore (dependency on user_event_2)
+    clEventWrapper signal_2_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
+                                       nullptr, 1, &user_event_2,
+                                       &signal_2_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore (dependency on user_event_3)
+    clEventWrapper wait_1_event;
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
+                                     nullptr, 1, &user_event_3, &wait_1_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Wait semaphore (dependency on user_event_2)
+    clEventWrapper wait_2_event;
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
+                                     nullptr, 1, &user_event_2, &wait_2_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Complete user_event_2
+    err = clSetUserEventStatus(user_event_2, CL_COMPLETE);
+    test_error(err, "Could not set user event to CL_COMPLETE");
+
+    // Flush and delay
+    err = clFlush(queue);
+    test_error(err, "Could not flush queue");
+    std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
+
+    // Ensure only second signal and second wait completed
+    cl_event event_list[] = { signal_2_event, wait_2_event };
+    err = clWaitForEvents(2, event_list);
+    test_error(err, "Could not wait for events");
+
+    test_assert_event_inprogress(signal_1_event);
+    test_assert_event_inprogress(wait_1_event);
+
+    // Complete user_event_1
+    err = clSetUserEventStatus(user_event_1, CL_COMPLETE);
+    test_error(err, "Could not set user event to CL_COMPLETE");
+
+    // Complete user_event_3
+    err = clSetUserEventStatus(user_event_3, CL_COMPLETE);
+    test_error(err, "Could not set user event to CL_COMPLETE");
+
+    // Finish
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(signal_1_event);
+    test_assert_event_complete(signal_2_event);
+    test_assert_event_complete(wait_1_event);
+    test_assert_event_complete(wait_2_event);
+
+    return TEST_PASS;
+}
+
+// Test that an invalid semaphore command results in the invalidation of the
+// command's event and the dependencies' events
+int test_external_semaphores_invalid_command(cl_device_id deviceID,
+                                             cl_context context,
+                                             cl_command_queue defaultQueue,
+                                             int num_elements)
+{
+    if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    if (init_vuikan_device())
+    {
+        log_info("Cannot initialise Vulkan. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    VulkanDevice vkDevice;
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+
+    const std::vector<VulkanExternalMemoryHandleType>
+        vkExternalMemoryHandleTypeList =
+            getSupportedVulkanExternalMemoryHandleTypeList();
+    VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
+        getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
+    VulkanSemaphore vkVk2CLSemaphore1(vkDevice, vkExternalSemaphoreHandleType);
+    VulkanSemaphore vkVk2CLSemaphore2(vkDevice, vkExternalSemaphoreHandleType);
+
+    clExternalSemaphore sema_ext_1(vkVk2CLSemaphore1, context,
+                                   vkExternalSemaphoreHandleType, deviceID);
+    clExternalSemaphore sema_ext_2(vkVk2CLSemaphore2, context,
+                                   vkExternalSemaphoreHandleType, deviceID);
+
+    cl_int err = CL_SUCCESS;
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Create user events
+    clEventWrapper user_event_1 = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    clEventWrapper user_event_2 = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    // Signal semaphore_1 (dependency on user_event_1)
+    clEventWrapper signal_1_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_1.getCLSemaphore(),
+                                       nullptr, 1, &user_event_1,
+                                       &signal_1_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore_1 and semaphore_2 (dependency on user_event_1)
+    clEventWrapper wait_event;
+    cl_semaphore_khr sema_list[] = { sema_ext_1.getCLSemaphore(),
+                                     sema_ext_2.getCLSemaphore() };
+    err = clEnqueueWaitSemaphoresKHR(queue, 2, sema_list, nullptr, 1,
+                                     &user_event_1, &wait_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Signal semaphore_1 (dependency on wait_event and user_event_2)
+    clEventWrapper signal_2_event;
+    cl_event wait_list[] = { user_event_2, wait_event };
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_1.getCLSemaphore(),
+                                       nullptr, 2, wait_list, &signal_2_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Flush and delay
+    err = clFlush(queue);
+    test_error(err, "Could not flush queue");
+    std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
+
+    // Ensure all events are not completed
+    test_assert_event_inprogress(signal_1_event);
+    test_assert_event_inprogress(signal_2_event);
+    test_assert_event_inprogress(wait_event);
+
+    // Complete user_event_1 (expect failure as waiting on semaphore_2 is not
+    // allowed (unsignaled)
+    err = clSetUserEventStatus(user_event_1, CL_COMPLETE);
+    test_assert_error(err != CL_SUCCESS,
+                      "signal_2_event completed unexpectedly");
+
+    // Ensure signal_1 is completed while others failed (the second signal
+    // should fail as it depends on wait)
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    test_assert_event_complete(signal_1_event);
+    test_assert_event_terminated(wait_event);
+    test_assert_event_terminated(signal_2_event);
+
+    return TEST_PASS;
+}
diff --git a/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt b/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt
new file mode 100644
index 00000000..824784a1
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_semaphore/CMakeLists.txt
@@ -0,0 +1,8 @@
+set(MODULE_NAME CL_KHR_SEMAPHORE)
+
+set(${MODULE_NAME}_SOURCES
+         main.cpp
+         test_semaphores.cpp
+)
+
+include(../../CMakeCommon.txt)
diff --git a/test_conformance/extensions/cl_khr_semaphore/main.cpp b/test_conformance/extensions/cl_khr_semaphore/main.cpp
new file mode 100644
index 00000000..ab9699b0
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_semaphore/main.cpp
@@ -0,0 +1,49 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "harness/compat.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <string.h>
+#include "procs.h"
+#include "harness/testHarness.h"
+
+#if !defined(_WIN32)
+#include <unistd.h>
+#endif
+
+test_definition test_list[] = {
+    ADD_TEST_VERSION(semaphores_simple_1, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_simple_2, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_reuse, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_cross_queues_ooo, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_cross_queues_io, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_multi_signal, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_multi_wait, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_queries, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_order_1, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_order_2, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_order_3, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_import_export_fd, Version(1, 2)),
+    ADD_TEST_VERSION(semaphores_invalid_command, Version(1, 2)),
+};
+
+const int test_num = ARRAY_SIZE(test_list);
+
+int main(int argc, const char *argv[])
+{
+    return runTestHarness(argc, argv, test_num, test_list, false, 0);
+}
diff --git a/test_conformance/extensions/cl_khr_semaphore/procs.h b/test_conformance/extensions/cl_khr_semaphore/procs.h
new file mode 100644
index 00000000..06651af4
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_semaphore/procs.h
@@ -0,0 +1,57 @@
+//
+// Copyright (c) 2017 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "harness/errorHelpers.h"
+#include "harness/kernelHelpers.h"
+#include "harness/typeWrappers.h"
+#include "harness/clImageHelper.h"
+#include "harness/imageHelpers.h"
+
+extern int test_semaphores_simple_1(cl_device_id deviceID, cl_context context,
+                                    cl_command_queue queue, int num_elements);
+extern int test_semaphores_simple_2(cl_device_id deviceID, cl_context context,
+                                    cl_command_queue queue, int num_elements);
+extern int test_semaphores_reuse(cl_device_id deviceID, cl_context context,
+                                 cl_command_queue queue, int num_elements);
+extern int test_semaphores_cross_queues_ooo(cl_device_id deviceID,
+                                            cl_context context,
+                                            cl_command_queue queue,
+                                            int num_elements);
+extern int test_semaphores_cross_queues_io(cl_device_id deviceID,
+                                           cl_context context,
+                                           cl_command_queue queue,
+                                           int num_elements);
+extern int test_semaphores_multi_signal(cl_device_id deviceID,
+                                        cl_context context,
+                                        cl_command_queue queue,
+                                        int num_elements);
+extern int test_semaphores_multi_wait(cl_device_id deviceID, cl_context context,
+                                      cl_command_queue queue, int num_elements);
+extern int test_semaphores_queries(cl_device_id deviceID, cl_context context,
+                                   cl_command_queue queue, int num_elements);
+extern int test_semaphores_order_1(cl_device_id deviceID, cl_context context,
+                                   cl_command_queue queue, int num_elements);
+extern int test_semaphores_order_2(cl_device_id deviceID, cl_context context,
+                                   cl_command_queue queue, int num_elements);
+extern int test_semaphores_order_3(cl_device_id deviceID, cl_context context,
+                                   cl_command_queue queue, int num_elements);
+extern int test_semaphores_import_export_fd(cl_device_id deviceID,
+                                            cl_context context,
+                                            cl_command_queue queue,
+                                            int num_elements);
+extern int test_semaphores_invalid_command(cl_device_id deviceID,
+                                           cl_context context,
+                                           cl_command_queue queue,
+                                           int num_elements);
diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp
new file mode 100644
index 00000000..7d03bff3
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp
@@ -0,0 +1,1145 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+
+#include "harness/typeWrappers.h"
+#include "harness/extensionHelpers.h"
+#include "harness/errorHelpers.h"
+#include <system_error>
+#include <thread>
+#include <chrono>
+
+#define FLUSH_DELAY_S 5
+
+#define SEMAPHORE_PARAM_TEST(param_name, param_type, expected)                 \
+    do                                                                         \
+    {                                                                          \
+        param_type value;                                                      \
+        size_t size;                                                           \
+        cl_int error = clGetSemaphoreInfoKHR(sema, param_name, sizeof(value),  \
+                                             &value, &size);                   \
+        test_error(error, "Unable to get " #param_name " from semaphore");     \
+        if (value != expected)                                                 \
+        {                                                                      \
+            test_fail("ERROR: Parameter %s did not validate! (expected %d, "   \
+                      "got %d)\n",                                             \
+                      #param_name, expected, value);                           \
+        }                                                                      \
+        if (size != sizeof(value))                                             \
+        {                                                                      \
+            test_fail(                                                         \
+                "ERROR: Returned size of parameter %s does not validate! "     \
+                "(expected %d, got %d)\n",                                     \
+                #param_name, (int)sizeof(value), (int)size);                   \
+        }                                                                      \
+    } while (false)
+
+#define SEMAPHORE_PARAM_TEST_ARRAY(param_name, param_type, num_params,         \
+                                   expected)                                   \
+    do                                                                         \
+    {                                                                          \
+        param_type value[num_params];                                          \
+        size_t size;                                                           \
+        cl_int error = clGetSemaphoreInfoKHR(sema, param_name, sizeof(value),  \
+                                             &value, &size);                   \
+        test_error(error, "Unable to get " #param_name " from semaphore");     \
+        if (size != sizeof(value))                                             \
+        {                                                                      \
+            test_fail(                                                         \
+                "ERROR: Returned size of parameter %s does not validate! "     \
+                "(expected %d, got %d)\n",                                     \
+                #param_name, (int)sizeof(value), (int)size);                   \
+        }                                                                      \
+        if (memcmp(value, expected, size) != 0)                                \
+        {                                                                      \
+            test_fail("ERROR: Parameter %s did not validate!\n", #param_name); \
+        }                                                                      \
+    } while (false)
+
+static const char* source = "__kernel void empty() {}";
+
+// Helper function that signals and waits on semaphore across two different
+// queues.
+static int semaphore_cross_queue_helper(cl_device_id deviceID,
+                                        cl_context context,
+                                        cl_command_queue queue_1,
+                                        cl_command_queue queue_2)
+{
+    cl_int err;
+
+    if (!is_extension_available(deviceID, "cl_khr_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+    GET_PFN(deviceID, clReleaseSemaphoreKHR);
+
+    // Create semaphore
+    cl_semaphore_properties_khr sema_props[] = {
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
+        0
+    };
+    cl_semaphore_khr sema =
+        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+    test_error(err, "Could not create semaphore");
+
+    // Signal semaphore on queue_1
+    clEventWrapper signal_event;
+    err = clEnqueueSignalSemaphoresKHR(queue_1, 1, &sema, nullptr, 0, nullptr,
+                                       &signal_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore on queue_2
+    clEventWrapper wait_event;
+    err = clEnqueueWaitSemaphoresKHR(queue_2, 1, &sema, nullptr, 0, nullptr,
+                                     &wait_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Finish queue_1 and queue_2
+    err = clFinish(queue_1);
+    test_error(err, "Could not finish queue");
+
+    err = clFinish(queue_2);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(signal_event);
+    test_assert_event_complete(wait_event);
+
+    // Release semaphore
+    err = clReleaseSemaphoreKHR(sema);
+    test_error(err, "Could not release semaphore");
+
+    return TEST_PASS;
+}
+
+// Confirm that a signal followed by a wait will complete successfully
+int test_semaphores_simple_1(cl_device_id deviceID, cl_context context,
+                             cl_command_queue defaultQueue, int num_elements)
+{
+    cl_int err;
+
+    if (!is_extension_available(deviceID, "cl_khr_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+    GET_PFN(deviceID, clReleaseSemaphoreKHR);
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Create semaphore
+    cl_semaphore_properties_khr sema_props[] = {
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
+        0
+    };
+    cl_semaphore_khr sema =
+        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+    test_error(err, "Could not create semaphore");
+
+    // Signal semaphore
+    clEventWrapper signal_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 0, nullptr,
+                                       &signal_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore
+    clEventWrapper wait_event;
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 0, nullptr,
+                                     &wait_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Finish
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(signal_event);
+    test_assert_event_complete(wait_event);
+
+    // Release semaphore
+    err = clReleaseSemaphoreKHR(sema);
+    test_error(err, "Could not release semaphore");
+
+    return TEST_PASS;
+}
+
+// Confirm that signal a semaphore with no event dependencies will not result
+// in an implicit dependency on everything previously submitted
+int test_semaphores_simple_2(cl_device_id deviceID, cl_context context,
+                             cl_command_queue defaultQueue, int num_elements)
+{
+    cl_int err;
+
+    if (!is_extension_available(deviceID, "cl_khr_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+    GET_PFN(deviceID, clReleaseSemaphoreKHR);
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Create semaphore
+    cl_semaphore_properties_khr sema_props[] = {
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
+        0
+    };
+    cl_semaphore_khr sema =
+        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+    test_error(err, "Could not create semaphore");
+
+    // Create user event
+    clEventWrapper user_event = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    // Create Kernel
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    err = create_single_kernel_helper(context, &program, &kernel, 1, &source,
+                                      "empty");
+    test_error(err, "Could not create kernel");
+
+    // Enqueue task_1 (dependency on user_event)
+    clEventWrapper task_1_event;
+    err = clEnqueueTask(queue, kernel, 1, &user_event, &task_1_event);
+    test_error(err, "Could not enqueue task 1");
+
+    // Signal semaphore
+    clEventWrapper signal_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 0, nullptr,
+                                       &signal_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore
+    clEventWrapper wait_event;
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 0, nullptr,
+                                     &wait_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Flush and delay
+    err = clFlush(queue);
+    test_error(err, "Could not flush queue");
+    std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
+
+    // Ensure all events are completed except for task_1
+    test_assert_event_inprogress(task_1_event);
+    test_assert_event_complete(signal_event);
+    test_assert_event_complete(wait_event);
+
+    // Complete user_event
+    err = clSetUserEventStatus(user_event, CL_COMPLETE);
+    test_error(err, "Could not set user event to CL_COMPLETE");
+
+    // Finish
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(task_1_event);
+    test_assert_event_complete(signal_event);
+    test_assert_event_complete(wait_event);
+
+    // Release semaphore
+    err = clReleaseSemaphoreKHR(sema);
+    test_error(err, "Could not release semaphore");
+
+    return TEST_PASS;
+}
+
+// Confirm that a semaphore can be reused multiple times
+int test_semaphores_reuse(cl_device_id deviceID, cl_context context,
+                          cl_command_queue defaultQueue, int num_elements)
+{
+    cl_int err;
+
+    if (!is_extension_available(deviceID, "cl_khr_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+    GET_PFN(deviceID, clReleaseSemaphoreKHR);
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Create semaphore
+    cl_semaphore_properties_khr sema_props[] = {
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
+        0
+    };
+    cl_semaphore_khr sema =
+        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+    test_error(err, "Could not create semaphore");
+
+    // Create Kernel
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    err = create_single_kernel_helper(context, &program, &kernel, 1, &source,
+                                      "empty");
+    test_error(err, "Could not create kernel");
+
+    constexpr size_t loop_count = 10;
+    clEventWrapper signal_events[loop_count];
+    clEventWrapper wait_events[loop_count];
+    clEventWrapper task_events[loop_count];
+
+    // Enqueue task_1
+    err = clEnqueueTask(queue, kernel, 0, nullptr, &task_events[0]);
+    test_error(err, "Unable to enqueue task_1");
+
+    // Signal semaphore (dependency on task_1)
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1,
+                                       &task_events[0], &signal_events[0]);
+    test_error(err, "Could not signal semaphore");
+
+    // In a loop
+    size_t loop;
+    for (loop = 1; loop < loop_count; ++loop)
+    {
+        // Wait semaphore
+        err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 0, nullptr,
+                                         &wait_events[loop - 1]);
+        test_error(err, "Could not wait semaphore");
+
+        // Enqueue task_loop (dependency on wait)
+        err = clEnqueueTask(queue, kernel, 1, &wait_events[loop - 1],
+                            &task_events[loop]);
+        test_error(err, "Unable to enqueue task_loop");
+
+        // Wait for the "wait semaphore" to complete
+        err = clWaitForEvents(1, &wait_events[loop - 1]);
+        test_error(err, "Unable to wait for wait semaphore to complete");
+
+        // Signal semaphore (dependency on task_loop)
+        err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1,
+                                           &task_events[loop],
+                                           &signal_events[loop]);
+        test_error(err, "Could not signal semaphore");
+    }
+
+    // Wait semaphore
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 0, nullptr,
+                                     &wait_events[loop - 1]);
+    test_error(err, "Could not wait semaphore");
+
+    // Finish
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    for (loop = 0; loop < loop_count; ++loop)
+    {
+        test_assert_event_complete(wait_events[loop]);
+        test_assert_event_complete(signal_events[loop]);
+        test_assert_event_complete(task_events[loop]);
+    }
+
+    // Release semaphore
+    err = clReleaseSemaphoreKHR(sema);
+    test_error(err, "Could not release semaphore");
+
+    return TEST_PASS;
+}
+
+// Confirm that a semaphore works across different ooo queues
+int test_semaphores_cross_queues_ooo(cl_device_id deviceID, cl_context context,
+                                     cl_command_queue defaultQueue,
+                                     int num_elements)
+{
+    cl_int err;
+
+    // Create ooo queues
+    clCommandQueueWrapper queue_1 = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    clCommandQueueWrapper queue_2 = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    return semaphore_cross_queue_helper(deviceID, context, queue_1, queue_2);
+}
+
+// Confirm that a semaphore works across different in-order queues
+int test_semaphores_cross_queues_io(cl_device_id deviceID, cl_context context,
+                                    cl_command_queue defaultQueue,
+                                    int num_elements)
+{
+    cl_int err;
+
+    // Create in-order queues
+    clCommandQueueWrapper queue_1 =
+        clCreateCommandQueue(context, deviceID, 0, &err);
+    test_error(err, "Could not create command queue");
+
+    clCommandQueueWrapper queue_2 =
+        clCreateCommandQueue(context, deviceID, 0, &err);
+    test_error(err, "Could not create command queue");
+
+    return semaphore_cross_queue_helper(deviceID, context, queue_1, queue_2);
+}
+
+// Confirm that we can signal multiple semaphores with one command
+int test_semaphores_multi_signal(cl_device_id deviceID, cl_context context,
+                                 cl_command_queue defaultQueue,
+                                 int num_elements)
+{
+    cl_int err;
+
+    if (!is_extension_available(deviceID, "cl_khr_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+    GET_PFN(deviceID, clReleaseSemaphoreKHR);
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Create semaphore
+    cl_semaphore_properties_khr sema_props[] = {
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
+        0
+    };
+    cl_semaphore_khr sema_1 =
+        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+    test_error(err, "Could not create semaphore");
+
+    cl_semaphore_khr sema_2 =
+        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+    test_error(err, "Could not create semaphore");
+
+    // Signal semaphore 1 and 2
+    clEventWrapper signal_event;
+    cl_semaphore_khr sema_list[] = { sema_1, sema_2 };
+    err = clEnqueueSignalSemaphoresKHR(queue, 2, sema_list, nullptr, 0, nullptr,
+                                       &signal_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore 1
+    clEventWrapper wait_1_event;
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_1, nullptr, 0, nullptr,
+                                     &wait_1_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Wait semaphore 2
+    clEventWrapper wait_2_event;
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_2, nullptr, 0, nullptr,
+                                     &wait_2_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Finish
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(signal_event);
+    test_assert_event_complete(wait_1_event);
+    test_assert_event_complete(wait_2_event);
+
+    // Release semaphores
+    err = clReleaseSemaphoreKHR(sema_1);
+    test_error(err, "Could not release semaphore");
+
+    err = clReleaseSemaphoreKHR(sema_2);
+    test_error(err, "Could not release semaphore");
+
+    return TEST_PASS;
+}
+
+// Confirm that we can wait for multiple semaphores with one command
+int test_semaphores_multi_wait(cl_device_id deviceID, cl_context context,
+                               cl_command_queue defaultQueue, int num_elements)
+{
+    cl_int err;
+
+    if (!is_extension_available(deviceID, "cl_khr_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+    GET_PFN(deviceID, clReleaseSemaphoreKHR);
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Create semaphores
+    cl_semaphore_properties_khr sema_props[] = {
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
+        0
+    };
+    cl_semaphore_khr sema_1 =
+        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+    test_error(err, "Could not create semaphore");
+
+    cl_semaphore_khr sema_2 =
+        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+    test_error(err, "Could not create semaphore");
+
+    // Signal semaphore 1
+    clEventWrapper signal_1_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_1, nullptr, 0, nullptr,
+                                       &signal_1_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Signal semaphore 2
+    clEventWrapper signal_2_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_2, nullptr, 0, nullptr,
+                                       &signal_2_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore 1 and 2
+    clEventWrapper wait_event;
+    cl_semaphore_khr sema_list[] = { sema_1, sema_2 };
+    err = clEnqueueWaitSemaphoresKHR(queue, 2, sema_list, nullptr, 0, nullptr,
+                                     &wait_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Finish
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(signal_1_event);
+    test_assert_event_complete(signal_2_event);
+    test_assert_event_complete(wait_event);
+
+    // Release semaphores
+    err = clReleaseSemaphoreKHR(sema_1);
+    test_error(err, "Could not release semaphore");
+
+    err = clReleaseSemaphoreKHR(sema_2);
+    test_error(err, "Could not release semaphore");
+
+    return TEST_PASS;
+}
+
+// Confirm the semaphores can be successfully queried
+int test_semaphores_queries(cl_device_id deviceID, cl_context context,
+                            cl_command_queue defaultQueue, int num_elements)
+{
+    cl_int err;
+
+    if (!is_extension_available(deviceID, "cl_khr_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
+    GET_PFN(deviceID, clGetSemaphoreInfoKHR);
+    GET_PFN(deviceID, clRetainSemaphoreKHR);
+    GET_PFN(deviceID, clReleaseSemaphoreKHR);
+
+    // Create binary semaphore
+    cl_semaphore_properties_khr sema_props[] = {
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
+        0
+    };
+    cl_semaphore_khr sema =
+        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+    test_error(err, "Could not create semaphore");
+
+    // Confirm that querying CL_SEMAPHORE_TYPE_KHR returns
+    // CL_SEMAPHORE_TYPE_BINARY_KHR
+    SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_TYPE_KHR, cl_semaphore_type_khr,
+                         CL_SEMAPHORE_TYPE_BINARY_KHR);
+
+    // Confirm that querying CL_SEMAPHORE_CONTEXT_KHR returns the right context
+    SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_CONTEXT_KHR, cl_context, context);
+
+    // Confirm that querying CL_SEMAPHORE_REFERENCE_COUNT_KHR returns the right
+    // value
+    SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_REFERENCE_COUNT_KHR, cl_uint, 1);
+
+    err = clRetainSemaphoreKHR(sema);
+    test_error(err, "Could not retain semaphore");
+    SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_REFERENCE_COUNT_KHR, cl_uint, 2);
+
+    err = clReleaseSemaphoreKHR(sema);
+    test_error(err, "Could not release semaphore");
+    SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_REFERENCE_COUNT_KHR, cl_uint, 1);
+
+    // Confirm that querying CL_SEMAPHORE_PROPERTIES_KHR returns the same
+    // properties the semaphore was created with
+    SEMAPHORE_PARAM_TEST_ARRAY(CL_SEMAPHORE_PROPERTIES_KHR,
+                               cl_semaphore_properties_khr, 3, sema_props);
+
+    // Confirm that querying CL_SEMAPHORE_PAYLOAD_KHR returns the unsignaled
+    // state
+    SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_PAYLOAD_KHR, cl_semaphore_payload_khr, 0);
+
+    err = clReleaseSemaphoreKHR(sema);
+    test_error(err, "Could not release semaphore");
+
+    return TEST_PASS;
+}
+
+// Confirm that it is possible to enqueue a signal of wait and signal in any
+// order as soon as the submission order (after deferred dependencies) is
+// correct. Case: first one deferred wait, then one non deferred signal.
+int test_semaphores_order_1(cl_device_id deviceID, cl_context context,
+                            cl_command_queue defaultQueue, int num_elements)
+{
+    cl_int err;
+
+    if (!is_extension_available(deviceID, "cl_khr_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+    GET_PFN(deviceID, clReleaseSemaphoreKHR);
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Create semaphore
+    cl_semaphore_properties_khr sema_props[] = {
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
+        0
+    };
+    cl_semaphore_khr sema =
+        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+    test_error(err, "Could not create semaphore");
+
+    // Create user event
+    clEventWrapper user_event = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    // Wait semaphore (dependency on user_event)
+    clEventWrapper wait_event;
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event,
+                                     &wait_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Signal semaphore
+    clEventWrapper signal_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 0, nullptr,
+                                       &signal_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Flush and delay
+    err = clFlush(queue);
+    test_error(err, "Could not flush queue");
+    std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
+
+    // Ensure signal event is completed while wait event is not
+    test_assert_event_complete(signal_event);
+    test_assert_event_inprogress(wait_event);
+
+    // Complete user_event
+    err = clSetUserEventStatus(user_event, CL_COMPLETE);
+    test_error(err, "Could not set user event to CL_COMPLETE");
+
+    // Finish
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(signal_event);
+    test_assert_event_complete(wait_event);
+
+    // Release semaphore
+    err = clReleaseSemaphoreKHR(sema);
+    test_error(err, "Could not release semaphore");
+
+    return TEST_PASS;
+}
+
+// Confirm that it is possible to enqueue a signal of wait and signal in any
+// order as soon as the submission order (after deferred dependencies) is
+// correct. Case: first two deferred signals, then one deferred wait. Unblock
+// signal, then unblock wait. When wait completes, unblock the other signal.
+int test_semaphores_order_2(cl_device_id deviceID, cl_context context,
+                            cl_command_queue defaultQueue, int num_elements)
+{
+    cl_int err;
+
+    if (!is_extension_available(deviceID, "cl_khr_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+    GET_PFN(deviceID, clReleaseSemaphoreKHR);
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Create semaphore
+    cl_semaphore_properties_khr sema_props[] = {
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
+        0
+    };
+    cl_semaphore_khr sema =
+        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+    test_error(err, "Could not create semaphore");
+
+    // Create user events
+    clEventWrapper user_event_1 = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    clEventWrapper user_event_2 = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    clEventWrapper user_event_3 = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    // Signal semaphore (dependency on user_event_1)
+    clEventWrapper signal_1_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1,
+                                       &user_event_1, &signal_1_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Signal semaphore (dependency on user_event_2)
+    clEventWrapper signal_2_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1,
+                                       &user_event_2, &signal_2_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore (dependency on user_event_3)
+    clEventWrapper wait_event;
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event_3,
+                                     &wait_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Complete user_event_1
+    err = clSetUserEventStatus(user_event_1, CL_COMPLETE);
+    test_error(err, "Could not set user event to CL_COMPLETE");
+
+    // Complete user_event_3
+    err = clSetUserEventStatus(user_event_3, CL_COMPLETE);
+    test_error(err, "Could not set user event to CL_COMPLETE");
+
+    // Flush and delay
+    err = clFlush(queue);
+    test_error(err, "Could not flush queue");
+    std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
+
+    // Ensure all events are completed except for second signal
+    test_assert_event_complete(signal_1_event);
+    test_assert_event_inprogress(signal_2_event);
+    test_assert_event_complete(wait_event);
+
+    // Complete user_event_2
+    err = clSetUserEventStatus(user_event_2, CL_COMPLETE);
+    test_error(err, "Could not set user event to CL_COMPLETE");
+
+    // Finish
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(signal_1_event);
+    test_assert_event_complete(signal_2_event);
+    test_assert_event_complete(wait_event);
+
+    // Release semaphore
+    err = clReleaseSemaphoreKHR(sema);
+    test_error(err, "Could not release semaphore");
+
+    return TEST_PASS;
+}
+
+// Confirm that it is possible to enqueue a signal of wait and signal in any
+// order as soon as the submission order (after deferred dependencies) is
+// correct. Case: first two deferred signals, then two deferred waits. Unblock
+// one signal and one wait (both blocked by the same user event). When wait
+// completes, unblock the other signal. Then unblock the other wait.
+int test_semaphores_order_3(cl_device_id deviceID, cl_context context,
+                            cl_command_queue defaultQueue, int num_elements)
+{
+    cl_int err;
+
+    if (!is_extension_available(deviceID, "cl_khr_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+    GET_PFN(deviceID, clReleaseSemaphoreKHR);
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Create semaphore
+    cl_semaphore_properties_khr sema_props[] = {
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
+        0
+    };
+    cl_semaphore_khr sema =
+        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+    test_error(err, "Could not create semaphore");
+
+    // Create user events
+    clEventWrapper user_event_1 = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    clEventWrapper user_event_2 = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    clEventWrapper user_event_3 = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    // Signal semaphore (dependency on user_event_1)
+    clEventWrapper signal_1_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1,
+                                       &user_event_1, &signal_1_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Signal semaphore (dependency on user_event_2)
+    clEventWrapper signal_2_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1,
+                                       &user_event_2, &signal_2_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore (dependency on user_event_3)
+    clEventWrapper wait_1_event;
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event_3,
+                                     &wait_1_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Wait semaphore (dependency on user_event_2)
+    clEventWrapper wait_2_event;
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event_2,
+                                     &wait_2_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Complete user_event_2
+    err = clSetUserEventStatus(user_event_2, CL_COMPLETE);
+    test_error(err, "Could not set user event to CL_COMPLETE");
+
+    // Flush and delay
+    err = clFlush(queue);
+    test_error(err, "Could not flush queue");
+    std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
+
+    // Ensure only second signal and second wait completed
+    cl_event event_list[] = { signal_2_event, wait_2_event };
+    err = clWaitForEvents(2, event_list);
+    test_error(err, "Could not wait for events");
+
+    test_assert_event_inprogress(signal_1_event);
+    test_assert_event_inprogress(wait_1_event);
+
+    // Complete user_event_1
+    err = clSetUserEventStatus(user_event_1, CL_COMPLETE);
+    test_error(err, "Could not set user event to CL_COMPLETE");
+
+    // Complete user_event_3
+    err = clSetUserEventStatus(user_event_3, CL_COMPLETE);
+    test_error(err, "Could not set user event to CL_COMPLETE");
+
+    // Finish
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    // Ensure all events are completed
+    test_assert_event_complete(signal_1_event);
+    test_assert_event_complete(signal_2_event);
+    test_assert_event_complete(wait_1_event);
+    test_assert_event_complete(wait_2_event);
+
+    // Release semaphore
+    err = clReleaseSemaphoreKHR(sema);
+    test_error(err, "Could not release semaphore");
+
+    return TEST_PASS;
+}
+
+// Test it is possible to export a semaphore to a sync fd and import the same
+// sync fd to a new semaphore
+int test_semaphores_import_export_fd(cl_device_id deviceID, cl_context context,
+                                     cl_command_queue defaultQueue,
+                                     int num_elements)
+{
+    cl_int err;
+
+    if (!is_extension_available(deviceID, "cl_khr_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    if (!is_extension_available(deviceID, "cl_khr_external_semaphore_sync_fd"))
+    {
+        log_info("cl_khr_external_semaphore_sync_fd is not supported on this "
+                 "platoform. Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+    GET_PFN(deviceID, clGetSemaphoreHandleForTypeKHR);
+    GET_PFN(deviceID, clReleaseSemaphoreKHR);
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Create semaphore
+    cl_semaphore_properties_khr sema_1_props[] = {
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
+        static_cast<cl_semaphore_properties_khr>(
+            CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
+        static_cast<cl_semaphore_properties_khr>(
+            CL_SEMAPHORE_HANDLE_SYNC_FD_KHR),
+        0
+    };
+    cl_semaphore_khr sema_1 =
+        clCreateSemaphoreWithPropertiesKHR(context, sema_1_props, &err);
+    test_error(err, "Could not create semaphore");
+
+    // Signal semaphore
+    clEventWrapper signal_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_1, nullptr, 0, nullptr,
+                                       &signal_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Extract sync fd
+    int handle = -1;
+    size_t handle_size;
+    err = clGetSemaphoreHandleForTypeKHR(sema_1, deviceID,
+                                         CL_SEMAPHORE_HANDLE_SYNC_FD_KHR,
+                                         sizeof(handle), &handle, &handle_size);
+    test_error(err, "Could not extract semaphore handle");
+    test_assert_error(sizeof(handle) == handle_size, "Invalid handle size");
+    test_assert_error(handle >= 0, "Invalid handle");
+
+    // Create semaphore from sync fd
+    cl_semaphore_properties_khr sema_2_props[] = {
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
+        CL_SEMAPHORE_HANDLE_SYNC_FD_KHR,
+        static_cast<cl_semaphore_properties_khr>(handle), 0
+    };
+
+    cl_semaphore_khr sema_2 =
+        clCreateSemaphoreWithPropertiesKHR(context, sema_2_props, &err);
+    test_error(err, "Could not create semaphore");
+
+    // Wait semaphore
+    clEventWrapper wait_event;
+    err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_2, nullptr, 0, nullptr,
+                                     &wait_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Finish
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    // Check all events are completed
+    test_assert_event_complete(signal_event);
+    test_assert_event_complete(wait_event);
+
+    // Release semaphore
+    err = clReleaseSemaphoreKHR(sema_1);
+    test_error(err, "Could not release semaphore");
+
+    err = clReleaseSemaphoreKHR(sema_2);
+    test_error(err, "Could not release semaphore");
+    return TEST_PASS;
+}
+
+// Test that an invalid semaphore command results in the invalidation of the
+// command's event and the dependencies' events
+int test_semaphores_invalid_command(cl_device_id deviceID, cl_context context,
+                                    cl_command_queue defaultQueue,
+                                    int num_elements)
+{
+    cl_int err;
+
+    if (!is_extension_available(deviceID, "cl_khr_semaphore"))
+    {
+        log_info("cl_khr_semaphore is not supported on this platoform. "
+                 "Skipping test.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
+    // Obtain pointers to semaphore's API
+    GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
+    GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
+    GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
+    GET_PFN(deviceID, clReleaseSemaphoreKHR);
+
+    // Create ooo queue
+    clCommandQueueWrapper queue = clCreateCommandQueue(
+        context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
+    test_error(err, "Could not create command queue");
+
+    // Create semaphores
+    cl_semaphore_properties_khr sema_props[] = {
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
+        static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
+        0
+    };
+    cl_semaphore_khr sema_1 =
+        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+    test_error(err, "Could not create semaphore");
+
+    cl_semaphore_khr sema_2 =
+        clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
+    test_error(err, "Could not create semaphore");
+
+    // Create user events
+    clEventWrapper user_event_1 = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    clEventWrapper user_event_2 = clCreateUserEvent(context, &err);
+    test_error(err, "Could not create user event");
+
+    // Signal semaphore_1 (dependency on user_event_1)
+    clEventWrapper signal_1_event;
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_1, nullptr, 1,
+                                       &user_event_1, &signal_1_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Wait semaphore_1 and semaphore_2 (dependency on user_event_1)
+    clEventWrapper wait_event;
+    cl_semaphore_khr sema_list[] = { sema_1, sema_2 };
+    err = clEnqueueWaitSemaphoresKHR(queue, 2, sema_list, nullptr, 1,
+                                     &user_event_1, &wait_event);
+    test_error(err, "Could not wait semaphore");
+
+    // Signal semaphore_1 (dependency on wait_event and user_event_2)
+    clEventWrapper signal_2_event;
+    cl_event wait_list[] = { user_event_2, wait_event };
+    err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_1, nullptr, 2, wait_list,
+                                       &signal_2_event);
+    test_error(err, "Could not signal semaphore");
+
+    // Flush and delay
+    err = clFlush(queue);
+    test_error(err, "Could not flush queue");
+    std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
+
+    // Ensure all events are not completed
+    test_assert_event_inprogress(signal_1_event);
+    test_assert_event_inprogress(signal_2_event);
+    test_assert_event_inprogress(wait_event);
+
+    // Complete user_event_1 (expect failure as waiting on semaphore_2 is not
+    // allowed (unsignaled)
+    err = clSetUserEventStatus(user_event_1, CL_COMPLETE);
+    test_assert_error(err != CL_SUCCESS,
+                      "signal_2_event completed unexpectedly");
+
+    // Ensure signal_1 is completed while others failed (the second signal
+    // should fail as it depends on wait)
+    err = clFinish(queue);
+    test_error(err, "Could not finish queue");
+
+    test_assert_event_complete(signal_1_event);
+    test_assert_event_terminated(wait_event);
+    test_assert_event_terminated(signal_2_event);
+
+    // Release semaphore
+    err = clReleaseSemaphoreKHR(sema_1);
+    test_error(err, "Could not release semaphore");
+
+    err = clReleaseSemaphoreKHR(sema_2);
+    test_error(err, "Could not release semaphore");
+
+    return TEST_PASS;
+}
+\ No newline at end of file
diff --git a/test_conformance/generic_address_space/advanced_tests.cpp b/test_conformance/generic_address_space/advanced_tests.cpp
index b6df99a9..01cfe8a6 100644
--- a/test_conformance/generic_address_space/advanced_tests.cpp
+++ b/test_conformance/generic_address_space/advanced_tests.cpp
@@ -107,7 +107,32 @@ public:
             program = clCreateProgramWithSource(context, 1, &srcPtr, NULL, &error);
             test_error(error, "clCreateProgramWithSource failed");
 
-            error = clCompileProgram(program, 1, &deviceID, "-cl-std=CL2.0", 0, NULL, NULL, NULL, NULL);
+            // Use the latest OpenCL-C version supported by the device. This
+            // allows calling code to force a particular CL C version if it is
+            // required, but also means that callers need not specify a version
+            // if they want to assume the most recent CL C.
+
+            auto version = get_max_OpenCL_C_for_context(context);
+
+            const char* cl_std = nullptr;
+            if (version >= Version(3, 0))
+            {
+                cl_std = "-cl-std=CL3.0";
+            }
+            else if (version >= Version(2, 0) && version < Version(3, 0))
+            {
+                cl_std = "-cl-std=CL2.0";
+            }
+            else
+            {
+                // If the -cl-std build option is not specified, the highest
+                // OpenCL C 1.x language version supported by each device is
+                // used when compiling the program for each device.
+                cl_std = "";
+            }
+
+            error = clCompileProgram(program, 1, &deviceID, cl_std, 0, NULL,
+                                     NULL, NULL, NULL);
 
             if (error != CL_SUCCESS)
                 PrintCompilationLog(program, deviceID);
@@ -118,7 +143,8 @@ public:
             preCompiledLibrary = clCreateProgramWithSource(context, 1, &srcPtrLibrary, NULL, &error);
             test_error(error, "clCreateProgramWithSource failed");
 
-            error = clCompileProgram(preCompiledLibrary, 1, &deviceID, "-cl-std=CL2.0", 0, NULL, NULL, NULL, NULL);
+            error = clCompileProgram(preCompiledLibrary, 1, &deviceID, cl_std,
+                                     0, NULL, NULL, NULL, NULL);
 
             if (error != CL_SUCCESS)
                 PrintCompilationLog(preCompiledLibrary, deviceID);
@@ -136,7 +162,9 @@ public:
         }
 
         else {
-            if (create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, &srcPtr, "testKernel", "-cl-std=CL2.0")) {
+            if (create_single_kernel_helper(context, &program, &kernel, 1,
+                                            &srcPtr, "testKernel"))
+            {
                 log_error("create_single_kernel_helper failed\n");
                 return -1;
             }
@@ -276,31 +304,36 @@ int test_library_function(cl_device_id deviceID, cl_context context, cl_command_
         NL "}"
         NL;
 
-    const std::string KERNEL_FUNCTION =
-        NL
-        NL "extern bool helperFunction(float *floatp, float val);"
-        NL
-        NL "__global float gfloat = 1.0f;"
-        NL
-        NL "__kernel void testKernel(__global uint *results) {"
-        NL "    uint tid = get_global_id(0);"
-        NL
-        NL "    __global float *gfloatp = &gfloat;"
-        NL "    __local float lfloat;"
-        NL "    lfloat = 2.0f;"
-        NL "    __local float *lfloatp = &lfloat;"
-        NL "    float pfloat = 3.0f;"
-        NL "    __private float *pfloatp = &pfloat;"
-        NL
-        NL "    uint failures = 0;"
-        NL
-        NL "    failures += helperFunction(gfloatp, gfloat) ? 0 : 1;"
-        NL "    failures += helperFunction(lfloatp, lfloat) ? 0 : 1;"
-        NL "    failures += helperFunction(pfloatp, pfloat) ? 0 : 1;"
-        NL
-        NL "    results[tid] = failures == 0;"
-        NL "}"
-        NL;
+    const std::string KERNEL_FUNCTION = R"OpenCLC(
+extern bool helperFunction(float *floatp, float val);
+
+#ifdef __opencl_c_program_scope_global_variables
+__global float gfloat = 1.0f;
+#endif
+
+__kernel void testKernel(__global uint *results) {
+    uint tid = get_global_id(0);
+
+#ifdef __opencl_c_program_scope_global_variables
+    __global float *gfloatp = &gfloat;
+#endif
+    __local float lfloat;
+    lfloat = 2.0f;
+    __local float *lfloatp = &lfloat;
+    float pfloat = 3.0f;
+    __private float *pfloatp = &pfloat;
+
+    uint failures = 0;
+
+#ifdef __opencl_c_program_scope_global_variables
+    failures += helperFunction(gfloatp, gfloat) ? 0 : 1;
+#endif
+    failures += helperFunction(lfloatp, lfloat) ? 0 : 1;
+    failures += helperFunction(pfloatp, pfloat) ? 0 : 1;
+
+    results[tid] = failures == 0;
+};
+)OpenCLC";
 
     CAdvancedTest test(LIBRARY_FUNCTION, KERNEL_FUNCTION);
 
diff --git a/test_conformance/geometrics/procs.h b/test_conformance/geometrics/procs.h
index 44f6f892..f9a96ff8 100644
--- a/test_conformance/geometrics/procs.h
+++ b/test_conformance/geometrics/procs.h
@@ -15,7 +15,6 @@
 //
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
-#include "harness/threadTesting.h"
 #include "harness/typeWrappers.h"
 
 extern int      create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret);
diff --git a/test_conformance/gl/common.h b/test_conformance/gl/common.h
index d8587cf0..155deaeb 100644
--- a/test_conformance/gl/common.h
+++ b/test_conformance/gl/common.h
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -18,17 +18,19 @@
 
 #include "testBase.h"
 
-typedef struct {
-  size_t width;
-  size_t height;
-  size_t depth;
+typedef struct
+{
+    size_t width;
+    size_t height;
+    size_t depth;
 } sizevec_t;
 
-struct format {
-  GLenum internal;
-  GLenum formattype;
-  GLenum datatype;
-  ExplicitType type;
+struct format
+{
+    GLenum internal;
+    GLenum formattype;
+    GLenum datatype;
+    ExplicitType type;
 };
 
 // These are the typically tested formats.
@@ -78,6 +80,6 @@ int test_images_get_info_common(cl_device_id device, cl_context context,
                                 size_t ntargets, sizevec_t *sizes,
                                 size_t nsizes);
 
-int is_rgb_101010_supported( cl_context context, GLenum gl_target );
+int is_rgb_101010_supported(cl_context context, GLenum gl_target);
 
 #endif // __COMMON_H__
diff --git a/test_conformance/gl/helpers.cpp b/test_conformance/gl/helpers.cpp
index 16441a47..0ec75748 100644
--- a/test_conformance/gl/helpers.cpp
+++ b/test_conformance/gl/helpers.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -14,113 +14,96 @@
 // limitations under the License.
 //
 #include "testBase.h"
-#if defined( __APPLE__ )
-        #include <OpenGL/glu.h>
+#if defined(__APPLE__)
+#include <OpenGL/glu.h>
 #else
-        #include <GL/glu.h>
+#include <GL/glu.h>
 #endif
 
-const char *get_kernel_suffix( cl_image_format *format )
+const char *get_kernel_suffix(cl_image_format *format)
 {
-    switch( format->image_channel_data_type )
+    switch (format->image_channel_data_type)
     {
         case CL_UNORM_INT8:
         case CL_UNORM_INT16:
-    case CL_UNORM_INT24:
+        case CL_UNORM_INT24:
         case CL_SNORM_INT8:
         case CL_SNORM_INT16:
         case CL_HALF_FLOAT:
         case CL_FLOAT:
-        case CL_UNORM_INT_101010:
-            return "f";
+        case CL_UNORM_INT_101010: return "f";
         case CL_SIGNED_INT8:
         case CL_SIGNED_INT16:
-        case CL_SIGNED_INT32:
-            return "i";
+        case CL_SIGNED_INT32: return "i";
         case CL_UNSIGNED_INT8:
         case CL_UNSIGNED_INT16:
-        case CL_UNSIGNED_INT32:
-            return "ui";
+        case CL_UNSIGNED_INT32: return "ui";
         default:
-      log_error("Test error: unsupported kernel suffix for image_channel_data_type 0x%X\n",format->image_channel_data_type);
+            log_error("Test error: unsupported kernel suffix for "
+                      "image_channel_data_type 0x%X\n",
+                      format->image_channel_data_type);
             return "";
     }
 }
 
-ExplicitType get_read_kernel_type( cl_image_format *format )
+ExplicitType get_read_kernel_type(cl_image_format *format)
 {
-    switch( format->image_channel_data_type )
+    switch (format->image_channel_data_type)
     {
         case CL_UNORM_INT8:
         case CL_UNORM_INT16:
-    case CL_UNORM_INT24:
+        case CL_UNORM_INT24:
         case CL_SNORM_INT8:
         case CL_SNORM_INT16:
         case CL_HALF_FLOAT:
         case CL_FLOAT:
         case CL_UNORM_INT_101010:
 #ifdef GL_VERSION_3_2
-    case CL_DEPTH:
+        case CL_DEPTH:
 #endif
             return kFloat;
         case CL_SIGNED_INT8:
         case CL_SIGNED_INT16:
-        case CL_SIGNED_INT32:
-            return kInt;
+        case CL_SIGNED_INT32: return kInt;
         case CL_UNSIGNED_INT8:
         case CL_UNSIGNED_INT16:
-        case CL_UNSIGNED_INT32:
-            return kUInt;
+        case CL_UNSIGNED_INT32: return kUInt;
         default:
-      log_error("Test error: unsupported kernel suffix for image_channel_data_type 0x%X\n",format->image_channel_data_type);
+            log_error("Test error: unsupported kernel suffix for "
+                      "image_channel_data_type 0x%X\n",
+                      format->image_channel_data_type);
             return kNumExplicitTypes;
     }
 }
 
-ExplicitType get_write_kernel_type( cl_image_format *format )
+ExplicitType get_write_kernel_type(cl_image_format *format)
 {
-    switch( format->image_channel_data_type )
+    switch (format->image_channel_data_type)
     {
-        case CL_UNORM_INT8:
-            return kFloat;
-        case CL_UNORM_INT16:
-            return kFloat;
-    case CL_UNORM_INT24:
-      return kFloat;
-        case CL_SNORM_INT8:
-            return kFloat;
-        case CL_SNORM_INT16:
-            return kFloat;
-        case CL_HALF_FLOAT:
-            return kHalf;
-        case CL_FLOAT:
-            return kFloat;
-        case CL_SIGNED_INT8:
-            return kChar;
-        case CL_SIGNED_INT16:
-            return kShort;
-        case CL_SIGNED_INT32:
-            return kInt;
-        case CL_UNSIGNED_INT8:
-            return kUChar;
-        case CL_UNSIGNED_INT16:
-            return kUShort;
-        case CL_UNSIGNED_INT32:
-            return kUInt;
-        case CL_UNORM_INT_101010:
-            return kFloat;
+        case CL_UNORM_INT8: return kFloat;
+        case CL_UNORM_INT16: return kFloat;
+        case CL_UNORM_INT24: return kFloat;
+        case CL_SNORM_INT8: return kFloat;
+        case CL_SNORM_INT16: return kFloat;
+        case CL_HALF_FLOAT: return kHalf;
+        case CL_FLOAT: return kFloat;
+        case CL_SIGNED_INT8: return kChar;
+        case CL_SIGNED_INT16: return kShort;
+        case CL_SIGNED_INT32: return kInt;
+        case CL_UNSIGNED_INT8: return kUChar;
+        case CL_UNSIGNED_INT16: return kUShort;
+        case CL_UNSIGNED_INT32: return kUInt;
+        case CL_UNORM_INT_101010: return kFloat;
 #ifdef GL_VERSION_3_2
-    case CL_DEPTH:
-      return kFloat;
+        case CL_DEPTH: return kFloat;
 #endif
-        default:
-            return kInt;
+        default: return kInt;
     }
 }
 
-const char* get_write_conversion( cl_image_format *format, ExplicitType type )
+const char *get_write_conversion(cl_image_format *format, ExplicitType type)
 {
-    switch( format->image_channel_data_type )
+    switch (format->image_channel_data_type)
     {
         case CL_UNORM_INT8:
         case CL_UNORM_INT16:
@@ -130,250 +113,268 @@ const char* get_write_conversion( cl_image_format *format, ExplicitType type )
         case CL_FLOAT:
         case CL_UNORM_INT_101010:
         case CL_UNORM_INT24:
-            if(type != kFloat) return "convert_float4";
+            if (type != kFloat) return "convert_float4";
             break;
         case CL_SIGNED_INT8:
         case CL_SIGNED_INT16:
         case CL_SIGNED_INT32:
-            if(type != kInt) return "convert_int4";
+            if (type != kInt) return "convert_int4";
             break;
         case CL_UNSIGNED_INT8:
         case CL_UNSIGNED_INT16:
         case CL_UNSIGNED_INT32:
-            if(type != kUInt) return "convert_uint4";
+            if (type != kUInt) return "convert_uint4";
             break;
-        default:
-            return "";
+        default: return "";
     }
     return "";
 }
 
-// The only three input types to this function are kInt, kUInt and kFloat, due to the way we set up our tests
-// The output types, though, are pretty much anything valid for GL to receive
+// The only three input types to this function are kInt, kUInt and kFloat, due
+// to the way we set up our tests The output types, though, are pretty much
+// anything valid for GL to receive
 
-#define DOWNSCALE_INTEGER_CASE( enum, type, bitShift )    \
-    case enum:    \
-    {        \
-        cl_##type *dst = new cl_##type[ numPixels * 4 ]; \
-        for( size_t i = 0; i < numPixels * 4; i++ ) \
-            dst[ i ] = src[ i ];    \
-        return (char *)dst;        \
+#define DOWNSCALE_INTEGER_CASE(enum, type, bitShift)                           \
+    case enum: {                                                               \
+        cl_##type *dst = new cl_##type[numPixels * 4];                         \
+        for (size_t i = 0; i < numPixels * 4; i++) dst[i] = src[i];            \
+        return (char *)dst;                                                    \
     }
 
-#define UPSCALE_FLOAT_CASE( enum, type, typeMax )    \
-    case enum:    \
-    {        \
-        cl_##type *dst = new cl_##type[ numPixels * 4 ]; \
-        for( size_t i = 0; i < numPixels * 4; i++ ) \
-            dst[ i ] = (cl_##type)( src[ i ] * typeMax );    \
-        return (char *)dst;        \
+#define UPSCALE_FLOAT_CASE(enum, type, typeMax)                                \
+    case enum: {                                                               \
+        cl_##type *dst = new cl_##type[numPixels * 4];                         \
+        for (size_t i = 0; i < numPixels * 4; i++)                             \
+            dst[i] = (cl_##type)(src[i] * typeMax);                            \
+        return (char *)dst;                                                    \
     }
 
-char * convert_to_expected( void * inputBuffer, size_t numPixels, ExplicitType inType, ExplicitType outType, size_t channelNum, GLenum glDataType )
+char *convert_to_expected(void *inputBuffer, size_t numPixels,
+                          ExplicitType inType, ExplicitType outType,
+                          size_t channelNum, GLenum glDataType)
 {
 #ifdef DEBUG
-    log_info( "- Converting from input type '%s' to output type '%s'\n",
-             get_explicit_type_name( inType ), get_explicit_type_name( outType ) );
+    log_info("- Converting from input type '%s' to output type '%s'\n",
+             get_explicit_type_name(inType), get_explicit_type_name(outType));
 #endif
 
-    if( inType == outType )
+    if (inType == outType)
     {
-        char *outData = new char[ numPixels * channelNum * get_explicit_type_size(outType) ] ; // sizeof( cl_int ) ];
-        if (glDataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV) {
-            for (size_t i = 0; i < numPixels; ++i) {
-                ((cl_float*)outData)[i] = ((cl_float*)inputBuffer)[2 * i];
+        char *outData =
+            new char[numPixels * channelNum
+                     * get_explicit_type_size(outType)]; // sizeof( cl_int ) ];
+        if (glDataType == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
+        {
+            for (size_t i = 0; i < numPixels; ++i)
+            {
+                ((cl_float *)outData)[i] = ((cl_float *)inputBuffer)[2 * i];
             }
         }
-        else {
-            memcpy( outData, inputBuffer, numPixels * channelNum * get_explicit_type_size(inType)  );
+        else
+        {
+            memcpy(outData, inputBuffer,
+                   numPixels * channelNum * get_explicit_type_size(inType));
         }
         return outData;
     }
-    else if( inType == kChar )
+    else if (inType == kChar)
     {
         cl_char *src = (cl_char *)inputBuffer;
 
-        switch( outType )
+        switch (outType)
         {
-            case kInt:
-            {
-                cl_int *outData = new cl_int[ numPixels * channelNum ];
-                for( size_t i = 0; i < numPixels * channelNum; i++ )
+            case kInt: {
+                cl_int *outData = new cl_int[numPixels * channelNum];
+                for (size_t i = 0; i < numPixels * channelNum; i++)
                 {
-                    outData[ i ] = (cl_int)((src[ i ]));
+                    outData[i] = (cl_int)((src[i]));
                 }
                 return (char *)outData;
             }
-            case kFloat:
-            {
-                // If we're converting to float, then CL decided that we should be normalized
-                cl_float *outData = new cl_float[ numPixels * channelNum ];
-                for( size_t i = 0; i < numPixels * channelNum; i++ )
+            case kFloat: {
+                // If we're converting to float, then CL decided that we should
+                // be normalized
+                cl_float *outData = new cl_float[numPixels * channelNum];
+                for (size_t i = 0; i < numPixels * channelNum; i++)
                 {
-                    outData[ i ] = (cl_float)src[ i ] / 127.0f;
+                    outData[i] = (cl_float)src[i] / 127.0f;
                 }
                 return (char *)outData;
             }
             default:
-                log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) );
+                log_error("ERROR: Unsupported conversion from %s to %s!\n",
+                          get_explicit_type_name(inType),
+                          get_explicit_type_name(outType));
                 return NULL;
         }
     }
-    else if( inType == kUChar )
+    else if (inType == kUChar)
     {
         cl_uchar *src = (cl_uchar *)inputBuffer;
 
-        switch( outType )
+        switch (outType)
         {
-            case kUInt:
-            {
-                cl_uint *outData = new cl_uint[ numPixels * channelNum ];
-                for( size_t i = 0; i < numPixels * channelNum; i++ )
+            case kUInt: {
+                cl_uint *outData = new cl_uint[numPixels * channelNum];
+                for (size_t i = 0; i < numPixels * channelNum; i++)
                 {
-                    outData[ i ] = (cl_uint)((src[ i ]));
+                    outData[i] = (cl_uint)((src[i]));
                 }
                 return (char *)outData;
             }
-            case kFloat:
-            {
-                // If we're converting to float, then CL decided that we should be normalized
-                cl_float *outData = new cl_float[ numPixels * channelNum ];
-                for( size_t i = 0; i < numPixels * channelNum; i++ )
+            case kFloat: {
+                // If we're converting to float, then CL decided that we should
+                // be normalized
+                cl_float *outData = new cl_float[numPixels * channelNum];
+                for (size_t i = 0; i < numPixels * channelNum; i++)
                 {
-                    outData[ i ] = (cl_float)(src[ i ]) / 256.0f;
+                    outData[i] = (cl_float)(src[i]) / 256.0f;
                 }
                 return (char *)outData;
             }
             default:
-                log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) );
+                log_error("ERROR: Unsupported conversion from %s to %s!\n",
+                          get_explicit_type_name(inType),
+                          get_explicit_type_name(outType));
                 return NULL;
         }
     }
-    else if( inType == kShort )
+    else if (inType == kShort)
     {
         cl_short *src = (cl_short *)inputBuffer;
 
-        switch( outType )
+        switch (outType)
         {
-            case kInt:
-            {
-                cl_int *outData = new cl_int[ numPixels * channelNum ];
-                for( size_t i = 0; i < numPixels * channelNum; i++ )
+            case kInt: {
+                cl_int *outData = new cl_int[numPixels * channelNum];
+                for (size_t i = 0; i < numPixels * channelNum; i++)
                 {
-                    outData[ i ] = (cl_int)((src[ i ]));
+                    outData[i] = (cl_int)((src[i]));
                 }
                 return (char *)outData;
             }
-            case kFloat:
-            {
-                // If we're converting to float, then CL decided that we should be normalized
-                cl_float *outData = new cl_float[ numPixels * channelNum ];
-                for( size_t i = 0; i < numPixels * channelNum; i++ )
+            case kFloat: {
+                // If we're converting to float, then CL decided that we should
+                // be normalized
+                cl_float *outData = new cl_float[numPixels * channelNum];
+                for (size_t i = 0; i < numPixels * channelNum; i++)
                 {
-                    outData[ i ] = (cl_float)src[ i ] / 32768.0f;
+                    outData[i] = (cl_float)src[i] / 32768.0f;
                 }
                 return (char *)outData;
             }
             default:
-                log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) );
+                log_error("ERROR: Unsupported conversion from %s to %s!\n",
+                          get_explicit_type_name(inType),
+                          get_explicit_type_name(outType));
                 return NULL;
         }
     }
-    else if( inType == kUShort )
+    else if (inType == kUShort)
     {
         cl_ushort *src = (cl_ushort *)inputBuffer;
 
-        switch( outType )
+        switch (outType)
         {
-            case kUInt:
-            {
-                cl_uint *outData = new cl_uint[ numPixels * channelNum ];
-                for( size_t i = 0; i < numPixels * channelNum; i++ )
+            case kUInt: {
+                cl_uint *outData = new cl_uint[numPixels * channelNum];
+                for (size_t i = 0; i < numPixels * channelNum; i++)
                 {
-                    outData[ i ] = (cl_uint)((src[ i ]));
+                    outData[i] = (cl_uint)((src[i]));
                 }
                 return (char *)outData;
             }
-            case kFloat:
-            {
-                // If we're converting to float, then CL decided that we should be normalized
-                cl_float *outData = new cl_float[ numPixels * channelNum ];
-                for( size_t i = 0; i < numPixels * channelNum; i++ )
+            case kFloat: {
+                // If we're converting to float, then CL decided that we should
+                // be normalized
+                cl_float *outData = new cl_float[numPixels * channelNum];
+                for (size_t i = 0; i < numPixels * channelNum; i++)
                 {
-                    outData[ i ] = (cl_float)(src[ i ]) / 65535.0f;
+                    outData[i] = (cl_float)(src[i]) / 65535.0f;
                 }
                 return (char *)outData;
             }
             default:
-                log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) );
+                log_error("ERROR: Unsupported conversion from %s to %s!\n",
+                          get_explicit_type_name(inType),
+                          get_explicit_type_name(outType));
                 return NULL;
         }
     }
-    else if( inType == kInt )
+    else if (inType == kInt)
     {
         cl_int *src = (cl_int *)inputBuffer;
 
-        switch( outType )
+        switch (outType)
         {
-                DOWNSCALE_INTEGER_CASE( kShort, short, 16 )
-                DOWNSCALE_INTEGER_CASE( kChar, char, 24 )
-            case kFloat:
-            {
-                // If we're converting to float, then CL decided that we should be normalized
-                cl_float *outData = new cl_float[ numPixels * channelNum ];
-                for( size_t i = 0; i < numPixels * channelNum; i++ )
+            DOWNSCALE_INTEGER_CASE(kShort, short, 16)
+            DOWNSCALE_INTEGER_CASE(kChar, char, 24)
+            case kFloat: {
+                // If we're converting to float, then CL decided that we should
+                // be normalized
+                cl_float *outData = new cl_float[numPixels * channelNum];
+                for (size_t i = 0; i < numPixels * channelNum; i++)
                 {
-                    outData[ i ] = (cl_float)fmaxf( (float)src[ i ] / 2147483647.f, -1.f );
+                    outData[i] =
+                        (cl_float)fmaxf((float)src[i] / 2147483647.f, -1.f);
                 }
                 return (char *)outData;
             }
             default:
-                log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) );
+                log_error("ERROR: Unsupported conversion from %s to %s!\n",
+                          get_explicit_type_name(inType),
+                          get_explicit_type_name(outType));
                 return NULL;
         }
     }
-    else if( inType == kUInt )
+    else if (inType == kUInt)
     {
         cl_uint *src = (cl_uint *)inputBuffer;
 
-        switch( outType )
+        switch (outType)
         {
-                DOWNSCALE_INTEGER_CASE( kUShort, ushort, 16 )
-                DOWNSCALE_INTEGER_CASE( kUChar, uchar, 24 )
-            case kFloat:
-            {
-                // If we're converting to float, then CL decided that we should be normalized
-                cl_float *outData = new cl_float[ numPixels * channelNum ];
-                const cl_float MaxValue = (glDataType == GL_UNSIGNED_INT_24_8) ? 16777215.f : 4294967295.f;
-                const cl_uint ShiftBits = (glDataType == GL_UNSIGNED_INT_24_8) ? 8 : 0;
-                for( size_t i = 0; i < numPixels * channelNum; i++ )
+            DOWNSCALE_INTEGER_CASE(kUShort, ushort, 16)
+            DOWNSCALE_INTEGER_CASE(kUChar, uchar, 24)
+            case kFloat: {
+                // If we're converting to float, then CL decided that we should
+                // be normalized
+                cl_float *outData = new cl_float[numPixels * channelNum];
+                const cl_float MaxValue = (glDataType == GL_UNSIGNED_INT_24_8)
+                    ? 16777215.f
+                    : 4294967295.f;
+                const cl_uint ShiftBits =
+                    (glDataType == GL_UNSIGNED_INT_24_8) ? 8 : 0;
+                for (size_t i = 0; i < numPixels * channelNum; i++)
                 {
-                    outData[ i ] = (cl_float)(src[ i ] >> ShiftBits) / MaxValue;
+                    outData[i] = (cl_float)(src[i] >> ShiftBits) / MaxValue;
                 }
                 return (char *)outData;
             }
             default:
-                log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) );
+                log_error("ERROR: Unsupported conversion from %s to %s!\n",
+                          get_explicit_type_name(inType),
+                          get_explicit_type_name(outType));
                 return NULL;
         }
     }
-    else if( inType == kHalf )
+    else if (inType == kHalf)
     {
         cl_half *src = (cl_half *)inputBuffer;
 
-        switch( outType )
+        switch (outType)
         {
-            case kFloat:
-            {
-                cl_float *outData = new cl_float[ numPixels * channelNum ];
-                for( size_t i = 0; i < numPixels * channelNum; i++ )
+            case kFloat: {
+                cl_float *outData = new cl_float[numPixels * channelNum];
+                for (size_t i = 0; i < numPixels * channelNum; i++)
                 {
                     outData[i] = cl_half_to_float(src[i]);
                 }
                 return (char *)outData;
             }
             default:
-                log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) );
+                log_error("ERROR: Unsupported conversion from %s to %s!\n",
+                          get_explicit_type_name(inType),
+                          get_explicit_type_name(outType));
                 return NULL;
         }
     }
@@ -381,16 +382,18 @@ char * convert_to_expected( void * inputBuffer, size_t numPixels, ExplicitType i
     {
         cl_float *src = (cl_float *)inputBuffer;
 
-        switch( outType )
+        switch (outType)
         {
-                UPSCALE_FLOAT_CASE( kChar, char, 127.f )
-                UPSCALE_FLOAT_CASE( kUChar, uchar, 255.f )
-                UPSCALE_FLOAT_CASE( kShort, short, 32767.f )
-                UPSCALE_FLOAT_CASE( kUShort, ushort, 65535.f )
-                UPSCALE_FLOAT_CASE( kInt, int, 2147483647.f )
-                UPSCALE_FLOAT_CASE( kUInt, uint, 4294967295.f )
+            UPSCALE_FLOAT_CASE(kChar, char, 127.f)
+            UPSCALE_FLOAT_CASE(kUChar, uchar, 255.f)
+            UPSCALE_FLOAT_CASE(kShort, short, 32767.f)
+            UPSCALE_FLOAT_CASE(kUShort, ushort, 65535.f)
+            UPSCALE_FLOAT_CASE(kInt, int, 2147483647.f)
+            UPSCALE_FLOAT_CASE(kUInt, uint, 4294967295.f)
             default:
-                log_error( "ERROR: Unsupported conversion from %s to %s!\n", get_explicit_type_name( inType ), get_explicit_type_name( outType ) );
+                log_error("ERROR: Unsupported conversion from %s to %s!\n",
+                          get_explicit_type_name(inType),
+                          get_explicit_type_name(outType));
                 return NULL;
         }
     }
@@ -398,195 +401,256 @@ char * convert_to_expected( void * inputBuffer, size_t numPixels, ExplicitType i
     return NULL;
 }
 
-int validate_integer_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t sampleNum, size_t typeSize )
+int validate_integer_results(void *expectedResults, void *actualResults,
+                             size_t width, size_t height, size_t sampleNum,
+                             size_t typeSize)
 {
-    return validate_integer_results( expectedResults, actualResults, width, height, sampleNum, 0, typeSize );
+    return validate_integer_results(expectedResults, actualResults, width,
+                                    height, sampleNum, 0, typeSize);
 }
 
-int validate_integer_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t depth, size_t sampleNum, size_t typeSize )
+int validate_integer_results(void *expectedResults, void *actualResults,
+                             size_t width, size_t height, size_t depth,
+                             size_t sampleNum, size_t typeSize)
 {
     char *expected = (char *)expectedResults;
     char *actual = (char *)actualResults;
-  for ( size_t s = 0; s < sampleNum; s++ )
-  {
-    for( size_t z = 0; z < ( ( depth == 0 ) ? 1 : depth ); z++ )
+    for (size_t s = 0; s < sampleNum; s++)
     {
-        for( size_t y = 0; y < height; y++ )
+        for (size_t z = 0; z < ((depth == 0) ? 1 : depth); z++)
         {
-            for( size_t x = 0; x < width; x++ )
+            for (size_t y = 0; y < height; y++)
             {
-                if( memcmp( expected, actual, typeSize * 4 ) != 0 )
+                for (size_t x = 0; x < width; x++)
                 {
-                    char scratch[ 1024 ];
-
-                    if( depth == 0 )
-              log_error( "ERROR: Data sample %d,%d,%d did not validate!\n", (int)x, (int)y, (int)s );
-                    else
-              log_error( "ERROR: Data sample %d,%d,%d,%d did not validate!\n", (int)x, (int)y, (int)z, (int)s );
-                    log_error( "\tExpected: %s\n", GetDataVectorString( expected, typeSize, 4, scratch ) );
-                    log_error( "\t  Actual: %s\n", GetDataVectorString( actual, typeSize, 4, scratch ) );
-                    return -1;
+                    if (memcmp(expected, actual, typeSize * 4) != 0)
+                    {
+                        char scratch[1024];
+
+                        if (depth == 0)
+                            log_error("ERROR: Data sample %d,%d,%d did not "
+                                      "validate!\n",
+                                      (int)x, (int)y, (int)s);
+                        else
+                            log_error("ERROR: Data sample %d,%d,%d,%d did not "
+                                      "validate!\n",
+                                      (int)x, (int)y, (int)z, (int)s);
+                        log_error("\tExpected: %s\n",
+                                  GetDataVectorString(expected, typeSize, 4,
+                                                      scratch));
+                        log_error(
+                            "\t  Actual: %s\n",
+                            GetDataVectorString(actual, typeSize, 4, scratch));
+                        return -1;
+                    }
+                    expected += typeSize * 4;
+                    actual += typeSize * 4;
                 }
-                expected += typeSize * 4;
-                actual += typeSize * 4;
             }
         }
     }
-  }
 
     return 0;
 }
 
-int validate_float_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t sampleNum, size_t channelNum  )
+int validate_float_results(void *expectedResults, void *actualResults,
+                           size_t width, size_t height, size_t sampleNum,
+                           size_t channelNum)
 {
-    return validate_float_results( expectedResults, actualResults, width, height, sampleNum, 0, channelNum );
+    return validate_float_results(expectedResults, actualResults, width, height,
+                                  sampleNum, 0, channelNum);
 }
 
-int validate_float_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t depth, size_t sampleNum, size_t channelNum )
+int validate_float_results(void *expectedResults, void *actualResults,
+                           size_t width, size_t height, size_t depth,
+                           size_t sampleNum, size_t channelNum)
 {
     cl_float *expected = (cl_float *)expectedResults;
     cl_float *actual = (cl_float *)actualResults;
-  for ( size_t s = 0; s < sampleNum; s++ )
-  {
-    for( size_t z = 0; z < ( ( depth == 0 ) ? 1 : depth ); z++ )
+    for (size_t s = 0; s < sampleNum; s++)
     {
-        for( size_t y = 0; y < height; y++ )
+        for (size_t z = 0; z < ((depth == 0) ? 1 : depth); z++)
         {
-            for( size_t x = 0; x < width; x++ )
+            for (size_t y = 0; y < height; y++)
             {
-                float err = 0.f;
-          for( size_t i = 0; i < channelNum; i++ )
-                {
-                    float error = fabsf( expected[ i ] - actual[ i ] );
-                    if( error > err )
-                        err = error;
-                }
-
-                if( err > 1.f / 127.f ) // Max expected range of error if we converted from an 8-bit integer to a normalized float
+                for (size_t x = 0; x < width; x++)
                 {
-                    if( depth == 0 )
-              log_error( "ERROR: Data sample %d,%d,%d did not validate!\n", (int)x, (int)y, (int)s );
-                    else
-              log_error( "ERROR: Data sample %d,%d,%d,%d did not validate!\n", (int)x, (int)y, (int)z, (int)s );
-
-            if (channelNum == 4)
-            {
-                    log_error( "\tExpected: %f %f %f %f\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
-                    log_error( "\t        : %a %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ], expected[ 3 ] );
-                    log_error( "\t  Actual: %f %f %f %f\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
-                    log_error( "\t        : %a %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ], actual[ 3 ] );
-            }
-            else if(channelNum == 1)
-            {
-              log_error( "\tExpected: %f\n", expected[ 0 ] );
-              log_error( "\t        : %a\n", expected[ 0 ]  );
-              log_error( "\t  Actual: %f\n", actual[ 0 ] );
-              log_error( "\t        : %a\n", actual[ 0 ] );
-            }
-                    return -1;
+                    float err = 0.f;
+                    for (size_t i = 0; i < channelNum; i++)
+                    {
+                        float error = fabsf(expected[i] - actual[i]);
+                        if (error > err) err = error;
+                    }
+
+                    if (err > 1.f / 127.f) // Max expected range of error if we
+                                           // converted from an 8-bit integer to
+                                           // a normalized float
+                    {
+                        if (depth == 0)
+                            log_error("ERROR: Data sample %d,%d,%d did not "
+                                      "validate!\n",
+                                      (int)x, (int)y, (int)s);
+                        else
+                            log_error("ERROR: Data sample %d,%d,%d,%d did not "
+                                      "validate!\n",
+                                      (int)x, (int)y, (int)z, (int)s);
+
+                        if (channelNum == 4)
+                        {
+                            log_error("\tExpected: %f %f %f %f\n", expected[0],
+                                      expected[1], expected[2], expected[3]);
+                            log_error("\t        : %a %a %a %a\n", expected[0],
+                                      expected[1], expected[2], expected[3]);
+                            log_error("\t  Actual: %f %f %f %f\n", actual[0],
+                                      actual[1], actual[2], actual[3]);
+                            log_error("\t        : %a %a %a %a\n", actual[0],
+                                      actual[1], actual[2], actual[3]);
+                        }
+                        else if (channelNum == 1)
+                        {
+                            log_error("\tExpected: %f\n", expected[0]);
+                            log_error("\t        : %a\n", expected[0]);
+                            log_error("\t  Actual: %f\n", actual[0]);
+                            log_error("\t        : %a\n", actual[0]);
+                        }
+                        return -1;
+                    }
+                    expected += channelNum;
+                    actual += channelNum;
                 }
-          expected += channelNum;
-          actual += channelNum;
             }
         }
     }
-  }
 
     return 0;
 }
 
-int validate_float_results_rgb_101010( void *expectedResults, void *actualResults, size_t width, size_t height, size_t sampleNum )
+int validate_float_results_rgb_101010(void *expectedResults,
+                                      void *actualResults, size_t width,
+                                      size_t height, size_t sampleNum)
 {
-    return validate_float_results_rgb_101010( expectedResults, actualResults, width, height, sampleNum, 0 );
+    return validate_float_results_rgb_101010(expectedResults, actualResults,
+                                             width, height, sampleNum, 0);
 }
 
-int validate_float_results_rgb_101010( void *expectedResults, void *actualResults, size_t width, size_t height, size_t depth, size_t sampleNum )
+int validate_float_results_rgb_101010(void *expectedResults,
+                                      void *actualResults, size_t width,
+                                      size_t height, size_t depth,
+                                      size_t sampleNum)
 {
     cl_float *expected = (cl_float *)expectedResults;
     cl_float *actual = (cl_float *)actualResults;
-  for ( size_t s = 0; s < sampleNum; s++ )
-  {
-    for( size_t z = 0; z < ( ( depth == 0 ) ? 1 : depth ); z++ )
+    for (size_t s = 0; s < sampleNum; s++)
     {
-        for( size_t y = 0; y < height; y++ )
+        for (size_t z = 0; z < ((depth == 0) ? 1 : depth); z++)
         {
-            for( size_t x = 0; x < width; x++ )
+            for (size_t y = 0; y < height; y++)
             {
-                float err = 0.f;
-                for( size_t i = 0; i < 3; i++ ) // skip the fourth channel
+                for (size_t x = 0; x < width; x++)
                 {
-                    float error = fabsf( expected[ i ] - actual[ i ] );
-                    if( error > err )
-                        err = error;
+                    float err = 0.f;
+                    for (size_t i = 0; i < 3; i++) // skip the fourth channel
+                    {
+                        float error = fabsf(expected[i] - actual[i]);
+                        if (error > err) err = error;
+                    }
+
+                    if (err > 1.f / 127.f) // Max expected range of error if we
+                                           // converted from an 8-bit integer to
+                                           // a normalized float
+                    {
+                        if (depth == 0)
+                            log_error("ERROR: Data sample %d,%d,%d did not "
+                                      "validate!\n",
+                                      (int)x, (int)y, (int)s);
+                        else
+                            log_error("ERROR: Data sample %d,%d,%d,%d did not "
+                                      "validate!\n",
+                                      (int)x, (int)y, (int)z, (int)s);
+                        log_error("\tExpected: %f %f %f\n", expected[0],
+                                  expected[1], expected[2]);
+                        log_error("\t        : %a %a %a\n", expected[0],
+                                  expected[1], expected[2]);
+                        log_error("\t  Actual: %f %f %f\n", actual[0],
+                                  actual[1], actual[2]);
+                        log_error("\t        : %a %a %a\n", actual[0],
+                                  actual[1], actual[2]);
+                        return -1;
+                    }
+                    expected += 4;
+                    actual += 4;
                 }
-
-                if( err > 1.f / 127.f ) // Max expected range of error if we converted from an 8-bit integer to a normalized float
-                {
-                    if( depth == 0 )
-              log_error( "ERROR: Data sample %d,%d,%d did not validate!\n", (int)x, (int)y, (int)s );
-                    else
-              log_error( "ERROR: Data sample %d,%d,%d,%d did not validate!\n", (int)x, (int)y, (int)z, (int)s );
-                    log_error( "\tExpected: %f %f %f\n", expected[ 0 ], expected[ 1 ], expected[ 2 ] );
-                    log_error( "\t        : %a %a %a\n", expected[ 0 ], expected[ 1 ], expected[ 2 ] );
-                    log_error( "\t  Actual: %f %f %f\n", actual[ 0 ], actual[ 1 ], actual[ 2 ] );
-                    log_error( "\t        : %a %a %a\n", actual[ 0 ], actual[ 1 ], actual[ 2 ] );
-                    return -1;
-                }
-                expected += 4;
-                actual += 4;
             }
         }
     }
-  }
 
     return 0;
 }
 
-int CheckGLObjectInfo(cl_mem mem, cl_gl_object_type expected_cl_gl_type, GLuint expected_gl_name,
-                  GLenum expected_cl_gl_texture_target, GLint expected_cl_gl_mipmap_level)
+int CheckGLObjectInfo(cl_mem mem, cl_gl_object_type expected_cl_gl_type,
+                      GLuint expected_gl_name,
+                      GLenum expected_cl_gl_texture_target,
+                      GLint expected_cl_gl_mipmap_level)
 {
-  cl_gl_object_type object_type;
-  GLuint object_name;
-  GLenum texture_target;
-  GLint mipmap_level;
+    cl_gl_object_type object_type;
+    GLuint object_name;
+    GLenum texture_target;
+    GLint mipmap_level;
     int error;
 
-  error = (*clGetGLObjectInfo_ptr)(mem, &object_type, &object_name);
-  test_error( error, "clGetGLObjectInfo failed");
-  if (object_type != expected_cl_gl_type) {
-    log_error("clGetGLObjectInfo did not return expected object type: expected %d, got %d.\n", expected_cl_gl_type, object_type);
-    return -1;
-  }
-  if (object_name != expected_gl_name) {
-    log_error("clGetGLObjectInfo did not return expected object name: expected %d, got %d.\n", expected_gl_name, object_name);
-    return -1;
-  }
+    error = (*clGetGLObjectInfo_ptr)(mem, &object_type, &object_name);
+    test_error(error, "clGetGLObjectInfo failed");
+    if (object_type != expected_cl_gl_type)
+    {
+        log_error("clGetGLObjectInfo did not return expected object type: "
+                  "expected %d, got %d.\n",
+                  expected_cl_gl_type, object_type);
+        return -1;
+    }
+    if (object_name != expected_gl_name)
+    {
+        log_error("clGetGLObjectInfo did not return expected object name: "
+                  "expected %d, got %d.\n",
+                  expected_gl_name, object_name);
+        return -1;
+    }
 
-  // If we're dealing with a buffer or render buffer, we are done.
+    // If we're dealing with a buffer or render buffer, we are done.
 
-  if (object_type == CL_GL_OBJECT_BUFFER || object_type == CL_GL_OBJECT_RENDERBUFFER) {
-    return 0;
-  }
+    if (object_type == CL_GL_OBJECT_BUFFER
+        || object_type == CL_GL_OBJECT_RENDERBUFFER)
+    {
+        return 0;
+    }
 
-  // Otherwise, it's a texture-based object and requires a bit more checking.
+    // Otherwise, it's a texture-based object and requires a bit more checking.
 
-  error = (*clGetGLTextureInfo_ptr)(mem, CL_GL_TEXTURE_TARGET, sizeof(texture_target), &texture_target, NULL);
-  test_error( error, "clGetGLTextureInfo for CL_GL_TEXTURE_TARGET failed");
+    error = (*clGetGLTextureInfo_ptr)(mem, CL_GL_TEXTURE_TARGET,
+                                      sizeof(texture_target), &texture_target,
+                                      NULL);
+    test_error(error, "clGetGLTextureInfo for CL_GL_TEXTURE_TARGET failed");
 
-  if (texture_target != expected_cl_gl_texture_target) {
-    log_error("clGetGLTextureInfo did not return expected texture target: expected %d, got %d.\n", expected_cl_gl_texture_target, texture_target);
-    return -1;
-  }
+    if (texture_target != expected_cl_gl_texture_target)
+    {
+        log_error("clGetGLTextureInfo did not return expected texture target: "
+                  "expected %d, got %d.\n",
+                  expected_cl_gl_texture_target, texture_target);
+        return -1;
+    }
 
-  error = (*clGetGLTextureInfo_ptr)(mem, CL_GL_MIPMAP_LEVEL, sizeof(mipmap_level), &mipmap_level, NULL);
-  test_error( error, "clGetGLTextureInfo for CL_GL_MIPMAP_LEVEL failed");
+    error = (*clGetGLTextureInfo_ptr)(
+        mem, CL_GL_MIPMAP_LEVEL, sizeof(mipmap_level), &mipmap_level, NULL);
+    test_error(error, "clGetGLTextureInfo for CL_GL_MIPMAP_LEVEL failed");
 
-  if (mipmap_level != expected_cl_gl_mipmap_level) {
-    log_error("clGetGLTextureInfo did not return expected mipmap level: expected %d, got %d.\n", expected_cl_gl_mipmap_level, mipmap_level);
-    return -1;
-  }
+    if (mipmap_level != expected_cl_gl_mipmap_level)
+    {
+        log_error("clGetGLTextureInfo did not return expected mipmap level: "
+                  "expected %d, got %d.\n",
+                  expected_cl_gl_mipmap_level, mipmap_level);
+        return -1;
+    }
 
-  return 0;
+    return 0;
 }
 
 bool CheckGLIntegerExtensionSupport()
@@ -595,22 +659,25 @@ bool CheckGLIntegerExtensionSupport()
     const GLubyte *glVersion = glGetString(GL_VERSION);
     const GLubyte *glExtensionList = glGetString(GL_EXTENSIONS);
 
-    // Check if the OpenGL vrsion is 3.0 or grater or GL_EXT_texture_integer is supported
-    return (((glVersion[0] - '0') >= 3) || (strstr((const char*)glExtensionList, "GL_EXT_texture_integer")));
+    // Check if the OpenGL vrsion is 3.0 or grater or GL_EXT_texture_integer is
+    // supported
+    return (
+        ((glVersion[0] - '0') >= 3)
+        || (strstr((const char *)glExtensionList, "GL_EXT_texture_integer")));
 }
 
-int is_rgb_101010_supported( cl_context context, GLenum gl_target )
+int is_rgb_101010_supported(cl_context context, GLenum gl_target)
 {
-    cl_image_format formatList[ 128 ];
+    cl_image_format formatList[128];
     cl_uint formatCount = 0;
     unsigned int i;
     int error;
 
     cl_mem_object_type image_type;
 
-    switch (get_base_gl_target(gl_target)) {
-        case GL_TEXTURE_1D:
-            image_type = CL_MEM_OBJECT_IMAGE1D;
+    switch (get_base_gl_target(gl_target))
+    {
+        case GL_TEXTURE_1D: image_type = CL_MEM_OBJECT_IMAGE1D;
         case GL_TEXTURE_BUFFER:
             image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;
             break;
@@ -627,27 +694,25 @@ int is_rgb_101010_supported( cl_context context, GLenum gl_target )
         case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
             image_type = CL_MEM_OBJECT_IMAGE2D;
             break;
-        case GL_TEXTURE_3D:
-            image_type = CL_MEM_OBJECT_IMAGE3D;
-        case GL_TEXTURE_1D_ARRAY:
-            image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
+        case GL_TEXTURE_3D: image_type = CL_MEM_OBJECT_IMAGE3D;
+        case GL_TEXTURE_1D_ARRAY: image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;
         case GL_TEXTURE_2D_ARRAY:
             image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;
             break;
-        default:
-            image_type = CL_MEM_OBJECT_IMAGE2D;
+        default: image_type = CL_MEM_OBJECT_IMAGE2D;
     }
 
-    if ((error = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
-                                                                                    image_type, 128, formatList,
-                                                                                    &formatCount ))) {
+    if ((error =
+             clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, image_type,
+                                        128, formatList, &formatCount)))
+    {
         return error;
     }
 
     // Check if the RGB 101010 format is supported
-    for( i = 0; i < formatCount; i++ )
+    for (i = 0; i < formatCount; i++)
     {
-        if( formatList[ i ].image_channel_data_type == CL_UNORM_INT_101010 )
+        if (formatList[i].image_channel_data_type == CL_UNORM_INT_101010)
         {
             return 1;
         }
diff --git a/test_conformance/gl/main.cpp b/test_conformance/gl/main.cpp
index 203e915e..e5d6b65d 100644
--- a/test_conformance/gl/main.cpp
+++ b/test_conformance/gl/main.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -18,7 +18,7 @@
 #include <stdio.h>
 #include <string.h>
 
-#if !defined (__APPLE__)
+#if !defined(__APPLE__)
 #include <CL/cl.h>
 #endif
 
@@ -31,348 +31,393 @@
 #include <unistd.h>
 #endif
 
-static cl_context        sCurrentContext = NULL;
+static cl_context sCurrentContext = NULL;
 
 
-#define TEST_FN_REDIRECT( fn ) ADD_TEST( redirect_##fn )
-#define TEST_FN_REDIRECTOR( fn ) \
-int test_redirect_##fn(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )    \
-{ \
-    int error; \
-    clCommandQueueWrapper realQueue = clCreateCommandQueueWithProperties( sCurrentContext, device, 0, &error ); \
-    test_error( error, "Unable to create command queue" );    \
-    return test_##fn( device, sCurrentContext, realQueue, numElements ); \
-}
+#define TEST_FN_REDIRECT(fn) ADD_TEST(redirect_##fn)
+#define TEST_FN_REDIRECTOR(fn)                                                 \
+    int test_redirect_##fn(cl_device_id device, cl_context context,            \
+                           cl_command_queue queue, int numElements)            \
+    {                                                                          \
+        int error;                                                             \
+        clCommandQueueWrapper realQueue = clCreateCommandQueueWithProperties(  \
+            sCurrentContext, device, 0, &error);                               \
+        test_error(error, "Unable to create command queue");                   \
+        return test_##fn(device, sCurrentContext, realQueue, numElements);     \
+    }
 
 // buffers:
-TEST_FN_REDIRECTOR( buffers )
-TEST_FN_REDIRECTOR( buffers_getinfo )
+TEST_FN_REDIRECTOR(buffers)
+TEST_FN_REDIRECTOR(buffers_getinfo)
 
 // 1D images:
-TEST_FN_REDIRECTOR( images_read_1D )
-TEST_FN_REDIRECTOR( images_write_1D )
-TEST_FN_REDIRECTOR( images_1D_getinfo )
+TEST_FN_REDIRECTOR(images_read_1D)
+TEST_FN_REDIRECTOR(images_write_1D)
+TEST_FN_REDIRECTOR(images_1D_getinfo)
 
 // 1D image arrays:
-TEST_FN_REDIRECTOR( images_read_1Darray )
-TEST_FN_REDIRECTOR( images_write_1Darray )
-TEST_FN_REDIRECTOR( images_1Darray_getinfo )
+TEST_FN_REDIRECTOR(images_read_1Darray)
+TEST_FN_REDIRECTOR(images_write_1Darray)
+TEST_FN_REDIRECTOR(images_1Darray_getinfo)
 
 // 2D images:
-TEST_FN_REDIRECTOR( images_read_2D )
-TEST_FN_REDIRECTOR( images_read_cube )
-TEST_FN_REDIRECTOR( images_write )
-TEST_FN_REDIRECTOR( images_write_cube )
-TEST_FN_REDIRECTOR( images_2D_getinfo )
-TEST_FN_REDIRECTOR( images_cube_getinfo )
+TEST_FN_REDIRECTOR(images_read_2D)
+TEST_FN_REDIRECTOR(images_read_cube)
+TEST_FN_REDIRECTOR(images_write)
+TEST_FN_REDIRECTOR(images_write_cube)
+TEST_FN_REDIRECTOR(images_2D_getinfo)
+TEST_FN_REDIRECTOR(images_cube_getinfo)
 
 // 2D image arrays:
-TEST_FN_REDIRECTOR( images_read_2Darray )
-TEST_FN_REDIRECTOR( images_write_2Darray )
-TEST_FN_REDIRECTOR( images_2Darray_getinfo )
+TEST_FN_REDIRECTOR(images_read_2Darray)
+TEST_FN_REDIRECTOR(images_write_2Darray)
+TEST_FN_REDIRECTOR(images_2Darray_getinfo)
 
 // 3D images:
-TEST_FN_REDIRECTOR( images_read_3D )
-TEST_FN_REDIRECTOR( images_write_3D )
-TEST_FN_REDIRECTOR( images_3D_getinfo )
+TEST_FN_REDIRECTOR(images_read_3D)
+TEST_FN_REDIRECTOR(images_write_3D)
+TEST_FN_REDIRECTOR(images_3D_getinfo)
 
 #ifdef GL_VERSION_3_2
 
-TEST_FN_REDIRECTOR( images_read_texturebuffer )
-TEST_FN_REDIRECTOR( images_write_texturebuffer )
-TEST_FN_REDIRECTOR( images_texturebuffer_getinfo )
+TEST_FN_REDIRECTOR(images_read_texturebuffer)
+TEST_FN_REDIRECTOR(images_write_texturebuffer)
+TEST_FN_REDIRECTOR(images_texturebuffer_getinfo)
 
 // depth textures
-TEST_FN_REDIRECTOR( images_read_2D_depth )
-TEST_FN_REDIRECTOR( images_write_2D_depth )
-TEST_FN_REDIRECTOR( images_read_2Darray_depth )
-TEST_FN_REDIRECTOR( images_write_2Darray_depth )
-
-TEST_FN_REDIRECTOR( images_read_2D_multisample )
-TEST_FN_REDIRECTOR( images_read_2Darray_multisample )
-TEST_FN_REDIRECTOR( image_methods_depth )
-TEST_FN_REDIRECTOR( image_methods_multisample )
+TEST_FN_REDIRECTOR(images_read_2D_depth)
+TEST_FN_REDIRECTOR(images_write_2D_depth)
+TEST_FN_REDIRECTOR(images_read_2Darray_depth)
+TEST_FN_REDIRECTOR(images_write_2Darray_depth)
+
+TEST_FN_REDIRECTOR(images_read_2D_multisample)
+TEST_FN_REDIRECTOR(images_read_2Darray_multisample)
+TEST_FN_REDIRECTOR(image_methods_depth)
+TEST_FN_REDIRECTOR(image_methods_multisample)
 #endif
 
 // Renderbuffer-backed images:
-TEST_FN_REDIRECTOR( renderbuffer_read )
-TEST_FN_REDIRECTOR( renderbuffer_write )
-TEST_FN_REDIRECTOR( renderbuffer_getinfo )
+TEST_FN_REDIRECTOR(renderbuffer_read)
+TEST_FN_REDIRECTOR(renderbuffer_write)
+TEST_FN_REDIRECTOR(renderbuffer_getinfo)
 
-TEST_FN_REDIRECTOR( fence_sync )
+TEST_FN_REDIRECTOR(fence_sync)
 
-test_definition test_list[] = {
-    TEST_FN_REDIRECT( buffers ),
-    TEST_FN_REDIRECT( buffers_getinfo ),
+test_definition test_list[] = { TEST_FN_REDIRECT(buffers),
+                                TEST_FN_REDIRECT(buffers_getinfo),
 
-    TEST_FN_REDIRECT( images_read_1D ),
-    TEST_FN_REDIRECT( images_write_1D ),
-    TEST_FN_REDIRECT( images_1D_getinfo ),
+                                TEST_FN_REDIRECT(images_read_1D),
+                                TEST_FN_REDIRECT(images_write_1D),
+                                TEST_FN_REDIRECT(images_1D_getinfo),
 
-    TEST_FN_REDIRECT( images_read_1Darray ),
-    TEST_FN_REDIRECT( images_write_1Darray ),
-    TEST_FN_REDIRECT( images_1Darray_getinfo ),
+                                TEST_FN_REDIRECT(images_read_1Darray),
+                                TEST_FN_REDIRECT(images_write_1Darray),
+                                TEST_FN_REDIRECT(images_1Darray_getinfo),
 
-    TEST_FN_REDIRECT( images_read_2D ),
-    TEST_FN_REDIRECT( images_write ),
-    TEST_FN_REDIRECT( images_2D_getinfo ),
+                                TEST_FN_REDIRECT(images_read_2D),
+                                TEST_FN_REDIRECT(images_write),
+                                TEST_FN_REDIRECT(images_2D_getinfo),
 
-    TEST_FN_REDIRECT( images_read_cube ),
-    TEST_FN_REDIRECT( images_write_cube ),
-    TEST_FN_REDIRECT( images_cube_getinfo ),
+                                TEST_FN_REDIRECT(images_read_cube),
+                                TEST_FN_REDIRECT(images_write_cube),
+                                TEST_FN_REDIRECT(images_cube_getinfo),
 
-    TEST_FN_REDIRECT( images_read_2Darray ),
-    TEST_FN_REDIRECT( images_write_2Darray),
-    TEST_FN_REDIRECT( images_2Darray_getinfo ),
+                                TEST_FN_REDIRECT(images_read_2Darray),
+                                TEST_FN_REDIRECT(images_write_2Darray),
+                                TEST_FN_REDIRECT(images_2Darray_getinfo),
 
-    TEST_FN_REDIRECT( images_read_3D ),
-    TEST_FN_REDIRECT( images_write_3D ),
-    TEST_FN_REDIRECT( images_3D_getinfo ),
+                                TEST_FN_REDIRECT(images_read_3D),
+                                TEST_FN_REDIRECT(images_write_3D),
+                                TEST_FN_REDIRECT(images_3D_getinfo),
 
-    TEST_FN_REDIRECT( renderbuffer_read ),
-    TEST_FN_REDIRECT( renderbuffer_write ),
-    TEST_FN_REDIRECT( renderbuffer_getinfo )
-};
+                                TEST_FN_REDIRECT(renderbuffer_read),
+                                TEST_FN_REDIRECT(renderbuffer_write),
+                                TEST_FN_REDIRECT(renderbuffer_getinfo) };
 
 test_definition test_list32[] = {
-    TEST_FN_REDIRECT( images_read_texturebuffer ),
-    TEST_FN_REDIRECT( images_write_texturebuffer ),
-    TEST_FN_REDIRECT( images_texturebuffer_getinfo ),
-
-    TEST_FN_REDIRECT( fence_sync ),
-    TEST_FN_REDIRECT( images_read_2D_depth ),
-    TEST_FN_REDIRECT( images_write_2D_depth ),
-    TEST_FN_REDIRECT( images_read_2Darray_depth ),
-    TEST_FN_REDIRECT( images_write_2Darray_depth ),
-    TEST_FN_REDIRECT( images_read_2D_multisample ),
-    TEST_FN_REDIRECT( images_read_2Darray_multisample ),
-    TEST_FN_REDIRECT( image_methods_depth ),
-    TEST_FN_REDIRECT( image_methods_multisample )
+    TEST_FN_REDIRECT(images_read_texturebuffer),
+    TEST_FN_REDIRECT(images_write_texturebuffer),
+    TEST_FN_REDIRECT(images_texturebuffer_getinfo),
+
+    TEST_FN_REDIRECT(fence_sync),
+    TEST_FN_REDIRECT(images_read_2D_depth),
+    TEST_FN_REDIRECT(images_write_2D_depth),
+    TEST_FN_REDIRECT(images_read_2Darray_depth),
+    TEST_FN_REDIRECT(images_write_2Darray_depth),
+    TEST_FN_REDIRECT(images_read_2D_multisample),
+    TEST_FN_REDIRECT(images_read_2Darray_multisample),
+    TEST_FN_REDIRECT(image_methods_depth),
+    TEST_FN_REDIRECT(image_methods_multisample)
 };
 
-const int test_num = ARRAY_SIZE( test_list );
-const int test_num32 = ARRAY_SIZE( test_list32 );
+const int test_num = ARRAY_SIZE(test_list);
+const int test_num32 = ARRAY_SIZE(test_list32);
 
 int main(int argc, const char *argv[])
 {
-  gTestRounding = true;
-  int error = 0;
-  int numErrors = 0;
-
-  test_start();
-  argc = parseCustomParam(argc, argv);
-  if (argc == -1)
-  {
-    return -1;
-  }	
-
-  cl_device_type requestedDeviceType = CL_DEVICE_TYPE_DEFAULT;
-
-  /* Do we have a CPU/GPU specification? */
-  if( argc > 1 )
-  {
-    if( strcmp( argv[ argc - 1 ], "gpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_GPU" ) == 0 )
-    {
-      requestedDeviceType = CL_DEVICE_TYPE_GPU;
-      argc--;
-  }
-    else if( strcmp( argv[ argc - 1 ], "cpu" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_CPU" ) == 0 )
-    {
-      requestedDeviceType = CL_DEVICE_TYPE_CPU;
-      argc--;
-    }
-    else if( strcmp( argv[ argc - 1 ], "accelerator" ) == 0 || strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
+    gTestRounding = true;
+    int error = 0;
+    int numErrors = 0;
+
+    test_start();
+    argc = parseCustomParam(argc, argv);
+    if (argc == -1)
     {
-      requestedDeviceType = CL_DEVICE_TYPE_ACCELERATOR;
-      argc--;
+        return -1;
     }
-    else if( strcmp( argv[ argc - 1 ], "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
+
+    cl_device_type requestedDeviceType = CL_DEVICE_TYPE_DEFAULT;
+
+    /* Do we have a CPU/GPU specification? */
+    if (argc > 1)
     {
-      requestedDeviceType = CL_DEVICE_TYPE_DEFAULT;
-      argc--;
+        if (strcmp(argv[argc - 1], "gpu") == 0
+            || strcmp(argv[argc - 1], "CL_DEVICE_TYPE_GPU") == 0)
+        {
+            requestedDeviceType = CL_DEVICE_TYPE_GPU;
+            argc--;
+        }
+        else if (strcmp(argv[argc - 1], "cpu") == 0
+                 || strcmp(argv[argc - 1], "CL_DEVICE_TYPE_CPU") == 0)
+        {
+            requestedDeviceType = CL_DEVICE_TYPE_CPU;
+            argc--;
+        }
+        else if (strcmp(argv[argc - 1], "accelerator") == 0
+                 || strcmp(argv[argc - 1], "CL_DEVICE_TYPE_ACCELERATOR") == 0)
+        {
+            requestedDeviceType = CL_DEVICE_TYPE_ACCELERATOR;
+            argc--;
+        }
+        else if (strcmp(argv[argc - 1], "CL_DEVICE_TYPE_DEFAULT") == 0)
+        {
+            requestedDeviceType = CL_DEVICE_TYPE_DEFAULT;
+            argc--;
+        }
     }
-  }
 
-    if( argc > 1 && strcmp( argv[ 1 ], "-list" ) == 0 )
+    if (argc > 1 && strcmp(argv[1], "-list") == 0)
     {
-        log_info( "Available 2.x tests:\n" );
-        for( int i = 0; i < test_num; i++ )
-            log_info( "\t%s\n", test_list[i].name );
+        log_info("Available 2.x tests:\n");
+        for (int i = 0; i < test_num; i++)
+            log_info("\t%s\n", test_list[i].name);
 
-        log_info( "Available 3.2 tests:\n" );
-        for( int i = 0; i < test_num32; i++ )
-            log_info( "\t%s\n", test_list32[i].name );
+        log_info("Available 3.2 tests:\n");
+        for (int i = 0; i < test_num32; i++)
+            log_info("\t%s\n", test_list32[i].name);
 
-    log_info( "Note: Any 3.2 test names must follow 2.1 test names on the command line.\n" );
-    log_info( "Use environment variables to specify desired device.\n" );
+        log_info("Note: Any 3.2 test names must follow 2.1 test names on the "
+                 "command line.\n");
+        log_info("Use environment variables to specify desired device.\n");
 
         return 0;
     }
 
-  // Check to see if any 2.x or 3.2 test names were specified on the command line.
-  unsigned first_32_testname = 0;
+    // Check to see if any 2.x or 3.2 test names were specified on the command
+    // line.
+    unsigned first_32_testname = 0;
 
-  for (int j=1; (j<argc) && (!first_32_testname); ++j)
-    for (int i = 0; i < test_num32; ++i)
-      if (strcmp(test_list32[i].name, argv[j]) == 0) {
-        first_32_testname = j;
-        break;
-      }
+    for (int j = 1; (j < argc) && (!first_32_testname); ++j)
+        for (int i = 0; i < test_num32; ++i)
+            if (strcmp(test_list32[i].name, argv[j]) == 0)
+            {
+                first_32_testname = j;
+                break;
+            }
 
-  // Create the environment for the test.
+    // Create the environment for the test.
     GLEnvironment *glEnv = GLEnvironment::Instance();
 
-  // Check if any devices of the requested type support CL/GL interop.
-  int supported = glEnv->SupportsCLGLInterop( requestedDeviceType );
-  if( supported == 0 ) {
-    log_info("Test not run because GL-CL interop is not supported for any devices of the requested type.\n");
-    return 0;
-  } else if ( supported == -1 ) {
-    log_error("Unable to setup the test or failed to determine if CL-GL interop is supported.\n");
-    return -1;
-  }
-
-  // Initialize function pointers.
-  error = init_clgl_ext();
-  if (error < 0) {
-    return error;
-  }
-
-  // OpenGL tests for non-3.2 ////////////////////////////////////////////////////////
-  if ((argc == 1) || (first_32_testname != 1)) {
-
-    // At least one device supports CL-GL interop, so init the test.
-    if( glEnv->Init( &argc, (char **)argv, CL_FALSE ) ) {
-      log_error("Failed to initialize the GL environment for this test.\n");
-      return -1;
+    // Check if any devices of the requested type support CL/GL interop.
+    int supported = glEnv->SupportsCLGLInterop(requestedDeviceType);
+    if (supported == 0)
+    {
+        log_info("Test not run because GL-CL interop is not supported for any "
+                 "devices of the requested type.\n");
+        return 0;
     }
-
-    // Create a context to use and then grab a device (or devices) from it
-    sCurrentContext = glEnv->CreateCLContext();
-    if( sCurrentContext == NULL )
-      {
-        log_error( "ERROR: Unable to obtain CL context from GL\n" );
-        return -1;
-      }
-
-    size_t numDevices = 0;
-    cl_device_id *deviceIDs;
-
-    error = clGetContextInfo( sCurrentContext, CL_CONTEXT_DEVICES, 0, NULL, &numDevices);
-    if( error != CL_SUCCESS )
-      {
-        print_error( error, "Unable to get device count from context" );
-        return -1;
-      }
-    deviceIDs = (cl_device_id *)malloc(numDevices);
-    if (deviceIDs == NULL) {
-        print_error( error, "malloc failed" );
+    else if (supported == -1)
+    {
+        log_error("Unable to setup the test or failed to determine if CL-GL "
+                  "interop is supported.\n");
         return -1;
     }
-    error = clGetContextInfo( sCurrentContext, CL_CONTEXT_DEVICES, numDevices, deviceIDs, NULL);
-    if( error != CL_SUCCESS ) {
-      print_error( error, "Unable to get device list from context" );
-      return -1;
-    }
-
-    numDevices /= sizeof(cl_device_id);
 
-    if (numDevices < 1) {
-      log_error("No devices found.\n");
-      return -1;
+    // Initialize function pointers.
+    error = init_clgl_ext();
+    if (error < 0)
+    {
+        return error;
     }
 
-    // Execute tests.
-    int argc_ = (first_32_testname) ? first_32_testname : argc;
+    // OpenGL tests for non-3.2
+    // ////////////////////////////////////////////////////////
+    if ((argc == 1) || (first_32_testname != 1))
+    {
 
-      for( size_t i = 0; i < numDevices; i++ ) {
-        log_info( "\nTesting OpenGL 2.x\n" );
-        if( printDeviceHeader( deviceIDs[ i ] ) != CL_SUCCESS ) {
-          return -1;
+        // At least one device supports CL-GL interop, so init the test.
+        if (glEnv->Init(&argc, (char **)argv, CL_FALSE))
+        {
+            log_error(
+                "Failed to initialize the GL environment for this test.\n");
+            return -1;
         }
 
-        // Note: don't use the entire harness, because we have a different way of obtaining the device (via the context)
-        error = parseAndCallCommandLineTests( argc_, argv, deviceIDs[i], test_num, test_list, true, 0, 1024 );
-        if( error != 0 )
-          break;
-    }
+        // Create a context to use and then grab a device (or devices) from it
+        sCurrentContext = glEnv->CreateCLContext();
+        if (sCurrentContext == NULL)
+        {
+            log_error("ERROR: Unable to obtain CL context from GL\n");
+            return -1;
+        }
 
-    numErrors += error;
+        size_t numDevices = 0;
+        cl_device_id *deviceIDs;
 
-    // Clean-up.
-      free(deviceIDs);
-      clReleaseContext( sCurrentContext );
-      //delete glEnv;
-  }
+        error = clGetContextInfo(sCurrentContext, CL_CONTEXT_DEVICES, 0, NULL,
+                                 &numDevices);
+        if (error != CL_SUCCESS)
+        {
+            print_error(error, "Unable to get device count from context");
+            return -1;
+        }
+        deviceIDs = (cl_device_id *)malloc(numDevices);
+        if (deviceIDs == NULL)
+        {
+            print_error(error, "malloc failed");
+            return -1;
+        }
+        error = clGetContextInfo(sCurrentContext, CL_CONTEXT_DEVICES,
+                                 numDevices, deviceIDs, NULL);
+        if (error != CL_SUCCESS)
+        {
+            print_error(error, "Unable to get device list from context");
+            return -1;
+        }
 
-  // OpenGL 3.2 tests. ////////////////////////////////////////////////////////
-  if ((argc==1) || first_32_testname) {
+        numDevices /= sizeof(cl_device_id);
 
-    // At least one device supports CL-GL interop, so init the test.
-    if( glEnv->Init( &argc, (char **)argv, CL_TRUE ) ) {
-      log_error("Failed to initialize the GL environment for this test.\n");
-      return -1;
-    }
+        if (numDevices < 1)
+        {
+            log_error("No devices found.\n");
+            return -1;
+        }
 
-    // Create a context to use and then grab a device (or devices) from it
-    sCurrentContext = glEnv->CreateCLContext();
-    if( sCurrentContext == NULL ) {
-      log_error( "ERROR: Unable to obtain CL context from GL\n" );
-      return -1;
-    }
+        // Execute tests.
+        int argc_ = (first_32_testname) ? first_32_testname : argc;
+
+        for (size_t i = 0; i < numDevices; i++)
+        {
+            log_info("\nTesting OpenGL 2.x\n");
+            if (printDeviceHeader(deviceIDs[i]) != CL_SUCCESS)
+            {
+                return -1;
+            }
+
+            // Note: don't use the entire harness, because we have a different
+            // way of obtaining the device (via the context)
+            test_harness_config config{};
+            config.forceNoContextCreation = true;
+            config.numElementsToUse = 1024;
+            config.queueProps = 0;
+            error = parseAndCallCommandLineTests(argc_, argv, deviceIDs[i],
+                                                 test_num, test_list, config);
+            if (error != 0) break;
+        }
 
-    size_t numDevices = 0;
-    cl_device_id *deviceIDs;
+        numErrors += error;
 
-    error = clGetContextInfo( sCurrentContext, CL_CONTEXT_DEVICES, 0, NULL, &numDevices);
-    if( error != CL_SUCCESS ) {
-      print_error( error, "Unable to get device count from context" );
-      return -1;
-    }
-    deviceIDs = (cl_device_id *)malloc(numDevices);
-    if (deviceIDs == NULL) {
-        print_error( error, "malloc failed" );
-        return -1;
-    }
-    error = clGetContextInfo( sCurrentContext, CL_CONTEXT_DEVICES, numDevices, deviceIDs, NULL);
-    if( error != CL_SUCCESS ) {
-      print_error( error, "Unable to get device list from context" );
-      return -1;
+        // Clean-up.
+        free(deviceIDs);
+        clReleaseContext(sCurrentContext);
+        // delete glEnv;
     }
 
-    numDevices /= sizeof(cl_device_id);
+    // OpenGL 3.2 tests.
+    // ////////////////////////////////////////////////////////
+    if ((argc == 1) || first_32_testname)
+    {
 
-    if (numDevices < 1) {
-      log_error("No devices found.\n");
-      return -1;
-    }
+        // At least one device supports CL-GL interop, so init the test.
+        if (glEnv->Init(&argc, (char **)argv, CL_TRUE))
+        {
+            log_error(
+                "Failed to initialize the GL environment for this test.\n");
+            return -1;
+        }
 
-    int argc_ = (first_32_testname) ? 1 + (argc - first_32_testname) : argc;
-    const char** argv_ = (first_32_testname) ? &argv[first_32_testname-1] : argv;
+        // Create a context to use and then grab a device (or devices) from it
+        sCurrentContext = glEnv->CreateCLContext();
+        if (sCurrentContext == NULL)
+        {
+            log_error("ERROR: Unable to obtain CL context from GL\n");
+            return -1;
+        }
 
-    // Execute the tests.
-      for( size_t i = 0; i < numDevices; i++ ) {
-        log_info( "\nTesting OpenGL 3.2\n" );
-        if( printDeviceHeader( deviceIDs[ i ] ) != CL_SUCCESS ) {
-          return -1;
+        size_t numDevices = 0;
+        cl_device_id *deviceIDs;
+
+        error = clGetContextInfo(sCurrentContext, CL_CONTEXT_DEVICES, 0, NULL,
+                                 &numDevices);
+        if (error != CL_SUCCESS)
+        {
+            print_error(error, "Unable to get device count from context");
+            return -1;
+        }
+        deviceIDs = (cl_device_id *)malloc(numDevices);
+        if (deviceIDs == NULL)
+        {
+            print_error(error, "malloc failed");
+            return -1;
+        }
+        error = clGetContextInfo(sCurrentContext, CL_CONTEXT_DEVICES,
+                                 numDevices, deviceIDs, NULL);
+        if (error != CL_SUCCESS)
+        {
+            print_error(error, "Unable to get device list from context");
+            return -1;
         }
 
-        // Note: don't use the entire harness, because we have a different way of obtaining the device (via the context)
-        error = parseAndCallCommandLineTests( argc_, argv_, deviceIDs[i], test_num32, test_list32, true, 0, 1024 );
-        if( error != 0 )
-          break;
-    }
+        numDevices /= sizeof(cl_device_id);
+
+        if (numDevices < 1)
+        {
+            log_error("No devices found.\n");
+            return -1;
+        }
 
-    numErrors += error;
+        int argc_ = (first_32_testname) ? 1 + (argc - first_32_testname) : argc;
+        const char **argv_ =
+            (first_32_testname) ? &argv[first_32_testname - 1] : argv;
+
+        // Execute the tests.
+        for (size_t i = 0; i < numDevices; i++)
+        {
+            log_info("\nTesting OpenGL 3.2\n");
+            if (printDeviceHeader(deviceIDs[i]) != CL_SUCCESS)
+            {
+                return -1;
+            }
+
+            // Note: don't use the entire harness, because we have a different
+            // way of obtaining the device (via the context)
+            test_harness_config config{};
+            config.forceNoContextCreation = true;
+            config.numElementsToUse = 1024;
+            config.queueProps = 0;
+            error = parseAndCallCommandLineTests(
+                argc_, argv_, deviceIDs[i], test_num32, test_list32, config);
+            if (error != 0) break;
+        }
 
-    // Clean-up.
-      free(deviceIDs);
-      clReleaseContext( sCurrentContext );
-      delete glEnv;
+        numErrors += error;
 
-  }
+        // Clean-up.
+        free(deviceIDs);
+        clReleaseContext(sCurrentContext);
+        delete glEnv;
+    }
 
-  //All done.
-  return numErrors;
+    // All done.
+    return numErrors;
 }
-
diff --git a/test_conformance/gl/procs.h b/test_conformance/gl/procs.h
index b14e22dc..111de7a6 100644
--- a/test_conformance/gl/procs.h
+++ b/test_conformance/gl/procs.h
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -20,121 +20,134 @@
 #pragma mark -
 #pragma Misc tests
 
-extern int test_buffers( cl_device_id device, cl_context context,
-  cl_command_queue queue, int num_elements );
+extern int test_buffers(cl_device_id device, cl_context context,
+                        cl_command_queue queue, int num_elements);
 
-extern int test_fence_sync( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements );
+extern int test_fence_sync(cl_device_id device, cl_context context,
+                           cl_command_queue queue, int numElements);
 
 
 #pragma mark -
 #pragma mark Tead tests
 
-extern int test_images_read_2D( cl_device_id device, cl_context context,
-  cl_command_queue queue, int num_elements );
+extern int test_images_read_2D(cl_device_id device, cl_context context,
+                               cl_command_queue queue, int num_elements);
 
-extern int test_images_read_1D( cl_device_id device, cl_context context,
-  cl_command_queue queue, int num_elements );
+extern int test_images_read_1D(cl_device_id device, cl_context context,
+                               cl_command_queue queue, int num_elements);
 
-extern int test_images_read_texturebuffer( cl_device_id device, cl_context context,
-                               cl_command_queue queue, int num_elements );
+extern int test_images_read_texturebuffer(cl_device_id device,
+                                          cl_context context,
+                                          cl_command_queue queue,
+                                          int num_elements);
 
-extern int test_images_read_1Darray( cl_device_id device, cl_context context,
-  cl_command_queue queue, int num_elements );
+extern int test_images_read_1Darray(cl_device_id device, cl_context context,
+                                    cl_command_queue queue, int num_elements);
 
-extern int test_images_read_2Darray( cl_device_id device, cl_context context,
-  cl_command_queue queue, int num_elements );
+extern int test_images_read_2Darray(cl_device_id device, cl_context context,
+                                    cl_command_queue queue, int num_elements);
 
-extern int test_images_read_cube( cl_device_id device, cl_context context,
-  cl_command_queue queue, int num_elements );
+extern int test_images_read_cube(cl_device_id device, cl_context context,
+                                 cl_command_queue queue, int num_elements);
 
-extern int test_images_read_3D( cl_device_id device, cl_context context,
-  cl_command_queue queue, int num_elements );
+extern int test_images_read_3D(cl_device_id device, cl_context context,
+                               cl_command_queue queue, int num_elements);
 
-extern int test_renderbuffer_read( cl_device_id device, cl_context context,
-  cl_command_queue queue, int num_elements );
+extern int test_renderbuffer_read(cl_device_id device, cl_context context,
+                                  cl_command_queue queue, int num_elements);
 
 #pragma mark -
 #pragma mark Write tests
 
 // 2D tests are the ones with no suffix:
 
-extern int test_images_write( cl_device_id device, cl_context context,
-  cl_command_queue queue, int num_elements );
+extern int test_images_write(cl_device_id device, cl_context context,
+                             cl_command_queue queue, int num_elements);
 
-extern int test_images_write_cube( cl_device_id device, cl_context context,
-  cl_command_queue queue, int num_elements );
+extern int test_images_write_cube(cl_device_id device, cl_context context,
+                                  cl_command_queue queue, int num_elements);
 
-extern int test_renderbuffer_write( cl_device_id device, cl_context context,
-  cl_command_queue queue, int num_elements );
+extern int test_renderbuffer_write(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int num_elements);
 
 // Here are the rest:
 
-extern int test_images_write_1D( cl_device_id device, cl_context context,
-  cl_command_queue queue, int num_elements );
+extern int test_images_write_1D(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int num_elements);
 
-extern int test_images_write_texturebuffer( cl_device_id device, cl_context context,
-                                cl_command_queue queue, int num_elements );
+extern int test_images_write_texturebuffer(cl_device_id device,
+                                           cl_context context,
+                                           cl_command_queue queue,
+                                           int num_elements);
 
-extern int test_images_write_1Darray( cl_device_id device, cl_context context,
-  cl_command_queue queue, int num_elements );
+extern int test_images_write_1Darray(cl_device_id device, cl_context context,
+                                     cl_command_queue queue, int num_elements);
 
-extern int test_images_write_2Darray( cl_device_id device, cl_context context,
-  cl_command_queue queue, int num_elements );
+extern int test_images_write_2Darray(cl_device_id device, cl_context context,
+                                     cl_command_queue queue, int num_elements);
 
-extern int test_images_write_3D( cl_device_id device, cl_context context,
-  cl_command_queue queue, int num_elements );
+extern int test_images_write_3D(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int num_elements);
 
 #pragma mark -
 #pragma mark Get info test entry points
 
-extern int test_buffers_getinfo( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements );
+extern int test_buffers_getinfo(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int numElements);
 
-extern int test_images_1D_getinfo( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements );
+extern int test_images_1D_getinfo(cl_device_id device, cl_context context,
+                                  cl_command_queue queue, int numElements);
 
-extern int test_images_texturebuffer_getinfo( cl_device_id device, cl_context context,
-                                  cl_command_queue queue, int numElements );
+extern int test_images_texturebuffer_getinfo(cl_device_id device,
+                                             cl_context context,
+                                             cl_command_queue queue,
+                                             int numElements);
 
-extern int test_images_1Darray_getinfo( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements );
+extern int test_images_1Darray_getinfo(cl_device_id device, cl_context context,
+                                       cl_command_queue queue, int numElements);
 
-extern int test_images_2D_getinfo( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements );
+extern int test_images_2D_getinfo(cl_device_id device, cl_context context,
+                                  cl_command_queue queue, int numElements);
 
-extern int test_images_2Darray_getinfo( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements );
+extern int test_images_2Darray_getinfo(cl_device_id device, cl_context context,
+                                       cl_command_queue queue, int numElements);
 
-extern int test_images_cube_getinfo( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements );
+extern int test_images_cube_getinfo(cl_device_id device, cl_context context,
+                                    cl_command_queue queue, int numElements);
 
-extern int test_images_3D_getinfo( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements );
+extern int test_images_3D_getinfo(cl_device_id device, cl_context context,
+                                  cl_command_queue queue, int numElements);
 
-extern int test_images_read_2D_depth( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements );
+extern int test_images_read_2D_depth(cl_device_id device, cl_context context,
+                                     cl_command_queue queue, int numElements);
 
-extern int test_images_write_2D_depth( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements );
+extern int test_images_write_2D_depth(cl_device_id device, cl_context context,
+                                      cl_command_queue queue, int numElements);
 
-extern int test_images_read_2Darray_depth( cl_device_id device, cl_context context,
-  cl_command_queue queue, int );
+extern int test_images_read_2Darray_depth(cl_device_id device,
+                                          cl_context context,
+                                          cl_command_queue queue, int);
 
-extern int test_images_write_2Darray_depth( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements );
+extern int test_images_write_2Darray_depth(cl_device_id device,
+                                           cl_context context,
+                                           cl_command_queue queue,
+                                           int numElements);
 
-extern int test_images_read_2D_multisample( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements );
+extern int test_images_read_2D_multisample(cl_device_id device,
+                                           cl_context context,
+                                           cl_command_queue queue,
+                                           int numElements);
 
-extern int test_images_read_2Darray_multisample( cl_device_id device, cl_context context,
-  cl_command_queue queue, int );
+extern int test_images_read_2Darray_multisample(cl_device_id device,
+                                                cl_context context,
+                                                cl_command_queue queue, int);
 
-extern int test_image_methods_depth( cl_device_id device, cl_context context,
-  cl_command_queue queue, int );
+extern int test_image_methods_depth(cl_device_id device, cl_context context,
+                                    cl_command_queue queue, int);
 
-extern int test_image_methods_multisample( cl_device_id device, cl_context context,
-  cl_command_queue queue, int );
+extern int test_image_methods_multisample(cl_device_id device,
+                                          cl_context context,
+                                          cl_command_queue queue, int);
 
-extern int test_renderbuffer_getinfo( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements );
-\ No newline at end of file
+extern int test_renderbuffer_getinfo(cl_device_id device, cl_context context,
+                                     cl_command_queue queue, int numElements);
+\ No newline at end of file
diff --git a/test_conformance/gl/testBase.h b/test_conformance/gl/testBase.h
index 1ac0f50b..7e187536 100644
--- a/test_conformance/gl/testBase.h
+++ b/test_conformance/gl/testBase.h
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -23,7 +23,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 
-#if !defined (__APPLE__)
+#if !defined(__APPLE__)
 #include <CL/cl.h>
 #include "gl/gl_headers.h"
 #include <CL/cl_gl.h>
@@ -34,32 +34,46 @@
 #include "harness/imageHelpers.h"
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
-#include "harness/threadTesting.h"
 #include "harness/typeWrappers.h"
 #include "harness/conversions.h"
 #include "harness/mt19937.h"
 
 #include "gl/helpers.h"
 
-extern const char *get_kernel_suffix( cl_image_format *format );
-extern const char *get_write_conversion( cl_image_format *format, ExplicitType type);
-extern ExplicitType get_read_kernel_type( cl_image_format *format );
-extern ExplicitType get_write_kernel_type( cl_image_format *format );
+extern const char *get_kernel_suffix(cl_image_format *format);
+extern const char *get_write_conversion(cl_image_format *format,
+                                        ExplicitType type);
+extern ExplicitType get_read_kernel_type(cl_image_format *format);
+extern ExplicitType get_write_kernel_type(cl_image_format *format);
 
-extern char * convert_to_expected( void * inputBuffer, size_t numPixels, ExplicitType inType, ExplicitType outType, size_t channelNum, GLenum glDataType = 0);
-extern int validate_integer_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t sampleNum, size_t typeSize );
-extern int validate_integer_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t depth, size_t sampleNum, size_t typeSize );
-extern int validate_float_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t sampleNum, size_t channelNum );
-extern int validate_float_results( void *expectedResults, void *actualResults, size_t width, size_t height, size_t depth, size_t sampleNum, size_t channelNum );
-extern int validate_float_results_rgb_101010( void *expectedResults, void *actualResults, size_t width, size_t height, size_t sampleNum );
-extern int validate_float_results_rgb_101010( void *expectedResults, void *actualResults, size_t width, size_t height, size_t depth, size_t sampleNum );
+extern char *convert_to_expected(void *inputBuffer, size_t numPixels,
+                                 ExplicitType inType, ExplicitType outType,
+                                 size_t channelNum, GLenum glDataType = 0);
+extern int validate_integer_results(void *expectedResults, void *actualResults,
+                                    size_t width, size_t height,
+                                    size_t sampleNum, size_t typeSize);
+extern int validate_integer_results(void *expectedResults, void *actualResults,
+                                    size_t width, size_t height, size_t depth,
+                                    size_t sampleNum, size_t typeSize);
+extern int validate_float_results(void *expectedResults, void *actualResults,
+                                  size_t width, size_t height, size_t sampleNum,
+                                  size_t channelNum);
+extern int validate_float_results(void *expectedResults, void *actualResults,
+                                  size_t width, size_t height, size_t depth,
+                                  size_t sampleNum, size_t channelNum);
+extern int validate_float_results_rgb_101010(void *expectedResults,
+                                             void *actualResults, size_t width,
+                                             size_t height, size_t sampleNum);
+extern int validate_float_results_rgb_101010(void *expectedResults,
+                                             void *actualResults, size_t width,
+                                             size_t height, size_t depth,
+                                             size_t sampleNum);
 
-extern int CheckGLObjectInfo(cl_mem mem, cl_gl_object_type expected_cl_gl_type, GLuint expected_gl_name,
-                             GLenum expected_cl_gl_texture_target, GLint expected_cl_gl_mipmap_level);
+extern int CheckGLObjectInfo(cl_mem mem, cl_gl_object_type expected_cl_gl_type,
+                             GLuint expected_gl_name,
+                             GLenum expected_cl_gl_texture_target,
+                             GLint expected_cl_gl_mipmap_level);
 
 extern bool CheckGLIntegerExtensionSupport();
 
 #endif // _testBase_h
-
-
-
diff --git a/test_conformance/gl/test_fence_sync.cpp b/test_conformance/gl/test_fence_sync.cpp
index 35cc62de..088d7497 100644
--- a/test_conformance/gl/test_fence_sync.cpp
+++ b/test_conformance/gl/test_fence_sync.cpp
@@ -113,6 +113,7 @@ typedef cl_event(CL_API_CALL *clCreateEventFromGLsyncKHR_fn)(
 clCreateEventFromGLsyncKHR_fn clCreateEventFromGLsyncKHR_ptr;
 
 
+// clang-format off
 static const char *updateBuffersKernel[] = {
     "__kernel void update( __global float4 * vertices, __global float4 "
     "*colors, int horizWrap, int rowIdx )\n"
@@ -132,6 +133,7 @@ static const char *updateBuffersKernel[] = {
     "    colors[ tid * 2 + 1 ] = colors[ tid * 2 + 0 ];\n"
     "}\n"
 };
+// clang-format on
 
 // Passthrough VertexShader
 static const char *vertexshader = "#version 150\n"
diff --git a/test_conformance/gl/test_image_methods.cpp b/test_conformance/gl/test_image_methods.cpp
index 7d055fb2..187f2e6e 100644
--- a/test_conformance/gl/test_image_methods.cpp
+++ b/test_conformance/gl/test_image_methods.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -24,16 +24,17 @@ struct image_kernel_data
     cl_int width;
     cl_int height;
     cl_int depth;
-  cl_int arraySize;
+    cl_int arraySize;
     cl_int widthDim;
     cl_int heightDim;
     cl_int channelType;
     cl_int channelOrder;
     cl_int expectedChannelType;
     cl_int expectedChannelOrder;
-  cl_int numSamples;
+    cl_int numSamples;
 };
 
+// clang-format off
 static const char *methodTestKernelPattern = 
 "%s"
 "typedef struct {\n"
@@ -53,239 +54,260 @@ static const char *methodTestKernelPattern =
 "{\n"
 "%s%s%s%s%s%s%s%s%s%s%s"
 "}\n";
+// clang-format on
 
 static const char *arraySizeKernelLine =
-"   outData->arraySize = get_image_array_size( input );\n";
+    "   outData->arraySize = get_image_array_size( input );\n";
 static const char *imageWidthKernelLine =
-"   outData->width = get_image_width( input );\n";
+    "   outData->width = get_image_width( input );\n";
 static const char *imageHeightKernelLine =
-"   outData->height = get_image_height( input );\n";
+    "   outData->height = get_image_height( input );\n";
 static const char *imageDimKernelLine =
-"   int2 dim = get_image_dim( input );\n";
-static const char *imageWidthDimKernelLine =
-"   outData->widthDim = dim.x;\n";
+    "   int2 dim = get_image_dim( input );\n";
+static const char *imageWidthDimKernelLine = "   outData->widthDim = dim.x;\n";
 static const char *imageHeightDimKernelLine =
-"   outData->heightDim = dim.y;\n";
+    "   outData->heightDim = dim.y;\n";
 static const char *channelTypeKernelLine =
-"   outData->channelType = get_image_channel_data_type( input );\n";
+    "   outData->channelType = get_image_channel_data_type( input );\n";
 static const char *channelTypeConstLine =
-"   outData->expectedChannelType = CLK_%s;\n";
+    "   outData->expectedChannelType = CLK_%s;\n";
 static const char *channelOrderKernelLine =
-"   outData->channelOrder = get_image_channel_order( input );\n";
+    "   outData->channelOrder = get_image_channel_order( input );\n";
 static const char *channelOrderConstLine =
-"   outData->expectedChannelOrder = CLK_%s;\n";
+    "   outData->expectedChannelOrder = CLK_%s;\n";
 static const char *numSamplesKernelLine =
-"   outData->numSamples = get_image_num_samples( input );\n";
+    "   outData->numSamples = get_image_num_samples( input );\n";
 static const char *enableMSAAKernelLine =
-"#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n";
+    "#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable\n";
 
-static int verify(cl_int input, cl_int kernelOutput, const char * description)
+static int verify(cl_int input, cl_int kernelOutput, const char *description)
 {
-  if( kernelOutput != input )
-  {
-    log_error( "ERROR: %s did not validate (expected %d, got %d)\n", description, input, kernelOutput);
-      return -1;
-  }
-  return 0;
+    if (kernelOutput != input)
+    {
+        log_error("ERROR: %s did not validate (expected %d, got %d)\n",
+                  description, input, kernelOutput);
+        return -1;
+    }
+    return 0;
 }
 
-extern int supportsMsaa(cl_context context, bool* supports_msaa);
-extern int supportsDepth(cl_context context, bool* supports_depth);
+extern int supportsMsaa(cl_context context, bool *supports_msaa);
+extern int supportsDepth(cl_context context, bool *supports_depth);
 
-int test_image_format_methods( cl_device_id device, cl_context context, cl_command_queue queue,
-                       size_t width, size_t height, size_t arraySize, size_t samples,
-                        GLenum target, format format, MTdata d )
+int test_image_format_methods(cl_device_id device, cl_context context,
+                              cl_command_queue queue, size_t width,
+                              size_t height, size_t arraySize, size_t samples,
+                              GLenum target, format format, MTdata d)
 {
-    int error, result=0;
+    int error, result = 0;
 
     clProgramWrapper program;
     clKernelWrapper kernel;
     clMemWrapper image, outDataBuffer;
-    char programSrc[ 10240 ];
+    char programSrc[10240];
 
-    image_kernel_data    outKernelData;
+    image_kernel_data outKernelData;
 
 #ifdef GL_VERSION_3_2
-    if (get_base_gl_target(target) == GL_TEXTURE_2D_MULTISAMPLE ||
-        get_base_gl_target(target) == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)
+    if (get_base_gl_target(target) == GL_TEXTURE_2D_MULTISAMPLE
+        || get_base_gl_target(target) == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)
     {
         bool supports_msaa;
         error = supportsMsaa(context, &supports_msaa);
-        if( error != 0 ) return error;
+        if (error != 0) return error;
         if (!supports_msaa) return 0;
     }
-    if (format.formattype == GL_DEPTH_COMPONENT ||
-        format.formattype == GL_DEPTH_STENCIL)
+    if (format.formattype == GL_DEPTH_COMPONENT
+        || format.formattype == GL_DEPTH_STENCIL)
     {
         bool supports_depth;
         error = supportsDepth(context, &supports_depth);
-        if( error != 0 ) return error;
+        if (error != 0) return error;
         if (!supports_depth) return 0;
     }
 #endif
-  DetectFloatToHalfRoundingMode(queue);
-
-  glTextureWrapper glTexture;
-  switch (get_base_gl_target(target)) {
-    case GL_TEXTURE_2D:
-      CreateGLTexture2D( width, height, target,
-                        format.formattype, format.internal, format.datatype,
-                        format.type, &glTexture, &error, false, d );
-      break;
-    case GL_TEXTURE_2D_ARRAY:
-      CreateGLTexture2DArray( width, height, arraySize, target,
-                             format.formattype, format.internal, format.datatype,
-                             format.type, &glTexture, &error, false, d );
-      break;
-    case GL_TEXTURE_2D_MULTISAMPLE:
-      CreateGLTexture2DMultisample( width, height, samples, target,
-                                   format.formattype, format.internal, format.datatype,
-                                   format.type, &glTexture, &error, false, d, false);
-      break;
-    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
-      CreateGLTexture2DArrayMultisample( width, height, arraySize, samples, target,
-                                        format.formattype, format.internal, format.datatype,
-                                        format.type, &glTexture, &error, false, d, false);
-      break;
-
-    default:
-      log_error("Unsupported GL tex target (%s) passed to write test: "
-                "%s (%s):%d", GetGLTargetName(target), __FUNCTION__,
-                __FILE__, __LINE__);
-  }
-
-  // Check to see if the texture could not be created for some other reason like
-  // GL_FRAMEBUFFER_UNSUPPORTED
-  if (error == GL_FRAMEBUFFER_UNSUPPORTED) {
-    return 0;
-  }
+    DetectFloatToHalfRoundingMode(queue);
+
+    glTextureWrapper glTexture;
+    switch (get_base_gl_target(target))
+    {
+        case GL_TEXTURE_2D:
+            CreateGLTexture2D(width, height, target, format.formattype,
+                              format.internal, format.datatype, format.type,
+                              &glTexture, &error, false, d);
+            break;
+        case GL_TEXTURE_2D_ARRAY:
+            CreateGLTexture2DArray(width, height, arraySize, target,
+                                   format.formattype, format.internal,
+                                   format.datatype, format.type, &glTexture,
+                                   &error, false, d);
+            break;
+        case GL_TEXTURE_2D_MULTISAMPLE:
+            CreateGLTexture2DMultisample(width, height, samples, target,
+                                         format.formattype, format.internal,
+                                         format.datatype, format.type,
+                                         &glTexture, &error, false, d, false);
+            break;
+        case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+            CreateGLTexture2DArrayMultisample(
+                width, height, arraySize, samples, target, format.formattype,
+                format.internal, format.datatype, format.type, &glTexture,
+                &error, false, d, false);
+            break;
+
+        default:
+            log_error("Unsupported GL tex target (%s) passed to write test: "
+                      "%s (%s):%d",
+                      GetGLTargetName(target), __FUNCTION__, __FILE__,
+                      __LINE__);
+    }
+
+    // Check to see if the texture could not be created for some other reason
+    // like GL_FRAMEBUFFER_UNSUPPORTED
+    if (error == GL_FRAMEBUFFER_UNSUPPORTED)
+    {
+        return 0;
+    }
 
     // Construct testing source
-  log_info( " - Creating image %d by %d...\n", width, height );
-  // Create a CL image from the supplied GL texture
-  image = (*clCreateFromGLTexture_ptr)( context, CL_MEM_READ_ONLY,
-                                        target, 0, glTexture, &error );
-
-  if ( error != CL_SUCCESS ) {
-    print_error( error, "Unable to create CL image from GL texture" );
-    GLint fmt;
-    glGetTexLevelParameteriv( target, 0, GL_TEXTURE_INTERNAL_FORMAT, &fmt );
-    log_error( "    Supplied GL texture was base format %s and internal "
-              "format %s\n", GetGLBaseFormatName( fmt ), GetGLFormatName( fmt ) );
-    return error;
-  }
-
-  cl_image_format imageFormat;
-  error = clGetImageInfo (image, CL_IMAGE_FORMAT,
-                          sizeof(imageFormat), &imageFormat, NULL);
-  test_error(error, "Failed to get image format");
-
-  const char * imageType = 0;
-  bool doArraySize = false;
-  bool doImageWidth = false;
-  bool doImageHeight = false;
-  bool doImageChannelDataType = false;
-  bool doImageChannelOrder = false;
-  bool doImageDim = false;
-  bool doNumSamples = false;
-  bool doMSAA = false;
-  switch(target) {
-    case GL_TEXTURE_2D:
-      imageType = "image2d_depth_t";
-      doImageWidth = true;
-      doImageHeight = true;
-      doImageChannelDataType = true;
-      doImageChannelOrder = true;
-      doImageDim = true;
-      break;
-    case GL_TEXTURE_2D_ARRAY:
-      imageType = "image2d_array_depth_t";
-      doImageWidth = true;
-      doImageHeight = true;
-      doArraySize = true;
-      doImageChannelDataType = true;
-      doImageChannelOrder = true;
-      doImageDim = true;
-      doArraySize = true;
-      break;
-    case GL_TEXTURE_2D_MULTISAMPLE:
-      doNumSamples = true;
-      doMSAA = true;
-      if(format.formattype == GL_DEPTH_COMPONENT) {
-        doImageWidth = true;
-        imageType = "image2d_msaa_depth_t";
-      } else {
-        imageType = "image2d_msaa_t";
-      }
-      break;
-    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
-      doMSAA = true;
-      if(format.formattype == GL_DEPTH_COMPONENT) {
-        doImageWidth = true;
-        imageType = "image2d_msaa_array_depth_t";
-      } else {
-        imageType = "image2d_array_msaa_t";
-      }
-      break;
-  }
-
-
-
-  char channelTypeConstKernelLine[512] = {0};
-  char channelOrderConstKernelLine[512] = {0};
-  const char* channelTypeName=0;
-  const char* channelOrderName=0;
-  if(doImageChannelDataType) {
-    channelTypeName = GetChannelTypeName( imageFormat.image_channel_data_type );
-    if(channelTypeName && strlen(channelTypeName)) {
-      // replace CL_* with CLK_*
-      sprintf(channelTypeConstKernelLine, channelTypeConstLine, &channelTypeName[3]);
+    log_info(" - Creating image %d by %d...\n", width, height);
+    // Create a CL image from the supplied GL texture
+    image = (*clCreateFromGLTexture_ptr)(context, CL_MEM_READ_ONLY, target, 0,
+                                         glTexture, &error);
+
+    if (error != CL_SUCCESS)
+    {
+        print_error(error, "Unable to create CL image from GL texture");
+        GLint fmt;
+        glGetTexLevelParameteriv(target, 0, GL_TEXTURE_INTERNAL_FORMAT, &fmt);
+        log_error("    Supplied GL texture was base format %s and internal "
+                  "format %s\n",
+                  GetGLBaseFormatName(fmt), GetGLFormatName(fmt));
+        return error;
+    }
+
+    cl_image_format imageFormat;
+    error = clGetImageInfo(image, CL_IMAGE_FORMAT, sizeof(imageFormat),
+                           &imageFormat, NULL);
+    test_error(error, "Failed to get image format");
+
+    const char *imageType = 0;
+    bool doArraySize = false;
+    bool doImageWidth = false;
+    bool doImageHeight = false;
+    bool doImageChannelDataType = false;
+    bool doImageChannelOrder = false;
+    bool doImageDim = false;
+    bool doNumSamples = false;
+    bool doMSAA = false;
+    switch (target)
+    {
+        case GL_TEXTURE_2D:
+            imageType = "image2d_depth_t";
+            doImageWidth = true;
+            doImageHeight = true;
+            doImageChannelDataType = true;
+            doImageChannelOrder = true;
+            doImageDim = true;
+            break;
+        case GL_TEXTURE_2D_ARRAY:
+            imageType = "image2d_array_depth_t";
+            doImageWidth = true;
+            doImageHeight = true;
+            doArraySize = true;
+            doImageChannelDataType = true;
+            doImageChannelOrder = true;
+            doImageDim = true;
+            doArraySize = true;
+            break;
+        case GL_TEXTURE_2D_MULTISAMPLE:
+            doNumSamples = true;
+            doMSAA = true;
+            if (format.formattype == GL_DEPTH_COMPONENT)
+            {
+                doImageWidth = true;
+                imageType = "image2d_msaa_depth_t";
+            }
+            else
+            {
+                imageType = "image2d_msaa_t";
+            }
+            break;
+        case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+            doMSAA = true;
+            if (format.formattype == GL_DEPTH_COMPONENT)
+            {
+                doImageWidth = true;
+                imageType = "image2d_msaa_array_depth_t";
+            }
+            else
+            {
+                imageType = "image2d_array_msaa_t";
+            }
+            break;
+    }
+
+
+    char channelTypeConstKernelLine[512] = { 0 };
+    char channelOrderConstKernelLine[512] = { 0 };
+    const char *channelTypeName = 0;
+    const char *channelOrderName = 0;
+    if (doImageChannelDataType)
+    {
+        channelTypeName =
+            GetChannelTypeName(imageFormat.image_channel_data_type);
+        if (channelTypeName && strlen(channelTypeName))
+        {
+            // replace CL_* with CLK_*
+            sprintf(channelTypeConstKernelLine, channelTypeConstLine,
+                    &channelTypeName[3]);
+        }
     }
-  }
-  if(doImageChannelOrder) {
-    channelOrderName = GetChannelOrderName( imageFormat.image_channel_order );
-    if(channelOrderName && strlen(channelOrderName)) {
-      // replace CL_* with CLK_*
-      sprintf(channelOrderConstKernelLine, channelOrderConstLine, &channelOrderName[3]);
+    if (doImageChannelOrder)
+    {
+        channelOrderName = GetChannelOrderName(imageFormat.image_channel_order);
+        if (channelOrderName && strlen(channelOrderName))
+        {
+            // replace CL_* with CLK_*
+            sprintf(channelOrderConstKernelLine, channelOrderConstLine,
+                    &channelOrderName[3]);
+        }
     }
-  }
-
-	// Create a program to run against
-	sprintf(programSrc, 
-          methodTestKernelPattern, 
-          ( doMSAA ) ? enableMSAAKernelLine : "",
-	        imageType,
-          ( doArraySize ) ? arraySizeKernelLine : "",
-          ( doImageWidth ) ? imageWidthKernelLine : "",
-          ( doImageHeight ) ? imageHeightKernelLine : "",
-          ( doImageChannelDataType ) ? channelTypeKernelLine : "",
-          ( doImageChannelDataType ) ? channelTypeConstKernelLine : "",
-          ( doImageChannelOrder ) ? channelOrderKernelLine : "",
-          ( doImageChannelOrder ) ? channelOrderConstKernelLine : "",
-          ( doImageDim ) ? imageDimKernelLine : "",
-          ( doImageDim && doImageWidth ) ? imageWidthDimKernelLine : "",
-          ( doImageDim && doImageHeight ) ? imageHeightDimKernelLine : "",
-          ( doNumSamples ) ? numSamplesKernelLine : "");
-
-
-  //log_info("-----------------------------------\n%s\n", programSrc);
-  error = clFinish(queue);
-  if (error)
-    print_error(error, "clFinish failed.\n");
+
+    // Create a program to run against
+    sprintf(programSrc, methodTestKernelPattern,
+            (doMSAA) ? enableMSAAKernelLine : "", imageType,
+            (doArraySize) ? arraySizeKernelLine : "",
+            (doImageWidth) ? imageWidthKernelLine : "",
+            (doImageHeight) ? imageHeightKernelLine : "",
+            (doImageChannelDataType) ? channelTypeKernelLine : "",
+            (doImageChannelDataType) ? channelTypeConstKernelLine : "",
+            (doImageChannelOrder) ? channelOrderKernelLine : "",
+            (doImageChannelOrder) ? channelOrderConstKernelLine : "",
+            (doImageDim) ? imageDimKernelLine : "",
+            (doImageDim && doImageWidth) ? imageWidthDimKernelLine : "",
+            (doImageDim && doImageHeight) ? imageHeightDimKernelLine : "",
+            (doNumSamples) ? numSamplesKernelLine : "");
+
+
+    // log_info("-----------------------------------\n%s\n", programSrc);
+    error = clFinish(queue);
+    if (error) print_error(error, "clFinish failed.\n");
     const char *ptr = programSrc;
-    error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_kernel" );
-    test_error( error, "Unable to create kernel to test against" );
+    error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
+                                        "sample_kernel");
+    test_error(error, "Unable to create kernel to test against");
 
     // Create an output buffer
     outDataBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE,
                                    sizeof(outKernelData), NULL, &error);
-    test_error( error, "Unable to create output buffer" );
+    test_error(error, "Unable to create output buffer");
 
     // Set up arguments and run
-    error = clSetKernelArg( kernel, 0, sizeof( image ), &image );
-    test_error( error, "Unable to set kernel argument" );
-    error = clSetKernelArg( kernel, 1, sizeof( outDataBuffer ), &outDataBuffer );
-    test_error( error, "Unable to set kernel argument" );
+    error = clSetKernelArg(kernel, 0, sizeof(image), &image);
+    test_error(error, "Unable to set kernel argument");
+    error = clSetKernelArg(kernel, 1, sizeof(outDataBuffer), &outDataBuffer);
+    test_error(error, "Unable to set kernel argument");
 
     // Finish and Acquire.
     glFinish();
@@ -294,119 +316,155 @@ int test_image_format_methods( cl_device_id device, cl_context context, cl_comma
 
     size_t threads[1] = { 1 }, localThreads[1] = { 1 };
 
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-    test_error( error, "Unable to run kernel" );
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
+                                   localThreads, 0, NULL, NULL);
+    test_error(error, "Unable to run kernel");
 
-    error = clEnqueueReadBuffer( queue, outDataBuffer, CL_TRUE, 0, sizeof( outKernelData ), &outKernelData, 0, NULL, NULL );
-    test_error( error, "Unable to read data buffer" );
+    error = clEnqueueReadBuffer(queue, outDataBuffer, CL_TRUE, 0,
+                                sizeof(outKernelData), &outKernelData, 0, NULL,
+                                NULL);
+    test_error(error, "Unable to read data buffer");
 
     // Verify the results now
-  if( doImageWidth )
-    result |= verify(width, outKernelData.width, "width");
-  if( doImageHeight)
-    result |= verify(height, outKernelData.height, "height");
-  if( doImageDim && doImageWidth )
-    result |= verify(width, outKernelData.widthDim, "width from get_image_dim");
-  if( doImageDim && doImageHeight )
-    result |= verify(height, outKernelData.heightDim, "height from get_image_dim");
-  if( doImageChannelDataType )
-    result |= verify(outKernelData.channelType, outKernelData.expectedChannelType, channelTypeName);
-  if( doImageChannelOrder )
-    result |= verify(outKernelData.channelOrder, outKernelData.expectedChannelOrder, channelOrderName);
-  if( doArraySize )
-    result |= verify(arraySize, outKernelData.arraySize, "array size");
-  if( doNumSamples )
-    result |= verify(samples, outKernelData.numSamples, "samples");
-  if(result) {
-    log_error("Test image methods failed");
-  }
-
-  clEventWrapper event;
-  error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &image, 0, NULL, &event );
-  test_error(error, "clEnqueueReleaseGLObjects failed");
-
-  error = clWaitForEvents( 1, &event );
-  test_error(error, "clWaitForEvents failed");
+    if (doImageWidth) result |= verify(width, outKernelData.width, "width");
+    if (doImageHeight) result |= verify(height, outKernelData.height, "height");
+    if (doImageDim && doImageWidth)
+        result |=
+            verify(width, outKernelData.widthDim, "width from get_image_dim");
+    if (doImageDim && doImageHeight)
+        result |= verify(height, outKernelData.heightDim,
+                         "height from get_image_dim");
+    if (doImageChannelDataType)
+        result |= verify(outKernelData.channelType,
+                         outKernelData.expectedChannelType, channelTypeName);
+    if (doImageChannelOrder)
+        result |= verify(outKernelData.channelOrder,
+                         outKernelData.expectedChannelOrder, channelOrderName);
+    if (doArraySize)
+        result |= verify(arraySize, outKernelData.arraySize, "array size");
+    if (doNumSamples)
+        result |= verify(samples, outKernelData.numSamples, "samples");
+    if (result)
+    {
+        log_error("Test image methods failed");
+    }
+
+    clEventWrapper event;
+    error = (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &image, 0, NULL, &event);
+    test_error(error, "clEnqueueReleaseGLObjects failed");
+
+    error = clWaitForEvents(1, &event);
+    test_error(error, "clWaitForEvents failed");
 
     return result;
 }
 
-int test_image_methods_depth( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ){
-  if (!is_extension_available(device, "cl_khr_gl_depth_images")) {
-    log_info("Test not run because 'cl_khr_gl_depth_images' extension is not supported by the tested device\n");
-    return 0;
-  }
+int test_image_methods_depth(cl_device_id device, cl_context context,
+                             cl_command_queue queue, int numElements)
+{
+    if (!is_extension_available(device, "cl_khr_gl_depth_images"))
+    {
+        log_info("Test not run because 'cl_khr_gl_depth_images' extension is "
+                 "not supported by the tested device\n");
+        return 0;
+    }
 
     int result = 0;
-  GLenum depth_targets[] = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
-  size_t ntargets = sizeof(depth_targets) / sizeof(depth_targets[0]);
-  size_t nformats = sizeof(depth_formats) / sizeof(depth_formats[0]);
-
-  const size_t nsizes = 5;
-  sizevec_t sizes[nsizes];
-  // Need to limit texture size according to GL device properties
-  GLint maxTextureSize = 4096, maxTextureRectangleSize = 4096, maxTextureLayers = 16, size;
-  glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
-  glGetIntegerv(GL_MAX_RECTANGLE_TEXTURE_SIZE_EXT, &maxTextureRectangleSize);
-  glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers);
-
-  size = min(maxTextureSize, maxTextureRectangleSize);
-
-  RandomSeed seed( gRandomSeed );
-
-  // Generate some random sizes (within reasonable ranges)
-  for (size_t i = 0; i < nsizes; i++) {
-    sizes[i].width  = random_in_range( 2, min(size, 1<<(i+4)), seed );
-    sizes[i].height = random_in_range( 2, min(size, 1<<(i+4)), seed );
-    sizes[i].depth  = random_in_range( 2, min(maxTextureLayers, 1<<(i+4)), seed );
-  }
-
-  for (size_t i = 0; i < nsizes; i++) {
-    for(size_t itarget = 0; itarget < ntargets; ++itarget) {
-      for(size_t iformat = 0; iformat < nformats; ++iformat)
-        result |= test_image_format_methods(device, context, queue, sizes[i].width, sizes[i].height, (depth_targets[itarget] == GL_TEXTURE_2D_ARRAY) ? sizes[i].depth: 1, 0,
-                                  depth_targets[itarget], depth_formats[iformat], seed );
+    GLenum depth_targets[] = { GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY };
+    size_t ntargets = sizeof(depth_targets) / sizeof(depth_targets[0]);
+    size_t nformats = sizeof(depth_formats) / sizeof(depth_formats[0]);
+
+    const size_t nsizes = 5;
+    sizevec_t sizes[nsizes];
+    // Need to limit texture size according to GL device properties
+    GLint maxTextureSize = 4096, maxTextureRectangleSize = 4096,
+          maxTextureLayers = 16, size;
+    glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
+    glGetIntegerv(GL_MAX_RECTANGLE_TEXTURE_SIZE_EXT, &maxTextureRectangleSize);
+    glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers);
+
+    size = min(maxTextureSize, maxTextureRectangleSize);
+
+    RandomSeed seed(gRandomSeed);
+
+    // Generate some random sizes (within reasonable ranges)
+    for (size_t i = 0; i < nsizes; i++)
+    {
+        sizes[i].width = random_in_range(2, min(size, 1 << (i + 4)), seed);
+        sizes[i].height = random_in_range(2, min(size, 1 << (i + 4)), seed);
+        sizes[i].depth =
+            random_in_range(2, min(maxTextureLayers, 1 << (i + 4)), seed);
+    }
+
+    for (size_t i = 0; i < nsizes; i++)
+    {
+        for (size_t itarget = 0; itarget < ntargets; ++itarget)
+        {
+            for (size_t iformat = 0; iformat < nformats; ++iformat)
+                result |= test_image_format_methods(
+                    device, context, queue, sizes[i].width, sizes[i].height,
+                    (depth_targets[itarget] == GL_TEXTURE_2D_ARRAY)
+                        ? sizes[i].depth
+                        : 1,
+                    0, depth_targets[itarget], depth_formats[iformat], seed);
+        }
     }
-  }
     return result;
 }
 
-int test_image_methods_multisample( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ){
-  if (!is_extension_available(device, "cl_khr_gl_msaa_sharing")) {
-    log_info("Test not run because 'cl_khr_gl_msaa_sharing' extension is not supported by the tested device\n");
-    return 0;
-  }
-
-  int result = 0;
-  GLenum targets[] = {GL_TEXTURE_2D_MULTISAMPLE, GL_TEXTURE_2D_MULTISAMPLE_ARRAY};
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
-
-  const size_t nsizes = 5;
-  sizevec_t sizes[nsizes];
-  GLint maxTextureLayers = 16, maxTextureSize = 4096;
-  glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers);
-  glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
-
-  RandomSeed seed( gRandomSeed );
-
-  // Generate some random sizes (within reasonable ranges)
-  for (size_t i = 0; i < nsizes; i++) {
-    sizes[i].width  = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed );
-    sizes[i].height = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed );
-    sizes[i].depth  = random_in_range( 2, min(maxTextureLayers, 1<<(i+4)), seed );
-        }
+int test_image_methods_multisample(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int numElements)
+{
+    if (!is_extension_available(device, "cl_khr_gl_msaa_sharing"))
+    {
+        log_info("Test not run because 'cl_khr_gl_msaa_sharing' extension is "
+                 "not supported by the tested device\n");
+        return 0;
+    }
+
+    int result = 0;
+    GLenum targets[] = { GL_TEXTURE_2D_MULTISAMPLE,
+                         GL_TEXTURE_2D_MULTISAMPLE_ARRAY };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+
+    const size_t nsizes = 5;
+    sizevec_t sizes[nsizes];
+    GLint maxTextureLayers = 16, maxTextureSize = 4096;
+    glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers);
+    glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
 
-  glEnable(GL_MULTISAMPLE);
+    RandomSeed seed(gRandomSeed);
 
-  for (size_t i = 0; i < nsizes; i++) {
-    for(size_t itarget = 0; itarget < ntargets; ++itarget) {
-      for(size_t iformat = 0; iformat < nformats; ++iformat) {
-        GLint samples = get_gl_max_samples(targets[itarget], common_formats[iformat].internal);
-        result |= test_image_format_methods(device, context, queue, sizes[i].width, sizes[i].height, (targets[ntargets] == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) ? sizes[i].depth: 1,
-                                  samples, targets[itarget], common_formats[iformat], seed );
-      }
+    // Generate some random sizes (within reasonable ranges)
+    for (size_t i = 0; i < nsizes; i++)
+    {
+        sizes[i].width =
+            random_in_range(2, min(maxTextureSize, 1 << (i + 4)), seed);
+        sizes[i].height =
+            random_in_range(2, min(maxTextureSize, 1 << (i + 4)), seed);
+        sizes[i].depth =
+            random_in_range(2, min(maxTextureLayers, 1 << (i + 4)), seed);
+    }
+
+    glEnable(GL_MULTISAMPLE);
+
+    for (size_t i = 0; i < nsizes; i++)
+    {
+        for (size_t itarget = 0; itarget < ntargets; ++itarget)
+        {
+            for (size_t iformat = 0; iformat < nformats; ++iformat)
+            {
+                GLint samples = get_gl_max_samples(
+                    targets[itarget], common_formats[iformat].internal);
+                result |= test_image_format_methods(
+                    device, context, queue, sizes[i].width, sizes[i].height,
+                    (targets[ntargets] == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)
+                        ? sizes[i].depth
+                        : 1,
+                    samples, targets[itarget], common_formats[iformat], seed);
+            }
+        }
     }
-  }
     return result;
 }
diff --git a/test_conformance/gl/test_images_1D.cpp b/test_conformance/gl/test_images_1D.cpp
index 172dd4b5..4ccf86bc 100644
--- a/test_conformance/gl/test_images_1D.cpp
+++ b/test_conformance/gl/test_images_1D.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,11 +16,11 @@
 #include "common.h"
 #include "testBase.h"
 
-#if defined( __APPLE__ )
-    #include <OpenGL/glu.h>
+#if defined(__APPLE__)
+#include <OpenGL/glu.h>
 #else
-    #include <GL/glu.h>
-    #include <CL/cl_gl.h>
+#include <GL/glu.h>
+#include <CL/cl_gl.h>
 #endif
 #include <algorithm>
 
@@ -28,114 +28,116 @@ using namespace std;
 
 void calc_test_size_descriptors(sizevec_t* sizes, size_t nsizes)
 {
-  // Need to limit array size according to GL device properties
-  GLint maxTextureSize = 4096, maxTextureBufferSize = 4096, size;
-  glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
-  glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &maxTextureBufferSize);
-
-  size = min(maxTextureSize, maxTextureBufferSize);
-
-  RandomSeed seed( gRandomSeed );
-
-  // Generate some random sizes (within reasonable ranges)
-  for (size_t i = 0; i < nsizes; i++) {
-    sizes[i].width  = random_in_range( 2, min(size, 1<<(i+4)), seed );
-    sizes[i].height = 1;
-    sizes[i].depth  = 1;
-  }
+    // Need to limit array size according to GL device properties
+    GLint maxTextureSize = 4096, maxTextureBufferSize = 4096, size;
+    glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
+    glGetIntegerv(GL_MAX_TEXTURE_BUFFER_SIZE, &maxTextureBufferSize);
+
+    size = min(maxTextureSize, maxTextureBufferSize);
+
+    RandomSeed seed(gRandomSeed);
+
+    // Generate some random sizes (within reasonable ranges)
+    for (size_t i = 0; i < nsizes; i++)
+    {
+        sizes[i].width = random_in_range(2, min(size, 1 << (i + 4)), seed);
+        sizes[i].height = 1;
+        sizes[i].depth = 1;
+    }
 }
 
-int test_images_read_1D( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements )
+int test_images_read_1D(cl_device_id device, cl_context context,
+                        cl_command_queue queue, int numElements)
 {
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
-  GLenum targets[] = { GL_TEXTURE_1D };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    GLenum targets[] = { GL_TEXTURE_1D };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
 
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_test_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_test_size_descriptors(sizes, nsizes);
 
-  return test_images_read_common(device, context, queue, common_formats,
-      nformats, targets, ntargets, sizes, nsizes);
+    return test_images_read_common(device, context, queue, common_formats,
+                                   nformats, targets, ntargets, sizes, nsizes);
 }
 
-int test_images_write_1D( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements )
+int test_images_write_1D(cl_device_id device, cl_context context,
+                         cl_command_queue queue, int numElements)
 {
-  GLenum targets[] = { GL_TEXTURE_1D };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    GLenum targets[] = { GL_TEXTURE_1D };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_test_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_test_size_descriptors(sizes, nsizes);
 
-  return test_images_write_common( device, context, queue, common_formats,
-    nformats, targets, ntargets, sizes, nsizes );
+    return test_images_write_common(device, context, queue, common_formats,
+                                    nformats, targets, ntargets, sizes, nsizes);
 }
 
-int test_images_1D_getinfo( cl_device_id device, cl_context context,
-    cl_command_queue queue, int numElements )
+int test_images_1D_getinfo(cl_device_id device, cl_context context,
+                           cl_command_queue queue, int numElements)
 {
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
-  GLenum targets[] = { GL_TEXTURE_1D };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    GLenum targets[] = { GL_TEXTURE_1D };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
 
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_test_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_test_size_descriptors(sizes, nsizes);
 
-  return test_images_get_info_common( device, context, queue, common_formats,
-    nformats, targets, ntargets, sizes, nsizes);
+    return test_images_get_info_common(device, context, queue, common_formats,
+                                       nformats, targets, ntargets, sizes,
+                                       nsizes);
 }
 
-int test_images_read_texturebuffer( cl_device_id device, cl_context context,
-                        cl_command_queue queue, int numElements )
+int test_images_read_texturebuffer(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int numElements)
 {
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
-  GLenum targets[] = { GL_TEXTURE_BUFFER };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    GLenum targets[] = { GL_TEXTURE_BUFFER };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
 
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_test_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_test_size_descriptors(sizes, nsizes);
 
-  return test_images_read_common(device, context, queue, common_formats,
-                                 nformats, targets, ntargets, sizes, nsizes);
+    return test_images_read_common(device, context, queue, common_formats,
+                                   nformats, targets, ntargets, sizes, nsizes);
 }
 
-int test_images_write_texturebuffer( cl_device_id device, cl_context context,
-                         cl_command_queue queue, int numElements )
+int test_images_write_texturebuffer(cl_device_id device, cl_context context,
+                                    cl_command_queue queue, int numElements)
 {
-  GLenum targets[] = { GL_TEXTURE_BUFFER };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    GLenum targets[] = { GL_TEXTURE_BUFFER };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_test_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_test_size_descriptors(sizes, nsizes);
 
-  return test_images_write_common( device, context, queue, common_formats,
-                                  nformats, targets, ntargets, sizes, nsizes );
+    return test_images_write_common(device, context, queue, common_formats,
+                                    nformats, targets, ntargets, sizes, nsizes);
 }
 
-int test_images_texturebuffer_getinfo( cl_device_id device, cl_context context,
-                           cl_command_queue queue, int numElements )
+int test_images_texturebuffer_getinfo(cl_device_id device, cl_context context,
+                                      cl_command_queue queue, int numElements)
 {
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
-  GLenum targets[] = { GL_TEXTURE_BUFFER };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    GLenum targets[] = { GL_TEXTURE_BUFFER };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
 
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_test_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_test_size_descriptors(sizes, nsizes);
 
-  return test_images_get_info_common( device, context, queue, common_formats,
-                                     nformats, targets, ntargets, sizes, nsizes);
+    return test_images_get_info_common(device, context, queue, common_formats,
+                                       nformats, targets, ntargets, sizes,
+                                       nsizes);
 }
-
diff --git a/test_conformance/gl/test_images_1Darray.cpp b/test_conformance/gl/test_images_1Darray.cpp
index 1914a457..daa2efa8 100644
--- a/test_conformance/gl/test_images_1Darray.cpp
+++ b/test_conformance/gl/test_images_1Darray.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,75 +16,79 @@
 #include "common.h"
 #include "testBase.h"
 
-#if defined( __APPLE__ )
-    #include <OpenGL/glu.h>
+#if defined(__APPLE__)
+#include <OpenGL/glu.h>
 #else
-    #include <GL/glu.h>
-    #include <CL/cl_gl.h>
+#include <GL/glu.h>
+#include <CL/cl_gl.h>
 #endif
 #include <algorithm>
 
 using namespace std;
 void calc_1D_array_size_descriptors(sizevec_t* sizes, size_t nsizes)
 {
-  // Need to limit array size according to GL device properties
-  GLint maxTextureLayers = 16, maxTextureSize = 4096;
-  glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers);
-  glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
+    // Need to limit array size according to GL device properties
+    GLint maxTextureLayers = 16, maxTextureSize = 4096;
+    glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers);
+    glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
 
-  RandomSeed seed( gRandomSeed );
+    RandomSeed seed(gRandomSeed);
 
-  // Generate some random sizes (within reasonable ranges)
-  for (size_t i = 0; i < nsizes; i++) {
-    sizes[i].width  = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed );
-    sizes[i].height = random_in_range( 2, min(maxTextureLayers, 1<<(i+4)), seed );
-    sizes[i].depth  = 1;
-  }
+    // Generate some random sizes (within reasonable ranges)
+    for (size_t i = 0; i < nsizes; i++)
+    {
+        sizes[i].width =
+            random_in_range(2, min(maxTextureSize, 1 << (i + 4)), seed);
+        sizes[i].height =
+            random_in_range(2, min(maxTextureLayers, 1 << (i + 4)), seed);
+        sizes[i].depth = 1;
+    }
 }
 
-int test_images_read_1Darray( cl_device_id device, cl_context context,
-  cl_command_queue queue, int )
+int test_images_read_1Darray(cl_device_id device, cl_context context,
+                             cl_command_queue queue, int)
 {
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
-  GLenum targets[] = { GL_TEXTURE_1D_ARRAY };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    GLenum targets[] = { GL_TEXTURE_1D_ARRAY };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
 
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_1D_array_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_1D_array_size_descriptors(sizes, nsizes);
 
-  return test_images_read_common(device, context, queue, common_formats,
-      nformats, targets, ntargets, sizes, nsizes);
+    return test_images_read_common(device, context, queue, common_formats,
+                                   nformats, targets, ntargets, sizes, nsizes);
 }
 
-int test_images_write_1Darray( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements )
+int test_images_write_1Darray(cl_device_id device, cl_context context,
+                              cl_command_queue queue, int numElements)
 {
-  GLenum targets[] = { GL_TEXTURE_1D_ARRAY };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    GLenum targets[] = { GL_TEXTURE_1D_ARRAY };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_1D_array_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_1D_array_size_descriptors(sizes, nsizes);
 
-  return test_images_write_common( device, context, queue, common_formats,
-    nformats, targets, ntargets, sizes, nsizes );
+    return test_images_write_common(device, context, queue, common_formats,
+                                    nformats, targets, ntargets, sizes, nsizes);
 }
 
-int test_images_1Darray_getinfo( cl_device_id device, cl_context context,
-  cl_command_queue queue, int )
+int test_images_1Darray_getinfo(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int)
 {
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
-  GLenum targets[] = { GL_TEXTURE_1D_ARRAY };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    GLenum targets[] = { GL_TEXTURE_1D_ARRAY };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
 
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_1D_array_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_1D_array_size_descriptors(sizes, nsizes);
 
-  return test_images_get_info_common(device, context, queue, common_formats,
-      nformats, targets, ntargets, sizes, nsizes);
+    return test_images_get_info_common(device, context, queue, common_formats,
+                                       nformats, targets, ntargets, sizes,
+                                       nsizes);
 }
 \ No newline at end of file
diff --git a/test_conformance/gl/test_images_2D.cpp b/test_conformance/gl/test_images_2D.cpp
index fbff31c4..63ea31e2 100644
--- a/test_conformance/gl/test_images_2D.cpp
+++ b/test_conformance/gl/test_images_2D.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,7 +16,7 @@
 #include "testBase.h"
 #include "common.h"
 
-#if defined( __APPLE__ )
+#if defined(__APPLE__)
 #include <OpenGL/glu.h>
 #else
 #include <GL/glu.h>
@@ -31,76 +31,77 @@ using namespace std;
 
 void calc_2D_test_size_descriptors(sizevec_t* sizes, size_t nsizes)
 {
-  // Need to limit array size according to GL device properties
-  // Need to limit texture size according to GL device properties
-  GLint maxTextureSize = 4096, maxTextureRectangleSize = 4096, size;
-  glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
-  glGetIntegerv(GL_MAX_RECTANGLE_TEXTURE_SIZE_EXT, &maxTextureRectangleSize);
-
-  size = min(maxTextureSize, maxTextureRectangleSize);
-
-  RandomSeed seed( gRandomSeed );
-
-  // Generate some random sizes (within reasonable ranges)
-  for (size_t i = 0; i < nsizes; i++) {
-    sizes[i].width  = random_in_range( 2, min(size, 1<<(i+4)), seed );
-    sizes[i].height = random_in_range( 2, min(size, 1<<(i+4)), seed );
-    sizes[i].depth  = 1;
-  }
+    // Need to limit array size according to GL device properties
+    // Need to limit texture size according to GL device properties
+    GLint maxTextureSize = 4096, maxTextureRectangleSize = 4096, size;
+    glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
+    glGetIntegerv(GL_MAX_RECTANGLE_TEXTURE_SIZE_EXT, &maxTextureRectangleSize);
+
+    size = min(maxTextureSize, maxTextureRectangleSize);
+
+    RandomSeed seed(gRandomSeed);
+
+    // Generate some random sizes (within reasonable ranges)
+    for (size_t i = 0; i < nsizes; i++)
+    {
+        sizes[i].width = random_in_range(2, min(size, 1 << (i + 4)), seed);
+        sizes[i].height = random_in_range(2, min(size, 1 << (i + 4)), seed);
+        sizes[i].depth = 1;
+    }
 }
 
 void calc_cube_test_size_descriptors(sizevec_t* sizes, size_t nsizes)
 {
-  // Need to limit array size according to GL device properties
-  // Need to limit texture size according to GL device properties
-  GLint maxQubeMapSize = 4096;
-  glGetIntegerv(GL_MAX_CUBE_MAP_TEXTURE_SIZE, &maxQubeMapSize);
-
-  RandomSeed seed( gRandomSeed );
-
-  // Generate some random sizes (within reasonable ranges)
-  for (size_t i = 0; i < nsizes; i++) {
-    sizes[i].width  = sizes[i].height = random_in_range( 2, min(maxQubeMapSize, 1<<(i+4)), seed );
-    sizes[i].depth  = 1;
-  }
+    // Need to limit array size according to GL device properties
+    // Need to limit texture size according to GL device properties
+    GLint maxQubeMapSize = 4096;
+    glGetIntegerv(GL_MAX_CUBE_MAP_TEXTURE_SIZE, &maxQubeMapSize);
+
+    RandomSeed seed(gRandomSeed);
+
+    // Generate some random sizes (within reasonable ranges)
+    for (size_t i = 0; i < nsizes; i++)
+    {
+        sizes[i].width = sizes[i].height =
+            random_in_range(2, min(maxQubeMapSize, 1 << (i + 4)), seed);
+        sizes[i].depth = 1;
+    }
 }
 
-int test_images_read_2D( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements )
+int test_images_read_2D(cl_device_id device, cl_context context,
+                        cl_command_queue queue, int numElements)
 {
-  GLenum targets[] = { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    GLenum targets[] = { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
 
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_2D_test_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_2D_test_size_descriptors(sizes, nsizes);
 
-  return test_images_read_common(device, context, queue, common_formats,
-    nformats, targets, ntargets, sizes, nsizes);
+    return test_images_read_common(device, context, queue, common_formats,
+                                   nformats, targets, ntargets, sizes, nsizes);
 }
 
-int test_images_read_cube( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements )
+int test_images_read_cube(cl_device_id device, cl_context context,
+                          cl_command_queue queue, int numElements)
 {
-  GLenum targets[] = {
-    GL_TEXTURE_CUBE_MAP_POSITIVE_X,
-    GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
-    GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
-    GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
-    GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
-    GL_TEXTURE_CUBE_MAP_NEGATIVE_Z };
-
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
-
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_cube_test_size_descriptors(sizes, nsizes);
-
-  return test_images_read_common(device, context, queue, common_formats,
-    nformats, targets, ntargets, sizes, nsizes);
+    GLenum targets[] = {
+        GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
+        GL_TEXTURE_CUBE_MAP_POSITIVE_Z, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
+        GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z
+    };
+
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_cube_test_size_descriptors(sizes, nsizes);
+
+    return test_images_read_common(device, context, queue, common_formats,
+                                   nformats, targets, ntargets, sizes, nsizes);
 }
 
 #pragma mark -
@@ -108,81 +109,77 @@ int test_images_read_cube( cl_device_id device, cl_context context,
 
 #include "common.h"
 
-int test_images_write( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements )
+int test_images_write(cl_device_id device, cl_context context,
+                      cl_command_queue queue, int numElements)
 {
-  GLenum targets[] = { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    GLenum targets[] = { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_2D_test_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_2D_test_size_descriptors(sizes, nsizes);
 
-  return test_images_write_common( device, context, queue, common_formats,
-    nformats, targets, ntargets, sizes, nsizes );
+    return test_images_write_common(device, context, queue, common_formats,
+                                    nformats, targets, ntargets, sizes, nsizes);
 }
 
-int test_images_write_cube( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements )
+int test_images_write_cube(cl_device_id device, cl_context context,
+                           cl_command_queue queue, int numElements)
 {
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
-
-  GLenum targets[] = {
-    GL_TEXTURE_CUBE_MAP_POSITIVE_X,
-    GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
-    GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
-    GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
-    GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
-    GL_TEXTURE_CUBE_MAP_NEGATIVE_Z
-  };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
-
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_cube_test_size_descriptors(sizes, nsizes);
-
-  return test_images_write_common( device, context, queue, common_formats,
-    nformats, targets, ntargets, sizes, nsizes );
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+
+    GLenum targets[] = {
+        GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
+        GL_TEXTURE_CUBE_MAP_POSITIVE_Z, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
+        GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z
+    };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_cube_test_size_descriptors(sizes, nsizes);
+
+    return test_images_write_common(device, context, queue, common_formats,
+                                    nformats, targets, ntargets, sizes, nsizes);
 }
 
 #pragma mark -
 #pragma mark _2D get info tests
 
-int test_images_2D_getinfo( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements )
+int test_images_2D_getinfo(cl_device_id device, cl_context context,
+                           cl_command_queue queue, int numElements)
 {
-  GLenum targets[] = { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    GLenum targets[] = { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
 
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_2D_test_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_2D_test_size_descriptors(sizes, nsizes);
 
-  return test_images_get_info_common(device, context, queue, common_formats,
-      nformats, targets, ntargets, sizes, nsizes);
+    return test_images_get_info_common(device, context, queue, common_formats,
+                                       nformats, targets, ntargets, sizes,
+                                       nsizes);
 }
 
-int test_images_cube_getinfo( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements )
+int test_images_cube_getinfo(cl_device_id device, cl_context context,
+                             cl_command_queue queue, int numElements)
 {
     GLenum targets[] = {
-    GL_TEXTURE_CUBE_MAP_POSITIVE_X,
-    GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
-    GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
-    GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
-    GL_TEXTURE_CUBE_MAP_NEGATIVE_Y,
-    GL_TEXTURE_CUBE_MAP_NEGATIVE_Z
-  };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
-
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_cube_test_size_descriptors(sizes, nsizes);
-
-  return test_images_get_info_common(device, context, queue, common_formats,
-      nformats, targets, ntargets, sizes, nsizes);
+        GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_POSITIVE_Y,
+        GL_TEXTURE_CUBE_MAP_POSITIVE_Z, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
+        GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Z
+    };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_cube_test_size_descriptors(sizes, nsizes);
+
+    return test_images_get_info_common(device, context, queue, common_formats,
+                                       nformats, targets, ntargets, sizes,
+                                       nsizes);
 }
diff --git a/test_conformance/gl/test_images_2Darray.cpp b/test_conformance/gl/test_images_2Darray.cpp
index bb7095d1..20ca7144 100644
--- a/test_conformance/gl/test_images_2Darray.cpp
+++ b/test_conformance/gl/test_images_2Darray.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,11 +16,11 @@
 #include "common.h"
 #include "testBase.h"
 
-#if defined( __APPLE__ )
-    #include <OpenGL/glu.h>
+#if defined(__APPLE__)
+#include <OpenGL/glu.h>
 #else
-    #include <GL/glu.h>
-    #include <CL/cl_gl.h>
+#include <GL/glu.h>
+#include <CL/cl_gl.h>
 #endif
 #include <algorithm>
 
@@ -28,66 +28,71 @@ using namespace std;
 
 void calc_2D_array_size_descriptors(sizevec_t* sizes, size_t nsizes)
 {
-  // Need to limit array size according to GL device properties
-  GLint maxTextureLayers = 16, maxTextureSize = 4096;
-  glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers);
-  glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
-
-  RandomSeed seed( gRandomSeed );
-
-  // Generate some random sizes (within reasonable ranges)
-  for (size_t i = 0; i < nsizes; i++) {
-    sizes[i].width  = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed );
-    sizes[i].height = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed );
-    sizes[i].depth  = random_in_range( 2, min(maxTextureLayers, 1<<(i+4)), seed );
-  }
+    // Need to limit array size according to GL device properties
+    GLint maxTextureLayers = 16, maxTextureSize = 4096;
+    glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers);
+    glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
+
+    RandomSeed seed(gRandomSeed);
+
+    // Generate some random sizes (within reasonable ranges)
+    for (size_t i = 0; i < nsizes; i++)
+    {
+        sizes[i].width =
+            random_in_range(2, min(maxTextureSize, 1 << (i + 4)), seed);
+        sizes[i].height =
+            random_in_range(2, min(maxTextureSize, 1 << (i + 4)), seed);
+        sizes[i].depth =
+            random_in_range(2, min(maxTextureLayers, 1 << (i + 4)), seed);
+    }
 }
 
-int test_images_read_2Darray( cl_device_id device, cl_context context,
-  cl_command_queue queue, int )
+int test_images_read_2Darray(cl_device_id device, cl_context context,
+                             cl_command_queue queue, int)
 {
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
-  GLenum targets[] = { GL_TEXTURE_2D_ARRAY };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    GLenum targets[] = { GL_TEXTURE_2D_ARRAY };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
 
-  const size_t nsizes = 6;
-  sizevec_t sizes[nsizes];
-  calc_2D_array_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 6;
+    sizevec_t sizes[nsizes];
+    calc_2D_array_size_descriptors(sizes, nsizes);
 
-  return test_images_read_common(device, context, queue, common_formats,
-      nformats, targets, ntargets, sizes, nsizes);
+    return test_images_read_common(device, context, queue, common_formats,
+                                   nformats, targets, ntargets, sizes, nsizes);
 }
 
-int test_images_write_2Darray( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements )
+int test_images_write_2Darray(cl_device_id device, cl_context context,
+                              cl_command_queue queue, int numElements)
 {
-  // FIXME: Query for 2D image array write support.
+    // FIXME: Query for 2D image array write support.
 
-  GLenum targets[] = { GL_TEXTURE_2D_ARRAY };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    GLenum targets[] = { GL_TEXTURE_2D_ARRAY };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
-  const size_t nsizes = 6;
-  sizevec_t sizes[nsizes];
-  calc_2D_array_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 6;
+    sizevec_t sizes[nsizes];
+    calc_2D_array_size_descriptors(sizes, nsizes);
 
-  return test_images_write_common( device, context, queue, common_formats,
-    nformats, targets, ntargets, sizes, nsizes );
+    return test_images_write_common(device, context, queue, common_formats,
+                                    nformats, targets, ntargets, sizes, nsizes);
 }
 
-int test_images_2Darray_getinfo( cl_device_id device, cl_context context,
-  cl_command_queue queue, int )
+int test_images_2Darray_getinfo(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int)
 {
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
-  GLenum targets[] = { GL_TEXTURE_2D_ARRAY };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    GLenum targets[] = { GL_TEXTURE_2D_ARRAY };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
 
-  const size_t nsizes = 6;
-  sizevec_t sizes[nsizes];
-  calc_2D_array_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 6;
+    sizevec_t sizes[nsizes];
+    calc_2D_array_size_descriptors(sizes, nsizes);
 
-  return test_images_get_info_common(device, context, queue, common_formats,
-      nformats, targets, ntargets, sizes, nsizes);
+    return test_images_get_info_common(device, context, queue, common_formats,
+                                       nformats, targets, ntargets, sizes,
+                                       nsizes);
 }
 \ No newline at end of file
diff --git a/test_conformance/gl/test_images_3D.cpp b/test_conformance/gl/test_images_3D.cpp
index 8abaa096..220cd0ca 100644
--- a/test_conformance/gl/test_images_3D.cpp
+++ b/test_conformance/gl/test_images_3D.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,11 +16,11 @@
 #include "testBase.h"
 #include "common.h"
 
-#if defined( __APPLE__ )
-    #include <OpenGL/glu.h>
+#if defined(__APPLE__)
+#include <OpenGL/glu.h>
 #else
-    #include <GL/glu.h>
-    #include <CL/cl_gl.h>
+#include <GL/glu.h>
+#include <CL/cl_gl.h>
 #endif
 #include <algorithm>
 
@@ -31,77 +31,85 @@ using namespace std;
 
 void calc_3D_size_descriptors(sizevec_t* sizes, size_t nsizes)
 {
-  // Need to limit array size according to GL device properties
-  GLint maxTextureSize = 2048;
-  glGetIntegerv(GL_MAX_3D_TEXTURE_SIZE, &maxTextureSize);
-
-  RandomSeed seed( gRandomSeed );
-
-  // Generate some random sizes (within reasonable ranges)
-  for (size_t i = 0; i < nsizes; i++) {
-    sizes[i].width  = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed );
-    sizes[i].height = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed );
-    sizes[i].depth  = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed );
-  }
+    // Need to limit array size according to GL device properties
+    GLint maxTextureSize = 2048;
+    glGetIntegerv(GL_MAX_3D_TEXTURE_SIZE, &maxTextureSize);
+
+    RandomSeed seed(gRandomSeed);
+
+    // Generate some random sizes (within reasonable ranges)
+    for (size_t i = 0; i < nsizes; i++)
+    {
+        sizes[i].width =
+            random_in_range(2, min(maxTextureSize, 1 << (i + 4)), seed);
+        sizes[i].height =
+            random_in_range(2, min(maxTextureSize, 1 << (i + 4)), seed);
+        sizes[i].depth =
+            random_in_range(2, min(maxTextureSize, 1 << (i + 4)), seed);
+    }
 }
 
-int test_images_read_3D( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
+int test_images_read_3D(cl_device_id device, cl_context context,
+                        cl_command_queue queue, int numElements)
 {
     GLenum targets[] = { GL_TEXTURE_3D };
-  size_t ntargets = 1;
+    size_t ntargets = 1;
 
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
-  const size_t nsizes = 6;
-  sizevec_t sizes[nsizes];
-  calc_3D_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 6;
+    sizevec_t sizes[nsizes];
+    calc_3D_size_descriptors(sizes, nsizes);
 
-  return test_images_read_common(device, context, queue, common_formats,
-    nformats, targets, ntargets, sizes, nsizes);
+    return test_images_read_common(device, context, queue, common_formats,
+                                   nformats, targets, ntargets, sizes, nsizes);
 }
 
 #pragma mark -
 #pragma marm _3D write test
 
-int test_images_write_3D( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements )
+int test_images_write_3D(cl_device_id device, cl_context context,
+                         cl_command_queue queue, int numElements)
 {
-  // TODO: Perhaps the expected behavior is to FAIL if 3D images are
-  //       unsupported?
+    // TODO: Perhaps the expected behavior is to FAIL if 3D images are
+    //       unsupported?
 
-  if (!is_extension_available(device, "cl_khr_3d_image_writes")) {
-    log_info("This device does not support 3D image writes.  Skipping test.\n");
-    return 0;
-  }
+    if (!is_extension_available(device, "cl_khr_3d_image_writes"))
+    {
+        log_info(
+            "This device does not support 3D image writes.  Skipping test.\n");
+        return 0;
+    }
 
-  GLenum targets[] = { GL_TEXTURE_3D };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    GLenum targets[] = { GL_TEXTURE_3D };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
 
-  const size_t nsizes = 6;
-  sizevec_t sizes[nsizes];
-  calc_3D_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 6;
+    sizevec_t sizes[nsizes];
+    calc_3D_size_descriptors(sizes, nsizes);
 
-  return test_images_write_common( device, context, queue, common_formats,
-    nformats, targets, ntargets, sizes, nsizes );
+    return test_images_write_common(device, context, queue, common_formats,
+                                    nformats, targets, ntargets, sizes, nsizes);
 }
 
 #pragma mark -
 #pragma mark _3D get info test
 
-int test_images_3D_getinfo( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements )
+int test_images_3D_getinfo(cl_device_id device, cl_context context,
+                           cl_command_queue queue, int numElements)
 {
-  GLenum targets[] = { GL_TEXTURE_3D };
-  size_t ntargets = 1;
+    GLenum targets[] = { GL_TEXTURE_3D };
+    size_t ntargets = 1;
 
-  size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    size_t nformats = sizeof(common_formats) / sizeof(common_formats[0]);
 
-  const size_t nsizes = 6;
-  sizevec_t sizes[nsizes];
-  calc_3D_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 6;
+    sizevec_t sizes[nsizes];
+    calc_3D_size_descriptors(sizes, nsizes);
 
     return test_images_get_info_common(device, context, queue, common_formats,
-      nformats, targets, ntargets, sizes, nsizes);
+                                       nformats, targets, ntargets, sizes,
+                                       nsizes);
 }
diff --git a/test_conformance/gl/test_images_depth.cpp b/test_conformance/gl/test_images_depth.cpp
index f6cded47..05265cc6 100644
--- a/test_conformance/gl/test_images_depth.cpp
+++ b/test_conformance/gl/test_images_depth.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,7 +16,7 @@
 #include "testBase.h"
 #include "common.h"
 
-#if defined( __APPLE__ )
+#if defined(__APPLE__)
 #include <OpenGL/glu.h>
 #else
 #include <GL/glu.h>
@@ -32,129 +32,140 @@ using namespace std;
 
 void calc_depth_size_descriptors(sizevec_t* sizes, size_t nsizes)
 {
-  // Need to limit texture size according to GL device properties
-  GLint maxTextureSize = 4096, maxTextureRectangleSize = 4096, size;
-  glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
-  glGetIntegerv(GL_MAX_RECTANGLE_TEXTURE_SIZE_EXT, &maxTextureRectangleSize);
-
-  size = min(maxTextureSize, maxTextureRectangleSize);
-
-  RandomSeed seed( gRandomSeed );
-
-  // Generate some random sizes (within reasonable ranges)
-  for (size_t i = 0; i < nsizes; i++) {
-    sizes[i].width  = random_in_range( 2, min(size, 1<<(i+4)), seed );
-    sizes[i].height = random_in_range( 2, min(size, 1<<(i+4)), seed );
-    sizes[i].depth  = 1;
-  }
+    // Need to limit texture size according to GL device properties
+    GLint maxTextureSize = 4096, maxTextureRectangleSize = 4096, size;
+    glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
+    glGetIntegerv(GL_MAX_RECTANGLE_TEXTURE_SIZE_EXT, &maxTextureRectangleSize);
+
+    size = min(maxTextureSize, maxTextureRectangleSize);
+
+    RandomSeed seed(gRandomSeed);
+
+    // Generate some random sizes (within reasonable ranges)
+    for (size_t i = 0; i < nsizes; i++)
+    {
+        sizes[i].width = random_in_range(2, min(size, 1 << (i + 4)), seed);
+        sizes[i].height = random_in_range(2, min(size, 1 << (i + 4)), seed);
+        sizes[i].depth = 1;
+    }
 }
 
 void calc_depth_array_size_descriptors(sizevec_t* sizes, size_t nsizes)
 {
-  // Need to limit texture size according to GL device properties
-  GLint maxTextureSize = 4096, maxTextureRectangleSize = 4096, maxTextureLayers = 16, size;
-  glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
-  glGetIntegerv(GL_MAX_RECTANGLE_TEXTURE_SIZE_EXT, &maxTextureRectangleSize);
-  glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers);
-
-  size = min(maxTextureSize, maxTextureRectangleSize);
-
-  RandomSeed seed( gRandomSeed );
-
-  // Generate some random sizes (within reasonable ranges)
-  for (size_t i = 0; i < nsizes; i++) {
-    sizes[i].width  = random_in_range( 2, min(size, 1<<(i+4)), seed );
-    sizes[i].height = random_in_range( 2, min(size, 1<<(i+4)), seed );
-    sizes[i].depth  = random_in_range( 2, min(maxTextureLayers, 1<<(i+4)), seed );
-  }
+    // Need to limit texture size according to GL device properties
+    GLint maxTextureSize = 4096, maxTextureRectangleSize = 4096,
+          maxTextureLayers = 16, size;
+    glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
+    glGetIntegerv(GL_MAX_RECTANGLE_TEXTURE_SIZE_EXT, &maxTextureRectangleSize);
+    glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers);
+
+    size = min(maxTextureSize, maxTextureRectangleSize);
+
+    RandomSeed seed(gRandomSeed);
+
+    // Generate some random sizes (within reasonable ranges)
+    for (size_t i = 0; i < nsizes; i++)
+    {
+        sizes[i].width = random_in_range(2, min(size, 1 << (i + 4)), seed);
+        sizes[i].height = random_in_range(2, min(size, 1 << (i + 4)), seed);
+        sizes[i].depth =
+            random_in_range(2, min(maxTextureLayers, 1 << (i + 4)), seed);
+    }
 }
 
-int test_images_read_2D_depth( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements )
+int test_images_read_2D_depth(cl_device_id device, cl_context context,
+                              cl_command_queue queue, int numElements)
 {
-  if (!is_extension_available(device, "cl_khr_gl_depth_images")) {
-    log_info("Test not run because 'cl_khr_gl_depth_images' extension is not supported by the tested device\n");
-    return 0;
-  }
+    if (!is_extension_available(device, "cl_khr_gl_depth_images"))
+    {
+        log_info("Test not run because 'cl_khr_gl_depth_images' extension is "
+                 "not supported by the tested device\n");
+        return 0;
+    }
 
-  RandomSeed seed( gRandomSeed );
+    RandomSeed seed(gRandomSeed);
 
-  GLenum targets[] = { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    GLenum targets[] = { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
 
-  size_t nformats = sizeof(depth_formats) / sizeof(depth_formats[0]);
+    size_t nformats = sizeof(depth_formats) / sizeof(depth_formats[0]);
 
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_depth_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_depth_size_descriptors(sizes, nsizes);
 
-  return test_images_read_common(device, context, queue, depth_formats,
-    nformats, targets, ntargets, sizes, nsizes);
+    return test_images_read_common(device, context, queue, depth_formats,
+                                   nformats, targets, ntargets, sizes, nsizes);
 }
 
 #pragma mark -
 #pragma mark _2D depth write tests
 
 
-int test_images_write_2D_depth( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements )
+int test_images_write_2D_depth(cl_device_id device, cl_context context,
+                               cl_command_queue queue, int numElements)
 {
-  if (!is_extension_available(device, "cl_khr_gl_depth_images")) {
-    log_info("Test not run because 'cl_khr_gl_depth_images' extension is not supported by the tested device\n");
-    return 0;
-  }
-
-  GLenum targets[] = { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
-  size_t nformats = sizeof(depth_formats) / sizeof(depth_formats[0]);
-
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_depth_size_descriptors(sizes, nsizes);
-
-  return test_images_write_common( device, context, queue, depth_formats,
-    nformats, targets, ntargets, sizes, nsizes );
+    if (!is_extension_available(device, "cl_khr_gl_depth_images"))
+    {
+        log_info("Test not run because 'cl_khr_gl_depth_images' extension is "
+                 "not supported by the tested device\n");
+        return 0;
+    }
+
+    GLenum targets[] = { GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE_EXT };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    size_t nformats = sizeof(depth_formats) / sizeof(depth_formats[0]);
+
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_depth_size_descriptors(sizes, nsizes);
+
+    return test_images_write_common(device, context, queue, depth_formats,
+                                    nformats, targets, ntargets, sizes, nsizes);
 }
 
-int test_images_read_2Darray_depth( cl_device_id device, cl_context context,
-  cl_command_queue queue, int )
+int test_images_read_2Darray_depth(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int)
 {
-  if (!is_extension_available(device, "cl_khr_gl_depth_images")) {
-    log_info("Test not run because 'cl_khr_gl_depth_images' extension is not supported by the tested device\n");
-    return 0;
-  }
-
-  size_t nformats = sizeof(depth_formats) / sizeof(depth_formats[0]);
-  GLenum targets[] = { GL_TEXTURE_2D_ARRAY };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
-
-  const size_t nsizes = 6;
-  sizevec_t sizes[nsizes];
-  calc_depth_array_size_descriptors(sizes, nsizes);
-
-  return test_images_read_common(device, context, queue, depth_formats,
-      nformats, targets, ntargets, sizes, nsizes);
+    if (!is_extension_available(device, "cl_khr_gl_depth_images"))
+    {
+        log_info("Test not run because 'cl_khr_gl_depth_images' extension is "
+                 "not supported by the tested device\n");
+        return 0;
+    }
+
+    size_t nformats = sizeof(depth_formats) / sizeof(depth_formats[0]);
+    GLenum targets[] = { GL_TEXTURE_2D_ARRAY };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+
+    const size_t nsizes = 6;
+    sizevec_t sizes[nsizes];
+    calc_depth_array_size_descriptors(sizes, nsizes);
+
+    return test_images_read_common(device, context, queue, depth_formats,
+                                   nformats, targets, ntargets, sizes, nsizes);
 }
 
-int test_images_write_2Darray_depth( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements )
+int test_images_write_2Darray_depth(cl_device_id device, cl_context context,
+                                    cl_command_queue queue, int numElements)
 {
-  if (!is_extension_available(device, "cl_khr_gl_depth_images")) {
-    log_info("Test not run because 'cl_khr_gl_depth_images' extension is not supported by the tested device\n");
-    return 0;
-  }
+    if (!is_extension_available(device, "cl_khr_gl_depth_images"))
+    {
+        log_info("Test not run because 'cl_khr_gl_depth_images' extension is "
+                 "not supported by the tested device\n");
+        return 0;
+    }
 
-  // FIXME: Query for 2D image array write support.
+    // FIXME: Query for 2D image array write support.
 
-  GLenum targets[] = { GL_TEXTURE_2D_ARRAY };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
-  size_t nformats = sizeof(depth_formats) / sizeof(depth_formats[0]);
+    GLenum targets[] = { GL_TEXTURE_2D_ARRAY };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    size_t nformats = sizeof(depth_formats) / sizeof(depth_formats[0]);
 
-  const size_t nsizes = 6;
-  sizevec_t sizes[nsizes];
-  calc_depth_array_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 6;
+    sizevec_t sizes[nsizes];
+    calc_depth_array_size_descriptors(sizes, nsizes);
 
-  return test_images_write_common( device, context, queue, depth_formats,
-    nformats, targets, ntargets, sizes, nsizes );
+    return test_images_write_common(device, context, queue, depth_formats,
+                                    nformats, targets, ntargets, sizes, nsizes);
 }
-
diff --git a/test_conformance/gl/test_images_getinfo_common.cpp b/test_conformance/gl/test_images_getinfo_common.cpp
index 2322c269..836eb067 100644
--- a/test_conformance/gl/test_images_getinfo_common.cpp
+++ b/test_conformance/gl/test_images_getinfo_common.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,74 +16,71 @@
 #include "testBase.h"
 #include "common.h"
 
-#if defined( __APPLE__ )
-    #include <OpenGL/glu.h>
+#if defined(__APPLE__)
+#include <OpenGL/glu.h>
 #else
-    #include <GL/glu.h>
-    #include <CL/cl_gl.h>
+#include <GL/glu.h>
+#include <CL/cl_gl.h>
 #endif
 
-extern int supportsHalf(cl_context context, bool* supports_half);
+extern int supportsHalf(cl_context context, bool *supports_half);
 
-static int test_image_info( cl_context context, cl_command_queue queue,
-  GLenum glTarget, GLuint glTexture, size_t imageWidth, size_t imageHeight,
-  size_t imageDepth, cl_image_format *outFormat, ExplicitType *outType,
-  void **outResultBuffer )
+static int test_image_info(cl_context context, cl_command_queue queue,
+                           GLenum glTarget, GLuint glTexture, size_t imageWidth,
+                           size_t imageHeight, size_t imageDepth,
+                           cl_image_format *outFormat, ExplicitType *outType,
+                           void **outResultBuffer)
 {
-  clMemWrapper streams[ 2 ];
-
-  int error;
-
-  // Create a CL image from the supplied GL texture
-  streams[ 0 ] = (*clCreateFromGLTexture_ptr)( context, CL_MEM_READ_ONLY,
-    glTarget, 0, glTexture, &error );
-  if( error != CL_SUCCESS )
-  {
-    print_error( error, "Unable to create CL image from GL texture" );
-    GLint fmt;
-    glGetTexLevelParameteriv( glTarget, 0, GL_TEXTURE_INTERNAL_FORMAT, &fmt );
-    log_error( "    Supplied GL texture was format %s\n", GetGLFormatName( fmt ) );
-    return error;
-  }
-
-  // Determine data type and format that CL came up with
-  error = clGetImageInfo( streams[ 0 ], CL_IMAGE_FORMAT,
-    sizeof( cl_image_format ), outFormat, NULL );
-  test_error( error, "Unable to get CL image format" );
-
-  cl_gl_object_type object_type;
-  switch (glTarget) {
-    case GL_TEXTURE_1D:
-      object_type = CL_GL_OBJECT_TEXTURE1D;
-      break;
-    case GL_TEXTURE_BUFFER:
-      object_type = CL_GL_OBJECT_TEXTURE_BUFFER;
-      break;
-    case GL_TEXTURE_1D_ARRAY:
-      object_type = CL_GL_OBJECT_TEXTURE1D_ARRAY;
-      break;
-    case GL_TEXTURE_2D:
-    case GL_TEXTURE_RECTANGLE_EXT:
-    case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
-    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
-    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
-    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
-    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
-      object_type = CL_GL_OBJECT_TEXTURE2D;
-      break;
-    case GL_TEXTURE_2D_ARRAY:
-      object_type = CL_GL_OBJECT_TEXTURE2D_ARRAY;
-      break;
-    case GL_TEXTURE_3D:
-      object_type = CL_GL_OBJECT_TEXTURE3D;
-      break;
-    default:
-      log_error("Unsupported texture target.");
-      return 1;
-  }
-
-  return CheckGLObjectInfo(streams[0], object_type, glTexture, glTarget, 0);
+    clMemWrapper streams[2];
+
+    int error;
+
+    // Create a CL image from the supplied GL texture
+    streams[0] = (*clCreateFromGLTexture_ptr)(context, CL_MEM_READ_ONLY,
+                                              glTarget, 0, glTexture, &error);
+    if (error != CL_SUCCESS)
+    {
+        print_error(error, "Unable to create CL image from GL texture");
+        GLint fmt;
+        glGetTexLevelParameteriv(glTarget, 0, GL_TEXTURE_INTERNAL_FORMAT, &fmt);
+        log_error("    Supplied GL texture was format %s\n",
+                  GetGLFormatName(fmt));
+        return error;
+    }
+
+    // Determine data type and format that CL came up with
+    error = clGetImageInfo(streams[0], CL_IMAGE_FORMAT, sizeof(cl_image_format),
+                           outFormat, NULL);
+    test_error(error, "Unable to get CL image format");
+
+    cl_gl_object_type object_type;
+    switch (glTarget)
+    {
+        case GL_TEXTURE_1D: object_type = CL_GL_OBJECT_TEXTURE1D; break;
+        case GL_TEXTURE_BUFFER:
+            object_type = CL_GL_OBJECT_TEXTURE_BUFFER;
+            break;
+        case GL_TEXTURE_1D_ARRAY:
+            object_type = CL_GL_OBJECT_TEXTURE1D_ARRAY;
+            break;
+        case GL_TEXTURE_2D:
+        case GL_TEXTURE_RECTANGLE_EXT:
+        case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+        case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+        case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+        case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+        case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+        case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+            object_type = CL_GL_OBJECT_TEXTURE2D;
+            break;
+        case GL_TEXTURE_2D_ARRAY:
+            object_type = CL_GL_OBJECT_TEXTURE2D_ARRAY;
+            break;
+        case GL_TEXTURE_3D: object_type = CL_GL_OBJECT_TEXTURE3D; break;
+        default: log_error("Unsupported texture target."); return 1;
+    }
+
+    return CheckGLObjectInfo(streams[0], object_type, glTexture, glTarget, 0);
 }
 
 static int test_image_format_get_info(cl_context context,
@@ -92,110 +89,119 @@ static int test_image_format_get_info(cl_context context,
                                       GLenum target, const format *fmt,
                                       MTdata data)
 {
-  int error = 0;
-
-  // If we're testing a half float format, then we need to determine the
-  // rounding mode of this machine.  Punt if we fail to do so.
-
-  if( fmt->type == kHalf )
-  {
-    if( DetectFloatToHalfRoundingMode(queue) )
-      return 0;
-    bool supports_half = false;
-    error = supportsHalf(context, &supports_half);
-    if( error != 0 )
-      return error;
-    if (!supports_half) return 0;
-  }
-
-  size_t w = width, h = height, d = depth;
-
-  // Unpack the format and use it, along with the target, to create an
-  // appropriate GL texture.
-
-  GLenum gl_fmt          = fmt->formattype;
-  GLenum gl_internal_fmt = fmt->internal;
-  GLenum gl_type         = fmt->datatype;
-  ExplicitType type      = fmt->type;
-
-  glTextureWrapper texture;
-  glBufferWrapper glbuf;
-
-  // If we're testing a half float format, then we need to determine the
-  // rounding mode of this machine.  Punt if we fail to do so.
-
-  if( fmt->type == kHalf )
-    if( DetectFloatToHalfRoundingMode(queue) )
-      return 1;
-
-  // Use the correct texture creation function depending on the target, and
-  // adjust width, height, depth as appropriate so subsequent size calculations
-  // succeed.
-
-  switch (target) {
-    case GL_TEXTURE_1D:
-      h = 1; d = 1;
-      CreateGLTexture1D( width, target, gl_fmt,
-        gl_internal_fmt, gl_type, type, &texture, &error, false, data );
-      break;
-    case GL_TEXTURE_BUFFER:
-      h = 1; d = 1;
-      CreateGLTextureBuffer( width, target, gl_fmt,
-        gl_internal_fmt, gl_type, type, &texture, &glbuf, &error, false, data );
-      break;
-    case GL_TEXTURE_1D_ARRAY:
-      d = 1;
-      CreateGLTexture1DArray( width, height, target, gl_fmt,
-        gl_internal_fmt, gl_type, type, &texture, &error, false, data );
-      break;
-    case GL_TEXTURE_RECTANGLE_EXT:
-    case GL_TEXTURE_2D:
-    case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
-    case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
-    case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
-    case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
-    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
-    case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
-      d = 1;
-      CreateGLTexture2D( width, height, target, gl_fmt,
-        gl_internal_fmt, gl_type, type, &texture, &error, false, data );
-      break;
-    case GL_TEXTURE_2D_ARRAY:
-      CreateGLTexture2DArray( width, height, depth, target, gl_fmt,
-        gl_internal_fmt, gl_type, type, &texture, &error, false, data );
-      break;
-    case GL_TEXTURE_3D:
-      d = 1;
-      CreateGLTexture3D( width, height, depth, target, gl_fmt,
-        gl_internal_fmt, gl_type, type, &texture, &error, data, false );
-      break;
-    default:
-      log_error("Unsupported texture target.\n");
-      return 1;
-  }
-
-  if ( error == -2 ) {
-    log_info("OpenGL texture couldn't be created, because a texture is too big. Skipping test.\n");
-    return 0;
-  }
-
-  if ( error != 0 ) {
-    if ((gl_fmt == GL_RGBA_INTEGER_EXT) && (!CheckGLIntegerExtensionSupport())) {
-      log_info("OpenGL version does not support GL_RGBA_INTEGER_EXT. "
-        "Skipping test.\n");
-      return 0;
-    } else {
-      return error;
+    int error = 0;
+
+    // If we're testing a half float format, then we need to determine the
+    // rounding mode of this machine.  Punt if we fail to do so.
+
+    if (fmt->type == kHalf)
+    {
+        if (DetectFloatToHalfRoundingMode(queue)) return 0;
+        bool supports_half = false;
+        error = supportsHalf(context, &supports_half);
+        if (error != 0) return error;
+        if (!supports_half) return 0;
     }
-  }
 
-  cl_image_format clFormat;
-  ExplicitType actualType;
-  char *outBuffer;
+    size_t w = width, h = height, d = depth;
+
+    // Unpack the format and use it, along with the target, to create an
+    // appropriate GL texture.
+
+    GLenum gl_fmt = fmt->formattype;
+    GLenum gl_internal_fmt = fmt->internal;
+    GLenum gl_type = fmt->datatype;
+    ExplicitType type = fmt->type;
+
+    glTextureWrapper texture;
+    glBufferWrapper glbuf;
+
+    // If we're testing a half float format, then we need to determine the
+    // rounding mode of this machine.  Punt if we fail to do so.
+
+    if (fmt->type == kHalf)
+        if (DetectFloatToHalfRoundingMode(queue)) return 1;
+
+    // Use the correct texture creation function depending on the target, and
+    // adjust width, height, depth as appropriate so subsequent size
+    // calculations succeed.
+
+    switch (target)
+    {
+        case GL_TEXTURE_1D:
+            h = 1;
+            d = 1;
+            CreateGLTexture1D(width, target, gl_fmt, gl_internal_fmt, gl_type,
+                              type, &texture, &error, false, data);
+            break;
+        case GL_TEXTURE_BUFFER:
+            h = 1;
+            d = 1;
+            CreateGLTextureBuffer(width, target, gl_fmt, gl_internal_fmt,
+                                  gl_type, type, &texture, &glbuf, &error,
+                                  false, data);
+            break;
+        case GL_TEXTURE_1D_ARRAY:
+            d = 1;
+            CreateGLTexture1DArray(width, height, target, gl_fmt,
+                                   gl_internal_fmt, gl_type, type, &texture,
+                                   &error, false, data);
+            break;
+        case GL_TEXTURE_RECTANGLE_EXT:
+        case GL_TEXTURE_2D:
+        case GL_TEXTURE_CUBE_MAP_POSITIVE_X:
+        case GL_TEXTURE_CUBE_MAP_POSITIVE_Y:
+        case GL_TEXTURE_CUBE_MAP_POSITIVE_Z:
+        case GL_TEXTURE_CUBE_MAP_NEGATIVE_X:
+        case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y:
+        case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z:
+            d = 1;
+            CreateGLTexture2D(width, height, target, gl_fmt, gl_internal_fmt,
+                              gl_type, type, &texture, &error, false, data);
+            break;
+        case GL_TEXTURE_2D_ARRAY:
+            CreateGLTexture2DArray(width, height, depth, target, gl_fmt,
+                                   gl_internal_fmt, gl_type, type, &texture,
+                                   &error, false, data);
+            break;
+        case GL_TEXTURE_3D:
+            d = 1;
+            CreateGLTexture3D(width, height, depth, target, gl_fmt,
+                              gl_internal_fmt, gl_type, type, &texture, &error,
+                              data, false);
+            break;
+        default: log_error("Unsupported texture target.\n"); return 1;
+    }
 
-  // Perform the info check:
-  return test_image_info( context, queue, target, texture, w, h, d, &clFormat,
-    &actualType, (void **)&outBuffer );
+    if (error == -2)
+    {
+        log_info("OpenGL texture couldn't be created, because a texture is too "
+                 "big. Skipping test.\n");
+        return 0;
+    }
+
+    if (error != 0)
+    {
+        if ((gl_fmt == GL_RGBA_INTEGER_EXT)
+            && (!CheckGLIntegerExtensionSupport()))
+        {
+            log_info("OpenGL version does not support GL_RGBA_INTEGER_EXT. "
+                     "Skipping test.\n");
+            return 0;
+        }
+        else
+        {
+            return error;
+        }
+    }
+
+    cl_image_format clFormat;
+    ExplicitType actualType;
+    char *outBuffer;
+
+    // Perform the info check:
+    return test_image_info(context, queue, target, texture, w, h, d, &clFormat,
+                           &actualType, (void **)&outBuffer);
 }
 
 int test_images_get_info_common(cl_device_id device, cl_context context,
@@ -204,60 +210,65 @@ int test_images_get_info_common(cl_device_id device, cl_context context,
                                 size_t ntargets, sizevec_t *sizes,
                                 size_t nsizes)
 {
-  int error = 0;
-  RandomSeed seed(gRandomSeed);
-
-  // First, ensure this device supports images.
-
-  if (checkForImageSupport(device)) {
-    log_info("Device does not support images.  Skipping test.\n");
-    return 0;
-  }
-
-  size_t fidx, tidx, sidx;
+    int error = 0;
+    RandomSeed seed(gRandomSeed);
 
-  // Test each format on every target, every size.
+    // First, ensure this device supports images.
 
-  for ( fidx = 0; fidx < nformats; fidx++ ) {
-    for ( tidx = 0; tidx < ntargets; tidx++ ) {
-
-      if ( formats[ fidx ].datatype == GL_UNSIGNED_INT_2_10_10_10_REV )
-      {
-        // Check if the RGB 101010 format is supported
-        if ( is_rgb_101010_supported( context, targets[ tidx ] ) == 0 )
-          break; // skip
-      }
-
-      log_info( "Testing image info for GL format %s : %s : %s : %s\n",
-        GetGLTargetName( targets[ tidx ] ),
-        GetGLFormatName( formats[ fidx ].internal ),
-        GetGLBaseFormatName( formats[ fidx ].formattype ),
-        GetGLTypeName( formats[ fidx ].datatype ) );
+    if (checkForImageSupport(device))
+    {
+        log_info("Device does not support images.  Skipping test.\n");
+        return 0;
+    }
 
-      for ( sidx = 0; sidx < nsizes; sidx++ ) {
+    size_t fidx, tidx, sidx;
 
-        // Test this format + size:
+    // Test each format on every target, every size.
 
-        if ( test_image_format_get_info(context, queue,
-                                        sizes[sidx].width, sizes[sidx].height, sizes[sidx].depth,
-                                        targets[tidx], &formats[fidx], seed) )
+    for (fidx = 0; fidx < nformats; fidx++)
+    {
+        for (tidx = 0; tidx < ntargets; tidx++)
         {
-          // We land here in the event of test failure.
 
-          log_error( "ERROR: Image info test failed for %s : %s : %s : %s\n\n",
-            GetGLTargetName( targets[ tidx ] ),
-            GetGLFormatName( formats[ fidx ].internal ),
-            GetGLBaseFormatName( formats[ fidx ].formattype ),
-            GetGLTypeName( formats[ fidx ].datatype ) );
-          error++;
-
-          // Skip the other sizes for this format.
-
-          break;
+            if (formats[fidx].datatype == GL_UNSIGNED_INT_2_10_10_10_REV)
+            {
+                // Check if the RGB 101010 format is supported
+                if (is_rgb_101010_supported(context, targets[tidx]) == 0)
+                    break; // skip
+            }
+
+            log_info("Testing image info for GL format %s : %s : %s : %s\n",
+                     GetGLTargetName(targets[tidx]),
+                     GetGLFormatName(formats[fidx].internal),
+                     GetGLBaseFormatName(formats[fidx].formattype),
+                     GetGLTypeName(formats[fidx].datatype));
+
+            for (sidx = 0; sidx < nsizes; sidx++)
+            {
+
+                // Test this format + size:
+
+                if (test_image_format_get_info(
+                        context, queue, sizes[sidx].width, sizes[sidx].height,
+                        sizes[sidx].depth, targets[tidx], &formats[fidx], seed))
+                {
+                    // We land here in the event of test failure.
+
+                    log_error("ERROR: Image info test failed for %s : %s : %s "
+                              ": %s\n\n",
+                              GetGLTargetName(targets[tidx]),
+                              GetGLFormatName(formats[fidx].internal),
+                              GetGLBaseFormatName(formats[fidx].formattype),
+                              GetGLTypeName(formats[fidx].datatype));
+                    error++;
+
+                    // Skip the other sizes for this format.
+
+                    break;
+                }
+            }
         }
-      }
     }
-  }
 
-  return error;
+    return error;
 }
diff --git a/test_conformance/gl/test_images_multisample.cpp b/test_conformance/gl/test_images_multisample.cpp
index 99f9ff2e..bfb04ab6 100644
--- a/test_conformance/gl/test_images_multisample.cpp
+++ b/test_conformance/gl/test_images_multisample.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,11 +16,11 @@
 #include "common.h"
 #include "testBase.h"
 
-#if defined( __APPLE__ )
-    #include <OpenGL/glu.h>
+#if defined(__APPLE__)
+#include <OpenGL/glu.h>
 #else
-    #include <GL/glu.h>
-    #include <CL/cl_gl.h>
+#include <GL/glu.h>
+#include <CL/cl_gl.h>
 #endif
 
 #include <algorithm>
@@ -29,90 +29,109 @@ using namespace std;
 
 void calc_2D_multisample_size_descriptors(sizevec_t* sizes, size_t nsizes)
 {
-  // Need to limit texture size according to GL device properties
-  GLint maxTextureSize = 4096;
-  glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
-
-  RandomSeed seed( gRandomSeed );
-
-  // Generate some random sizes (within reasonable ranges)
-  for (size_t i = 0; i < nsizes; i++) {
-    sizes[i].width  = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed );
-    sizes[i].height = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed );
-    sizes[i].depth  = 1;
-  }
+    // Need to limit texture size according to GL device properties
+    GLint maxTextureSize = 4096;
+    glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
+
+    RandomSeed seed(gRandomSeed);
+
+    // Generate some random sizes (within reasonable ranges)
+    for (size_t i = 0; i < nsizes; i++)
+    {
+        sizes[i].width =
+            random_in_range(2, min(maxTextureSize, 1 << (i + 4)), seed);
+        sizes[i].height =
+            random_in_range(2, min(maxTextureSize, 1 << (i + 4)), seed);
+        sizes[i].depth = 1;
+    }
 }
 
 void calc_2D_array_multisample_size_descriptors(sizevec_t* sizes, size_t nsizes)
 {
-  // Need to limit array size according to GL device properties
-  GLint maxTextureLayers = 16, maxTextureSize = 4096;
-  glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers);
-  glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
-
-  RandomSeed seed( gRandomSeed );
-
-  // Generate some random sizes (within reasonable ranges)
-  for (size_t i = 0; i < nsizes; i++) {
-    sizes[i].width  = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed );
-    sizes[i].height = random_in_range( 2, min(maxTextureSize, 1<<(i+4)), seed );
-    sizes[i].depth  = random_in_range( 2, min(maxTextureLayers, 1<<(i+4)), seed );
-  }
+    // Need to limit array size according to GL device properties
+    GLint maxTextureLayers = 16, maxTextureSize = 4096;
+    glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &maxTextureLayers);
+    glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxTextureSize);
+
+    RandomSeed seed(gRandomSeed);
+
+    // Generate some random sizes (within reasonable ranges)
+    for (size_t i = 0; i < nsizes; i++)
+    {
+        sizes[i].width =
+            random_in_range(2, min(maxTextureSize, 1 << (i + 4)), seed);
+        sizes[i].height =
+            random_in_range(2, min(maxTextureSize, 1 << (i + 4)), seed);
+        sizes[i].depth =
+            random_in_range(2, min(maxTextureLayers, 1 << (i + 4)), seed);
+    }
 }
 
-int test_images_read_2D_multisample( cl_device_id device, cl_context context,
-  cl_command_queue queue, int numElements )
+int test_images_read_2D_multisample(cl_device_id device, cl_context context,
+                                    cl_command_queue queue, int numElements)
 {
-  if (!is_extension_available(device, "cl_khr_gl_msaa_sharing")) {
-    log_info("Test not run because 'cl_khr_gl_msaa_sharing' extension is not supported by the tested device\n");
-    return 0;
-  }
+    if (!is_extension_available(device, "cl_khr_gl_msaa_sharing"))
+    {
+        log_info("Test not run because 'cl_khr_gl_msaa_sharing' extension is "
+                 "not supported by the tested device\n");
+        return 0;
+    }
 
-  glEnable(GL_MULTISAMPLE);
+    glEnable(GL_MULTISAMPLE);
 
-  const size_t nsizes = 8;
-  sizevec_t sizes[nsizes];
-  calc_2D_multisample_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 8;
+    sizevec_t sizes[nsizes];
+    calc_2D_multisample_size_descriptors(sizes, nsizes);
 
-  size_t nformats;
+    size_t nformats;
 
-  GLenum targets[] = { GL_TEXTURE_2D_MULTISAMPLE };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    GLenum targets[] = { GL_TEXTURE_2D_MULTISAMPLE };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
 
-  nformats = sizeof(common_formats) / sizeof(common_formats[0]);
-  int ret_common = test_images_read_common(device, context, queue, common_formats, nformats, targets, ntargets, sizes, nsizes);
+    nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    int ret_common =
+        test_images_read_common(device, context, queue, common_formats,
+                                nformats, targets, ntargets, sizes, nsizes);
 
-  nformats = sizeof(depth_formats) / sizeof(depth_formats[0]);
-  int ret_depth = test_images_read_common(device, context, queue, depth_formats, nformats, targets, ntargets, sizes, nsizes);
+    nformats = sizeof(depth_formats) / sizeof(depth_formats[0]);
+    int ret_depth =
+        test_images_read_common(device, context, queue, depth_formats, nformats,
+                                targets, ntargets, sizes, nsizes);
 
-  return (ret_common) ? ret_common : ret_depth;
+    return (ret_common) ? ret_common : ret_depth;
 }
 
-int test_images_read_2Darray_multisample( cl_device_id device, cl_context context,
-                             cl_command_queue queue, int )
+int test_images_read_2Darray_multisample(cl_device_id device,
+                                         cl_context context,
+                                         cl_command_queue queue, int)
 {
-  if (!is_extension_available(device, "cl_khr_gl_msaa_sharing")) {
-    log_info("Test not run because 'cl_khr_gl_msaa_sharing' extension is not supported by the tested device\n");
-    return 0;
-  }
+    if (!is_extension_available(device, "cl_khr_gl_msaa_sharing"))
+    {
+        log_info("Test not run because 'cl_khr_gl_msaa_sharing' extension is "
+                 "not supported by the tested device\n");
+        return 0;
+    }
 
-  glEnable(GL_MULTISAMPLE);
+    glEnable(GL_MULTISAMPLE);
 
-  const size_t nsizes = 4;
-  sizevec_t sizes[nsizes];
-  calc_2D_array_multisample_size_descriptors(sizes, nsizes);
+    const size_t nsizes = 4;
+    sizevec_t sizes[nsizes];
+    calc_2D_array_multisample_size_descriptors(sizes, nsizes);
 
-  size_t nformats;
+    size_t nformats;
 
-  GLenum targets[] = { GL_TEXTURE_2D_MULTISAMPLE_ARRAY };
-  size_t ntargets = sizeof(targets) / sizeof(targets[0]);
+    GLenum targets[] = { GL_TEXTURE_2D_MULTISAMPLE_ARRAY };
+    size_t ntargets = sizeof(targets) / sizeof(targets[0]);
 
-  nformats = sizeof(common_formats) / sizeof(common_formats[0]);
-  int ret_common = test_images_read_common(device, context, queue, common_formats, nformats, targets, ntargets, sizes, nsizes);
+    nformats = sizeof(common_formats) / sizeof(common_formats[0]);
+    int ret_common =
+        test_images_read_common(device, context, queue, common_formats,
+                                nformats, targets, ntargets, sizes, nsizes);
 
-  nformats = sizeof(depth_formats) / sizeof(depth_formats[0]);
-  int ret_depth = test_images_read_common(device, context, queue, depth_formats, nformats, targets, ntargets, sizes, nsizes);
+    nformats = sizeof(depth_formats) / sizeof(depth_formats[0]);
+    int ret_depth =
+        test_images_read_common(device, context, queue, depth_formats, nformats,
+                                targets, ntargets, sizes, nsizes);
 
-  return (ret_common) ? ret_common : ret_depth;
+    return (ret_common) ? ret_common : ret_depth;
 }
-
diff --git a/test_conformance/gl/test_images_read_common.cpp b/test_conformance/gl/test_images_read_common.cpp
index fe2a529b..d12936e8 100644
--- a/test_conformance/gl/test_images_read_common.cpp
+++ b/test_conformance/gl/test_images_read_common.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -16,17 +16,18 @@
 #include "common.h"
 #include "testBase.h"
 
-#if defined( __APPLE__ )
-    #include <OpenGL/glu.h>
+#if defined(__APPLE__)
+#include <OpenGL/glu.h>
 #else
-    #include <GL/glu.h>
-    #include <CL/cl_gl.h>
+#include <GL/glu.h>
+#include <CL/cl_gl.h>
 #endif
 
-extern int supportsHalf(cl_context context, bool* supports_half);
-extern int supportsMsaa(cl_context context, bool* supports_msaa);
-extern int supportsDepth(cl_context context, bool* supports_depth);
+extern int supportsHalf(cl_context context, bool *supports_half);
+extern int supportsMsaa(cl_context context, bool *supports_msaa);
+extern int supportsDepth(cl_context context, bool *supports_depth);
 
+// clang-format off
 static const char *kernelpattern_image_read_1d =
 "__kernel void sample_test( read_only image1d_t source, sampler_t sampler, __global %s4 *results )\n"
 "{\n"
@@ -167,481 +168,536 @@ static const char *kernelpattern_image_multisample_read_2darray_depth =
   "         results[ offset ] = read_image%s( source, (int4)( tidX, tidY, tidZ, 1 ), sample );\n"
   "    }\n"
 "}\n";
+// clang-format on
+
+static const char *
+get_appropriate_kernel_for_target(GLenum target, cl_channel_order channel_order)
+{
 
-static const char* get_appropriate_kernel_for_target(GLenum target, cl_channel_order channel_order) {
-
-    switch (get_base_gl_target(target)) {
-    case GL_TEXTURE_1D:
-      return kernelpattern_image_read_1d;
-    case GL_TEXTURE_BUFFER:
-      return kernelpattern_image_read_1d_buffer;
-    case GL_TEXTURE_1D_ARRAY:
-      return kernelpattern_image_read_1darray;
-    case GL_TEXTURE_RECTANGLE_EXT:
-    case GL_TEXTURE_2D:
-    case GL_COLOR_ATTACHMENT0:
-    case GL_RENDERBUFFER:
-    case GL_TEXTURE_CUBE_MAP:
+    switch (get_base_gl_target(target))
+    {
+        case GL_TEXTURE_1D: return kernelpattern_image_read_1d;
+        case GL_TEXTURE_BUFFER: return kernelpattern_image_read_1d_buffer;
+        case GL_TEXTURE_1D_ARRAY: return kernelpattern_image_read_1darray;
+        case GL_TEXTURE_RECTANGLE_EXT:
+        case GL_TEXTURE_2D:
+        case GL_COLOR_ATTACHMENT0:
+        case GL_RENDERBUFFER:
+        case GL_TEXTURE_CUBE_MAP:
 #ifdef GL_VERSION_3_2
-    if(channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL)
-      return kernelpattern_image_read_2d_depth;
+            if (channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL)
+                return kernelpattern_image_read_2d_depth;
 #endif
-      return kernelpattern_image_read_2d;
-    case GL_TEXTURE_2D_ARRAY:
+            return kernelpattern_image_read_2d;
+        case GL_TEXTURE_2D_ARRAY:
 #ifdef GL_VERSION_3_2
-      if(channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL)
-        return kernelpattern_image_read_2darray_depth;
+            if (channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL)
+                return kernelpattern_image_read_2darray_depth;
 #endif
-      return kernelpattern_image_read_2darray;
-    case GL_TEXTURE_3D:
-      return kernelpattern_image_read_3d;
-    case GL_TEXTURE_2D_MULTISAMPLE:
+            return kernelpattern_image_read_2darray;
+        case GL_TEXTURE_3D: return kernelpattern_image_read_3d;
+        case GL_TEXTURE_2D_MULTISAMPLE:
 #ifdef GL_VERSION_3_2
-        if(channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL)
-          return kernelpattern_image_multisample_read_2d_depth;
+            if (channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL)
+                return kernelpattern_image_multisample_read_2d_depth;
 #endif
-      return kernelpattern_image_multisample_read_2d;
-      break;
-    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+            return kernelpattern_image_multisample_read_2d;
+            break;
+        case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
 #ifdef GL_VERSION_3_2
-        if(channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL)
-          return kernelpattern_image_multisample_read_2darray_depth;
+            if (channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL)
+                return kernelpattern_image_multisample_read_2darray_depth;
 #endif
-      return kernelpattern_image_multisample_read_2darray;
-      break;
-    default:
-      log_error("Unsupported texture target (%s); cannot determine "
-        "appropriate kernel.", GetGLTargetName(target));
-      return NULL;
-  }
+            return kernelpattern_image_multisample_read_2darray;
+            break;
+        default:
+            log_error("Unsupported texture target (%s); cannot determine "
+                      "appropriate kernel.",
+                      GetGLTargetName(target));
+            return NULL;
+    }
 }
 
-int test_cl_image_read( cl_context context, cl_command_queue queue,
-  GLenum gl_target, cl_mem image, size_t width, size_t height, size_t depth, size_t sampleNum,
-  cl_image_format *outFormat, ExplicitType *outType, void **outResultBuffer )
+int test_cl_image_read(cl_context context, cl_command_queue queue,
+                       GLenum gl_target, cl_mem image, size_t width,
+                       size_t height, size_t depth, size_t sampleNum,
+                       cl_image_format *outFormat, ExplicitType *outType,
+                       void **outResultBuffer)
 {
-  clProgramWrapper program;
-  clKernelWrapper kernel;
-  clMemWrapper streams[ 2 ];
-
-  int error;
-  char kernelSource[2048];
-  char *programPtr;
-
-  // Use the image created from the GL texture.
-  streams[ 0 ] = image;
-
-  // Determine data type and format that CL came up with
-  error = clGetImageInfo( streams[ 0 ], CL_IMAGE_FORMAT, sizeof( cl_image_format ), outFormat, NULL );
-  test_error( error, "Unable to get CL image format" );
-
-  // Determine the number of samples
-  cl_uint samples = 0;
-  error = clGetImageInfo( streams[ 0 ], CL_IMAGE_NUM_SAMPLES, sizeof( samples ), &samples, NULL );
-  test_error( error, "Unable to get CL_IMAGE_NUM_SAMPLES" );
-
-  // Create the source
-  *outType = get_read_kernel_type( outFormat );
-  size_t channelSize = get_explicit_type_size( *outType );
-
-  const char* source = get_appropriate_kernel_for_target(gl_target, outFormat->image_channel_order);
-
-  sprintf( kernelSource, source, get_explicit_type_name( *outType ),
-    get_kernel_suffix( outFormat ) );
-
-  programPtr = kernelSource;
-  if( create_single_kernel_helper( context, &program, &kernel, 1,
-    (const char **)&programPtr, "sample_test", "" ) )
-  {
-    return -1;
-  }
-
-  // Create a vanilla output buffer
-  cl_device_id device;
-  error = clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE, sizeof(device), &device, NULL);
-  test_error( error, "Unable to get queue device" );
-
-  cl_ulong maxAllocSize = 0;
-  error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
-  test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE" );
-
-  size_t buffer_bytes = channelSize * get_channel_order_channel_count(outFormat->image_channel_order) * width * height * depth * sampleNum;
-  if (buffer_bytes > maxAllocSize) {
-    log_info("Output buffer size %d is too large for device (max alloc size %d) Skipping...\n",
-             (int)buffer_bytes, (int)maxAllocSize);
-    return 1;
-  }
-
-  streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, buffer_bytes, NULL, &error );
-  test_error( error, "Unable to create output buffer" );
-
-  /* Assign streams and execute */
-  clSamplerWrapper sampler = clCreateSampler( context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error );
-  test_error( error, "Unable to create sampler" );
-
-  error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
-  test_error( error, "Unable to set kernel arguments" );
-  error = clSetKernelArg( kernel, 1, sizeof( sampler ), &sampler );
-  test_error( error, "Unable to set kernel arguments" );
-  error = clSetKernelArg( kernel, 2, sizeof( streams[ 1 ] ), &streams[ 1 ] );
-  test_error( error, "Unable to set kernel arguments" );
-
-  glFinish();
-
-  error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL);
-  test_error( error, "Unable to acquire GL obejcts");
-
-  // The ND range we use is a function of the dimensionality of the image.
-  size_t global_range[3] = { width, height, depth };
-  size_t *local_range = NULL;
-  int ndim = 1;
-
-  switch (get_base_gl_target(gl_target)) {
-    case GL_TEXTURE_1D:
-    case GL_TEXTURE_BUFFER:
-      ndim = 1;
-      break;
-    case GL_TEXTURE_RECTANGLE_EXT:
-    case GL_TEXTURE_2D:
-    case GL_TEXTURE_1D_ARRAY:
-    case GL_COLOR_ATTACHMENT0:
-    case GL_RENDERBUFFER:
-    case GL_TEXTURE_CUBE_MAP:
-      ndim = 2;
-      break;
-    case GL_TEXTURE_3D:
-    case GL_TEXTURE_2D_ARRAY:
-#ifdef GL_VERSION_3_2
-    case GL_TEXTURE_2D_MULTISAMPLE:
-    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
-      ndim = 3;
-      break;
-#endif
-    default:
-      log_error("Test error: Unsupported texture target.\n");
-      return 1;
-  }
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper streams[2];
+
+    int error;
+    char kernelSource[2048];
+    char *programPtr;
+
+    // Use the image created from the GL texture.
+    streams[0] = image;
+
+    // Determine data type and format that CL came up with
+    error = clGetImageInfo(streams[0], CL_IMAGE_FORMAT, sizeof(cl_image_format),
+                           outFormat, NULL);
+    test_error(error, "Unable to get CL image format");
+
+    // Determine the number of samples
+    cl_uint samples = 0;
+    error = clGetImageInfo(streams[0], CL_IMAGE_NUM_SAMPLES, sizeof(samples),
+                           &samples, NULL);
+    test_error(error, "Unable to get CL_IMAGE_NUM_SAMPLES");
+
+    // Create the source
+    *outType = get_read_kernel_type(outFormat);
+    size_t channelSize = get_explicit_type_size(*outType);
+
+    const char *source = get_appropriate_kernel_for_target(
+        gl_target, outFormat->image_channel_order);
+
+    sprintf(kernelSource, source, get_explicit_type_name(*outType),
+            get_kernel_suffix(outFormat));
+
+    programPtr = kernelSource;
+    if (create_single_kernel_helper(context, &program, &kernel, 1,
+                                    (const char **)&programPtr, "sample_test",
+                                    ""))
+    {
+        return -1;
+    }
+
+    // Create a vanilla output buffer
+    cl_device_id device;
+    error = clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE, sizeof(device),
+                                  &device, NULL);
+    test_error(error, "Unable to get queue device");
+
+    cl_ulong maxAllocSize = 0;
+    error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+                            sizeof(maxAllocSize), &maxAllocSize, NULL);
+    test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE");
+
+    size_t buffer_bytes = channelSize
+        * get_channel_order_channel_count(outFormat->image_channel_order)
+        * width * height * depth * sampleNum;
+    if (buffer_bytes > maxAllocSize)
+    {
+        log_info("Output buffer size %d is too large for device (max alloc "
+                 "size %d) Skipping...\n",
+                 (int)buffer_bytes, (int)maxAllocSize);
+        return 1;
+    }
 
-  // 2D and 3D images have a special way to set the local size (legacy).
-  // Otherwise, we let CL select by leaving local_range as NULL.
+    streams[1] =
+        clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_bytes, NULL, &error);
+    test_error(error, "Unable to create output buffer");
 
-  if (gl_target == GL_TEXTURE_2D) {
-    local_range = (size_t*)malloc(sizeof(size_t) * ndim);
-    get_max_common_2D_work_group_size( context, kernel, global_range, local_range );
+    /* Assign streams and execute */
+    clSamplerWrapper sampler = clCreateSampler(
+        context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error);
+    test_error(error, "Unable to create sampler");
 
-  } else if (gl_target == GL_TEXTURE_3D) {
-    local_range = (size_t*)malloc(sizeof(size_t) * ndim);
-    get_max_common_3D_work_group_size( context, kernel, global_range, local_range );
-  }
+    error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
+    test_error(error, "Unable to set kernel arguments");
+    error = clSetKernelArg(kernel, 1, sizeof(sampler), &sampler);
+    test_error(error, "Unable to set kernel arguments");
+    error = clSetKernelArg(kernel, 2, sizeof(streams[1]), &streams[1]);
+    test_error(error, "Unable to set kernel arguments");
 
-  error = clEnqueueNDRangeKernel( queue, kernel, ndim, NULL, global_range,
-    local_range, 0, NULL, NULL );
-  test_error( error, "Unable to execute test kernel" );
+    glFinish();
 
-  error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 0 ],
-    0, NULL, NULL );
-  test_error(error, "clEnqueueReleaseGLObjects failed");
+    error =
+        (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &streams[0], 0, NULL, NULL);
+    test_error(error, "Unable to acquire GL obejcts");
 
-  // Read results from the CL buffer
-  *outResultBuffer = (void *)( new char[ channelSize * get_channel_order_channel_count(outFormat->image_channel_order) * width * height * depth * sampleNum] );
-  error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0,
-    channelSize * get_channel_order_channel_count(outFormat->image_channel_order) * width * height * depth * sampleNum, *outResultBuffer, 0, NULL, NULL );
-  test_error( error, "Unable to read output CL buffer!" );
+    // The ND range we use is a function of the dimensionality of the image.
+    size_t global_range[3] = { width, height, depth };
+    size_t *local_range = NULL;
+    int ndim = 1;
 
-  // free the ranges
-  if (local_range) free(local_range);
+    switch (get_base_gl_target(gl_target))
+    {
+        case GL_TEXTURE_1D:
+        case GL_TEXTURE_BUFFER: ndim = 1; break;
+        case GL_TEXTURE_RECTANGLE_EXT:
+        case GL_TEXTURE_2D:
+        case GL_TEXTURE_1D_ARRAY:
+        case GL_COLOR_ATTACHMENT0:
+        case GL_RENDERBUFFER:
+        case GL_TEXTURE_CUBE_MAP: ndim = 2; break;
+        case GL_TEXTURE_3D:
+        case GL_TEXTURE_2D_ARRAY:
+#ifdef GL_VERSION_3_2
+        case GL_TEXTURE_2D_MULTISAMPLE:
+        case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: ndim = 3; break;
+#endif
+        default:
+            log_error("Test error: Unsupported texture target.\n");
+            return 1;
+    }
+
+    // 2D and 3D images have a special way to set the local size (legacy).
+    // Otherwise, we let CL select by leaving local_range as NULL.
+
+    if (gl_target == GL_TEXTURE_2D)
+    {
+        local_range = (size_t *)malloc(sizeof(size_t) * ndim);
+        get_max_common_2D_work_group_size(context, kernel, global_range,
+                                          local_range);
+    }
+    else if (gl_target == GL_TEXTURE_3D)
+    {
+        local_range = (size_t *)malloc(sizeof(size_t) * ndim);
+        get_max_common_3D_work_group_size(context, kernel, global_range,
+                                          local_range);
+    }
+
+    error = clEnqueueNDRangeKernel(queue, kernel, ndim, NULL, global_range,
+                                   local_range, 0, NULL, NULL);
+    test_error(error, "Unable to execute test kernel");
+
+    error =
+        (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &streams[0], 0, NULL, NULL);
+    test_error(error, "clEnqueueReleaseGLObjects failed");
+
+    // Read results from the CL buffer
+    *outResultBuffer = (void *)(new char[channelSize
+                                         * get_channel_order_channel_count(
+                                             outFormat->image_channel_order)
+                                         * width * height * depth * sampleNum]);
+    error = clEnqueueReadBuffer(
+        queue, streams[1], CL_TRUE, 0,
+        channelSize
+            * get_channel_order_channel_count(outFormat->image_channel_order)
+            * width * height * depth * sampleNum,
+        *outResultBuffer, 0, NULL, NULL);
+    test_error(error, "Unable to read output CL buffer!");
+
+    // free the ranges
+    if (local_range) free(local_range);
 
-  return 0;
+    return 0;
 }
 
-static int test_image_read( cl_context context, cl_command_queue queue,
-  GLenum target, GLuint globj, size_t width, size_t height, size_t depth, size_t sampleNum,
-  cl_image_format *outFormat, ExplicitType *outType, void **outResultBuffer )
+static int test_image_read(cl_context context, cl_command_queue queue,
+                           GLenum target, GLuint globj, size_t width,
+                           size_t height, size_t depth, size_t sampleNum,
+                           cl_image_format *outFormat, ExplicitType *outType,
+                           void **outResultBuffer)
 {
-  int error;
-
-  // Create a CL image from the supplied GL texture or renderbuffer.
-  cl_mem image;
-  if (target == GL_RENDERBUFFER || target == GL_COLOR_ATTACHMENT0) {
-    image = (*clCreateFromGLRenderbuffer_ptr)( context, CL_MEM_READ_ONLY, globj, &error );
-  } else {
-    image = (*clCreateFromGLTexture_ptr)( context, CL_MEM_READ_ONLY,
-      target, 0, globj, &error );
-  }
-
-  if( error != CL_SUCCESS ) {
-    if (target == GL_RENDERBUFFER || target == GL_COLOR_ATTACHMENT0) {
-      print_error( error, "Unable to create CL image from GL renderbuffer" );
-    } else {
-      print_error( error, "Unable to create CL image from GL texture" );
-      GLint fmt;
-      glGetTexLevelParameteriv( target, 0, GL_TEXTURE_INTERNAL_FORMAT, &fmt );
-      log_error( "    Supplied GL texture was base format %s and internal "
-        "format %s\n", GetGLBaseFormatName( fmt ), GetGLFormatName( fmt ) );
+    int error;
+
+    // Create a CL image from the supplied GL texture or renderbuffer.
+    cl_mem image;
+    if (target == GL_RENDERBUFFER || target == GL_COLOR_ATTACHMENT0)
+    {
+        image = (*clCreateFromGLRenderbuffer_ptr)(context, CL_MEM_READ_ONLY,
+                                                  globj, &error);
+    }
+    else
+    {
+        image = (*clCreateFromGLTexture_ptr)(context, CL_MEM_READ_ONLY, target,
+                                             0, globj, &error);
     }
-    return error;
-  }
 
-  return test_cl_image_read( context, queue, target, image,
-    width, height, depth, sampleNum, outFormat, outType, outResultBuffer );
+    if (error != CL_SUCCESS)
+    {
+        if (target == GL_RENDERBUFFER || target == GL_COLOR_ATTACHMENT0)
+        {
+            print_error(error,
+                        "Unable to create CL image from GL renderbuffer");
+        }
+        else
+        {
+            print_error(error, "Unable to create CL image from GL texture");
+            GLint fmt;
+            glGetTexLevelParameteriv(target, 0, GL_TEXTURE_INTERNAL_FORMAT,
+                                     &fmt);
+            log_error("    Supplied GL texture was base format %s and internal "
+                      "format %s\n",
+                      GetGLBaseFormatName(fmt), GetGLFormatName(fmt));
+        }
+        return error;
+    }
+
+    return test_cl_image_read(context, queue, target, image, width, height,
+                              depth, sampleNum, outFormat, outType,
+                              outResultBuffer);
 }
 
 static int test_image_format_read(cl_context context, cl_command_queue queue,
                                   size_t width, size_t height, size_t depth,
                                   GLenum target, const format *fmt, MTdata data)
 {
-  int error = 0;
-
-  // Determine the maximum number of supported samples
-  GLint samples = 1;
-  if (target == GL_TEXTURE_2D_MULTISAMPLE || target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)
-    samples = get_gl_max_samples(target, fmt->internal);
-
-  // If we're testing a half float format, then we need to determine the
-  // rounding mode of this machine.  Punt if we fail to do so.
-
-  if( fmt->type == kHalf )
-  {
-    if( DetectFloatToHalfRoundingMode(queue) )
-      return 1;
-    bool supports_half = false;
-    error = supportsHalf(context, &supports_half);
-    if( error != 0 )
-      return error;
-    if (!supports_half) return 0;
-  }
+    int error = 0;
+
+    // Determine the maximum number of supported samples
+    GLint samples = 1;
+    if (target == GL_TEXTURE_2D_MULTISAMPLE
+        || target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)
+        samples = get_gl_max_samples(target, fmt->internal);
+
+    // If we're testing a half float format, then we need to determine the
+    // rounding mode of this machine.  Punt if we fail to do so.
+
+    if (fmt->type == kHalf)
+    {
+        if (DetectFloatToHalfRoundingMode(queue)) return 1;
+        bool supports_half = false;
+        error = supportsHalf(context, &supports_half);
+        if (error != 0) return error;
+        if (!supports_half) return 0;
+    }
 #ifdef GL_VERSION_3_2
-    if (get_base_gl_target(target) == GL_TEXTURE_2D_MULTISAMPLE ||
-        get_base_gl_target(target) == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)
+    if (get_base_gl_target(target) == GL_TEXTURE_2D_MULTISAMPLE
+        || get_base_gl_target(target) == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)
     {
         bool supports_msaa;
         error = supportsMsaa(context, &supports_msaa);
-        if( error != 0 ) return error;
+        if (error != 0) return error;
         if (!supports_msaa) return 0;
     }
-    if (fmt->formattype == GL_DEPTH_COMPONENT ||
-        fmt->formattype == GL_DEPTH_STENCIL)
+    if (fmt->formattype == GL_DEPTH_COMPONENT
+        || fmt->formattype == GL_DEPTH_STENCIL)
     {
         bool supports_depth;
         error = supportsDepth(context, &supports_depth);
-        if( error != 0 ) return error;
+        if (error != 0) return error;
         if (!supports_depth) return 0;
     }
 #endif
-  size_t w = width, h = height, d = depth;
-
-  // Unpack the format and use it, along with the target, to create an
-  // appropriate GL texture.
-
-  GLenum gl_fmt          = fmt->formattype;
-  GLenum gl_internal_fmt = fmt->internal;
-  GLenum gl_type         = fmt->datatype;
-  ExplicitType type      = fmt->type;
-
-  // Required for most of the texture-backed cases:
-  glTextureWrapper texture;
-
-  // Required for the special case of TextureBuffer textures:
-  glBufferWrapper glbuf;
-
-  // And these are required for the case of Renderbuffer images:
-  glFramebufferWrapper glFramebuffer;
-  glRenderbufferWrapper glRenderbuffer;
-
-  void* buffer = NULL;
-
-  // Use the correct texture creation function depending on the target, and
-  // adjust width, height, depth as appropriate so subsequent size calculations
-  // succeed.
-
-  switch (get_base_gl_target(target)) {
-    case GL_TEXTURE_1D:
-      h = 1; d = 1;
-      buffer = CreateGLTexture1D( width, target, gl_fmt,
-        gl_internal_fmt, gl_type, type, &texture, &error, true, data );
-      break;
-    case GL_TEXTURE_BUFFER:
-      h = 1; d = 1;
-      buffer = CreateGLTextureBuffer(width, target, gl_fmt, gl_internal_fmt,
-        gl_type, type, &texture, &glbuf, &error, true, data);
-      break;
-    case GL_RENDERBUFFER:
-    case GL_COLOR_ATTACHMENT0:
-      d = 1;
-      buffer = CreateGLRenderbuffer(width, height, target, gl_fmt,
-        gl_internal_fmt, gl_type, type, &glFramebuffer, &glRenderbuffer, &error,
-        data, true);
-      break;
-    case GL_TEXTURE_2D:
-    case GL_TEXTURE_RECTANGLE_EXT:
-    case GL_TEXTURE_CUBE_MAP:
-      d = 1;
-      buffer = CreateGLTexture2D(width, height, target, gl_fmt, gl_internal_fmt,
-        gl_type, type, &texture, &error, true, data);
-      break;
-    case GL_TEXTURE_1D_ARRAY:
-      d = 1;
-      buffer = CreateGLTexture1DArray( width, height, target, gl_fmt,
-        gl_internal_fmt, gl_type, type, &texture, &error, true, data );
-      break;
-    case GL_TEXTURE_2D_ARRAY:
-      buffer = CreateGLTexture2DArray( width, height, depth, target, gl_fmt,
-        gl_internal_fmt, gl_type, type, &texture, &error, true, data );
-      break;
-    case GL_TEXTURE_3D:
-      buffer = CreateGLTexture3D( width, height, depth, target, gl_fmt,
-        gl_internal_fmt, gl_type, type, &texture, &error, data, true );
-      break;
+    size_t w = width, h = height, d = depth;
+
+    // Unpack the format and use it, along with the target, to create an
+    // appropriate GL texture.
+
+    GLenum gl_fmt = fmt->formattype;
+    GLenum gl_internal_fmt = fmt->internal;
+    GLenum gl_type = fmt->datatype;
+    ExplicitType type = fmt->type;
+
+    // Required for most of the texture-backed cases:
+    glTextureWrapper texture;
+
+    // Required for the special case of TextureBuffer textures:
+    glBufferWrapper glbuf;
+
+    // And these are required for the case of Renderbuffer images:
+    glFramebufferWrapper glFramebuffer;
+    glRenderbufferWrapper glRenderbuffer;
+
+    void *buffer = NULL;
+
+    // Use the correct texture creation function depending on the target, and
+    // adjust width, height, depth as appropriate so subsequent size
+    // calculations succeed.
+
+    switch (get_base_gl_target(target))
+    {
+        case GL_TEXTURE_1D:
+            h = 1;
+            d = 1;
+            buffer =
+                CreateGLTexture1D(width, target, gl_fmt, gl_internal_fmt,
+                                  gl_type, type, &texture, &error, true, data);
+            break;
+        case GL_TEXTURE_BUFFER:
+            h = 1;
+            d = 1;
+            buffer = CreateGLTextureBuffer(
+                width, target, gl_fmt, gl_internal_fmt, gl_type, type, &texture,
+                &glbuf, &error, true, data);
+            break;
+        case GL_RENDERBUFFER:
+        case GL_COLOR_ATTACHMENT0:
+            d = 1;
+            buffer = CreateGLRenderbuffer(
+                width, height, target, gl_fmt, gl_internal_fmt, gl_type, type,
+                &glFramebuffer, &glRenderbuffer, &error, data, true);
+            break;
+        case GL_TEXTURE_2D:
+        case GL_TEXTURE_RECTANGLE_EXT:
+        case GL_TEXTURE_CUBE_MAP:
+            d = 1;
+            buffer = CreateGLTexture2D(width, height, target, gl_fmt,
+                                       gl_internal_fmt, gl_type, type, &texture,
+                                       &error, true, data);
+            break;
+        case GL_TEXTURE_1D_ARRAY:
+            d = 1;
+            buffer = CreateGLTexture1DArray(width, height, target, gl_fmt,
+                                            gl_internal_fmt, gl_type, type,
+                                            &texture, &error, true, data);
+            break;
+        case GL_TEXTURE_2D_ARRAY:
+            buffer = CreateGLTexture2DArray(width, height, depth, target,
+                                            gl_fmt, gl_internal_fmt, gl_type,
+                                            type, &texture, &error, true, data);
+            break;
+        case GL_TEXTURE_3D:
+            buffer = CreateGLTexture3D(width, height, depth, target, gl_fmt,
+                                       gl_internal_fmt, gl_type, type, &texture,
+                                       &error, data, true);
+            break;
 #ifdef GL_VERSION_3_2
-    case GL_TEXTURE_2D_MULTISAMPLE:
-      d = 1;
-      buffer = CreateGLTexture2DMultisample( width, height, samples, target, gl_fmt,
-        gl_internal_fmt, gl_type, type, &texture, &error, true, data, true );
-      break;
-    case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
-      buffer = CreateGLTexture2DArrayMultisample( width, height, depth, samples, target, gl_fmt,
-        gl_internal_fmt, gl_type, type, &texture, &error, true, data, true );
-      break;
+        case GL_TEXTURE_2D_MULTISAMPLE:
+            d = 1;
+            buffer = CreateGLTexture2DMultisample(
+                width, height, samples, target, gl_fmt, gl_internal_fmt,
+                gl_type, type, &texture, &error, true, data, true);
+            break;
+        case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+            buffer = CreateGLTexture2DArrayMultisample(
+                width, height, depth, samples, target, gl_fmt, gl_internal_fmt,
+                gl_type, type, &texture, &error, true, data, true);
+            break;
 #endif
-    default:
-      log_error("Unsupported texture target.");
-      return 1;
-  }
+        default: log_error("Unsupported texture target."); return 1;
+    }
 
-  if ( error == -2 ) {
-    log_info("OpenGL texture couldn't be created, because a texture is too big. Skipping test.\n");
-    return 0;
-  }
+    if (error == -2)
+    {
+        log_info("OpenGL texture couldn't be created, because a texture is too "
+                 "big. Skipping test.\n");
+        return 0;
+    }
 
-  // Check to see if the texture could not be created for some other reason like
-  // GL_FRAMEBUFFER_UNSUPPORTED
-  if (error == GL_FRAMEBUFFER_UNSUPPORTED) {
-    log_info("Skipping...\n");
-    return 0;
-  }
-
-  if ( error != 0 ) {
-    if ((gl_fmt == GL_RGBA_INTEGER_EXT) && (!CheckGLIntegerExtensionSupport())){
-      log_info("OpenGL version does not support GL_RGBA_INTEGER_EXT. "
-        "Skipping test.\n");
-      return 0;
-    } else {
-      return error;
+    // Check to see if the texture could not be created for some other reason
+    // like GL_FRAMEBUFFER_UNSUPPORTED
+    if (error == GL_FRAMEBUFFER_UNSUPPORTED)
+    {
+        log_info("Skipping...\n");
+        return 0;
     }
-  }
 
-  BufferOwningPtr<char> inputBuffer(buffer);
-  if( inputBuffer == NULL )
-    return -1;
+    if (error != 0)
+    {
+        if ((gl_fmt == GL_RGBA_INTEGER_EXT)
+            && (!CheckGLIntegerExtensionSupport()))
+        {
+            log_info("OpenGL version does not support GL_RGBA_INTEGER_EXT. "
+                     "Skipping test.\n");
+            return 0;
+        }
+        else
+        {
+            return error;
+        }
+    }
 
-  cl_image_format clFormat;
-  ExplicitType actualType;
-  char *outBuffer;
+    BufferOwningPtr<char> inputBuffer(buffer);
+    if (inputBuffer == NULL) return -1;
 
-  // Perform the read:
+    cl_image_format clFormat;
+    ExplicitType actualType;
+    char *outBuffer;
 
-  GLuint globj = texture;
-  if (target == GL_RENDERBUFFER || target == GL_COLOR_ATTACHMENT0) {
-    globj = glRenderbuffer;
-  }
+    // Perform the read:
 
-  error = test_image_read( context, queue, target, globj, w, h, d, samples, &clFormat,
-                          &actualType, (void **)&outBuffer );
+    GLuint globj = texture;
+    if (target == GL_RENDERBUFFER || target == GL_COLOR_ATTACHMENT0)
+    {
+        globj = glRenderbuffer;
+    }
 
-  if( error != 0 )
-    return error;
+    error = test_image_read(context, queue, target, globj, w, h, d, samples,
+                            &clFormat, &actualType, (void **)&outBuffer);
 
-  BufferOwningPtr<char> actualResults(outBuffer);
-  if( actualResults == NULL )
-    return -1;
+    if (error != 0) return error;
 
-  log_info( "- Read [%4d x %4d x %4d x %4d] : GL Texture : %s : %s : %s => CL Image : %s : %s \n",
-    (int)w, (int)h, (int)d, (int)samples, GetGLFormatName( gl_fmt ), GetGLFormatName( gl_internal_fmt ),
-    GetGLTypeName( gl_type ), GetChannelOrderName( clFormat.image_channel_order ),
-    GetChannelTypeName( clFormat.image_channel_data_type ));
+    BufferOwningPtr<char> actualResults(outBuffer);
+    if (actualResults == NULL) return -1;
 
-  BufferOwningPtr<char> convertedInputs;
+    log_info("- Read [%4d x %4d x %4d x %4d] : GL Texture : %s : %s : %s => CL "
+             "Image : %s : %s \n",
+             (int)w, (int)h, (int)d, (int)samples, GetGLFormatName(gl_fmt),
+             GetGLFormatName(gl_internal_fmt), GetGLTypeName(gl_type),
+             GetChannelOrderName(clFormat.image_channel_order),
+             GetChannelTypeName(clFormat.image_channel_data_type));
 
-  // We have to convert our input buffer to the returned type, so we can validate.
-  // This is necessary because OpenCL might not actually pick an internal format
-  // that actually matches our input format (for example, if it picks a normalized
-  // format, the results will come out as floats instead of going in as ints).
+    BufferOwningPtr<char> convertedInputs;
 
-  if ( gl_type == GL_UNSIGNED_INT_2_10_10_10_REV )
-  {
-    cl_uint *p = (cl_uint *)buffer;
-    float *inData = (float *)malloc( w * h * d * samples * sizeof(float) );
+    // We have to convert our input buffer to the returned type, so we can
+    // validate. This is necessary because OpenCL might not actually pick an
+    // internal format that actually matches our input format (for example, if
+    // it picks a normalized format, the results will come out as floats instead
+    // of going in as ints).
 
-    for( size_t i = 0; i < 4 * w * h * d * samples; i += 4 )
+    if (gl_type == GL_UNSIGNED_INT_2_10_10_10_REV)
     {
-      inData[ i + 0 ] = (float)( ( p[ 0 ] >> 20 ) & 0x3ff ) / (float)1023;
-      inData[ i + 1 ] = (float)( ( p[ 0 ] >> 10 ) & 0x3ff ) / (float)1023;
-      inData[ i + 2 ] = (float)( p[ 0 ] & 0x3ff ) / (float)1023;
-      p++;
-    }
+        cl_uint *p = (cl_uint *)buffer;
+        float *inData = (float *)malloc(w * h * d * samples * sizeof(float));
+
+        for (size_t i = 0; i < 4 * w * h * d * samples; i += 4)
+        {
+            inData[i + 0] = (float)((p[0] >> 20) & 0x3ff) / (float)1023;
+            inData[i + 1] = (float)((p[0] >> 10) & 0x3ff) / (float)1023;
+            inData[i + 2] = (float)(p[0] & 0x3ff) / (float)1023;
+            p++;
+        }
 
-    convertedInputs.reset( inData );
-    if( convertedInputs == NULL )
-      return -1;
-  }
-  else if ( gl_type == GL_DEPTH24_STENCIL8 )
-  {
-    // GL_DEPTH24_STENCIL8 is treated as CL_UNORM_INT24 + CL_DEPTH_STENCIL where
-    // the stencil is ignored.
-    cl_uint *p = (cl_uint *)buffer;
-    float *inData = (float *)malloc( w * h * d * samples * sizeof(float) );
-
-    for( size_t i = 0; i < w * h * d * samples; i++ )
+        convertedInputs.reset(inData);
+        if (convertedInputs == NULL) return -1;
+    }
+    else if (gl_type == GL_DEPTH24_STENCIL8)
     {
-      inData[ i ] = (float)((p[i] >> 8) & 0xffffff) / (float)0xfffffe;
+        // GL_DEPTH24_STENCIL8 is treated as CL_UNORM_INT24 + CL_DEPTH_STENCIL
+        // where the stencil is ignored.
+        cl_uint *p = (cl_uint *)buffer;
+        float *inData = (float *)malloc(w * h * d * samples * sizeof(float));
+
+        for (size_t i = 0; i < w * h * d * samples; i++)
+        {
+            inData[i] = (float)((p[i] >> 8) & 0xffffff) / (float)0xfffffe;
+        }
+
+        convertedInputs.reset(inData);
+        if (convertedInputs == NULL) return -1;
     }
+    else if (gl_type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
+    {
+        // GL_FLOAT_32_UNSIGNED_INT_24_8_REV is treated as a CL_FLOAT +
+        // unused 24 + CL_DEPTH_STENCIL; we check the float value and ignore the
+        // second word
 
-    convertedInputs.reset( inData );
-    if( convertedInputs == NULL )
-      return -1;
-  }
-  else if ( gl_type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
-  {
-    // GL_FLOAT_32_UNSIGNED_INT_24_8_REV is treated as a CL_FLOAT +
-    // unused 24 + CL_DEPTH_STENCIL; we check the float value and ignore the
-    // second word
+        float *p = (float *)buffer;
+        float *inData = (float *)malloc(w * h * d * samples * sizeof(float));
 
-    float *p = (float *)buffer;
-    float *inData = (float *)malloc( w * h * d * samples * sizeof(float) );
+        for (size_t i = 0; i < w * h * d * samples; i++)
+        {
+            inData[i] = p[i * 2];
+        }
 
-    for( size_t i = 0; i < w * h * d * samples; i++ )
+        convertedInputs.reset(inData);
+        if (convertedInputs == NULL) return -1;
+    }
+    else
     {
-      inData[ i ] = p[i*2];
+        convertedInputs.reset(convert_to_expected(
+            inputBuffer, w * h * d * samples, type, actualType,
+            get_channel_order_channel_count(clFormat.image_channel_order)));
+        if (convertedInputs == NULL) return -1;
     }
 
-    convertedInputs.reset( inData );
-    if( convertedInputs == NULL )
-      return -1;
-  }
-  else
-  {
-    convertedInputs.reset(convert_to_expected( inputBuffer,
-      w * h * d * samples, type, actualType, get_channel_order_channel_count(clFormat.image_channel_order) ));
-    if( convertedInputs == NULL )
-      return -1;
-  }
-
-  // Now we validate
-  if( actualType == kFloat )
-  {
-    if ( clFormat.image_channel_data_type == CL_UNORM_INT_101010 )
+    // Now we validate
+    if (actualType == kFloat)
     {
-      return validate_float_results_rgb_101010( convertedInputs, actualResults, w, h, d, samples );
+        if (clFormat.image_channel_data_type == CL_UNORM_INT_101010)
+        {
+            return validate_float_results_rgb_101010(
+                convertedInputs, actualResults, w, h, d, samples);
+        }
+        else
+        {
+            return validate_float_results(
+                convertedInputs, actualResults, w, h, d, samples,
+                get_channel_order_channel_count(clFormat.image_channel_order));
+        }
     }
     else
     {
-      return validate_float_results( convertedInputs, actualResults, w, h, d, samples, get_channel_order_channel_count(clFormat.image_channel_order) );
+        return validate_integer_results(convertedInputs, actualResults, w, h, d,
+                                        samples,
+                                        get_explicit_type_size(actualType));
     }
-  }
-  else
-  {
-    return validate_integer_results( convertedInputs, actualResults, w, h, d, samples, get_explicit_type_size( actualType ) );
-  }
 }
 
 int test_images_read_common(cl_device_id device, cl_context context,
@@ -649,85 +705,98 @@ int test_images_read_common(cl_device_id device, cl_context context,
                             size_t nformats, GLenum *targets, size_t ntargets,
                             sizevec_t *sizes, size_t nsizes)
 {
-  int error = 0;
-  RandomSeed seed(gRandomSeed);
+    int error = 0;
+    RandomSeed seed(gRandomSeed);
 
-  // First, ensure this device supports images.
+    // First, ensure this device supports images.
 
-  if (checkForImageSupport(device)) {
-    log_info("Device does not support images.  Skipping test.\n");
-    return 0;
-  }
-
-  size_t fidx, tidx, sidx;
-
-  // Test each format on every target, every size.
-
-  for ( fidx = 0; fidx < nformats; fidx++ ) {
-    for ( tidx = 0; tidx < ntargets; tidx++ ) {
-
-      // Texture buffer only takes an internal format, so the level data passed
-      // by the test and used for verification must match the internal format
-      if ((targets[tidx] == GL_TEXTURE_BUFFER) && (GetGLFormat(formats[ fidx ].internal) != formats[fidx].formattype))
-        continue;
-
-      if ( formats[ fidx ].datatype == GL_UNSIGNED_INT_2_10_10_10_REV )
-      {
-        // Check if the RGB 101010 format is supported
-        if ( is_rgb_101010_supported( context, targets[ tidx ] ) == 0 )
-          break; // skip
-      }
-
-      if (targets[tidx] != GL_TEXTURE_BUFFER)
-        log_info( "Testing image read for GL format %s : %s : %s : %s\n",
-          GetGLTargetName( targets[ tidx ] ),
-          GetGLFormatName( formats[ fidx ].internal ),
-          GetGLBaseFormatName( formats[ fidx ].formattype ),
-          GetGLTypeName( formats[ fidx ].datatype ) );
-      else
-        log_info( "Testing image read for GL format %s : %s\n",
-                 GetGLTargetName( targets[ tidx ] ),
-                 GetGLFormatName( formats[ fidx ].internal ));
-
-      for ( sidx = 0; sidx < nsizes; sidx++ ) {
-
-        // Test this format + size:
-        int err;
-        if ((err = test_image_format_read(context, queue,
-                                    sizes[sidx].width, sizes[sidx].height, sizes[sidx].depth,
-                                    targets[tidx], &formats[fidx], seed) ))
-        {
-          // Negative return values are errors, positive mean the test was skipped
-          if (err < 0) {
+    if (checkForImageSupport(device))
+    {
+        log_info("Device does not support images.  Skipping test.\n");
+        return 0;
+    }
 
-            // We land here in the event of test failure.
+    size_t fidx, tidx, sidx;
 
-            log_error( "ERROR: Image read test failed for %s : %s : %s : %s\n\n",
-              GetGLTargetName( targets[ tidx ] ),
-              GetGLFormatName( formats[ fidx ].internal ),
-              GetGLBaseFormatName( formats[ fidx ].formattype ),
-              GetGLTypeName( formats[ fidx ].datatype ) );
-            error++;
-          }
+    // Test each format on every target, every size.
 
-          // Skip the other sizes for this format.
-          printf("Skipping remaining sizes for this format\n");
+    for (fidx = 0; fidx < nformats; fidx++)
+    {
+        for (tidx = 0; tidx < ntargets; tidx++)
+        {
 
-          break;
+            // Texture buffer only takes an internal format, so the level data
+            // passed by the test and used for verification must match the
+            // internal format
+            if ((targets[tidx] == GL_TEXTURE_BUFFER)
+                && (GetGLFormat(formats[fidx].internal)
+                    != formats[fidx].formattype))
+                continue;
+
+            if (formats[fidx].datatype == GL_UNSIGNED_INT_2_10_10_10_REV)
+            {
+                // Check if the RGB 101010 format is supported
+                if (is_rgb_101010_supported(context, targets[tidx]) == 0)
+                    break; // skip
+            }
+
+            if (targets[tidx] != GL_TEXTURE_BUFFER)
+                log_info("Testing image read for GL format %s : %s : %s : %s\n",
+                         GetGLTargetName(targets[tidx]),
+                         GetGLFormatName(formats[fidx].internal),
+                         GetGLBaseFormatName(formats[fidx].formattype),
+                         GetGLTypeName(formats[fidx].datatype));
+            else
+                log_info("Testing image read for GL format %s : %s\n",
+                         GetGLTargetName(targets[tidx]),
+                         GetGLFormatName(formats[fidx].internal));
+
+            for (sidx = 0; sidx < nsizes; sidx++)
+            {
+
+                // Test this format + size:
+                int err;
+                if ((err = test_image_format_read(
+                         context, queue, sizes[sidx].width, sizes[sidx].height,
+                         sizes[sidx].depth, targets[tidx], &formats[fidx],
+                         seed)))
+                {
+                    // Negative return values are errors, positive mean the test
+                    // was skipped
+                    if (err < 0)
+                    {
+
+                        // We land here in the event of test failure.
+
+                        log_error("ERROR: Image read test failed for %s : %s : "
+                                  "%s : %s\n\n",
+                                  GetGLTargetName(targets[tidx]),
+                                  GetGLFormatName(formats[fidx].internal),
+                                  GetGLBaseFormatName(formats[fidx].formattype),
+                                  GetGLTypeName(formats[fidx].datatype));
+                        error++;
+                    }
+
+                    // Skip the other sizes for this format.
+                    printf("Skipping remaining sizes for this format\n");
+
+                    break;
+                }
+            }
+
+            // Note a successful format test, if we passed every size.
+
+            if (sidx == nsizes)
+            {
+                log_info("passed: Image read test for GL format  %s : %s : %s "
+                         ": %s\n\n",
+                         GetGLTargetName(targets[tidx]),
+                         GetGLFormatName(formats[fidx].internal),
+                         GetGLBaseFormatName(formats[fidx].formattype),
+                         GetGLTypeName(formats[fidx].datatype));
+            }
         }
-      }
-
-      // Note a successful format test, if we passed every size.
-
-      if( sidx == sizeof (sizes) / sizeof( sizes[0] ) ) {
-        log_info( "passed: Image read test for GL format  %s : %s : %s : %s\n\n",
-        GetGLTargetName( targets[ tidx ] ),
-        GetGLFormatName( formats[ fidx ].internal ),
-        GetGLBaseFormatName( formats[ fidx ].formattype ),
-        GetGLTypeName( formats[ fidx ].datatype ) );
-      }
     }
-  }
 
-  return error;
+    return error;
 }
diff --git a/test_conformance/gl/test_images_write_common.cpp b/test_conformance/gl/test_images_write_common.cpp
index 0dba83bb..4d721296 100644
--- a/test_conformance/gl/test_images_write_common.cpp
+++ b/test_conformance/gl/test_images_write_common.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -17,16 +17,17 @@
 #include "common.h"
 #include <limits.h>
 
-#if defined( __APPLE__ )
-    #include <OpenGL/glu.h>
+#if defined(__APPLE__)
+#include <OpenGL/glu.h>
 #else
-    #include <GL/glu.h>
-    #include <CL/cl_gl.h>
+#include <GL/glu.h>
+#include <CL/cl_gl.h>
 #endif
 
 #pragma mark -
 #pragma mark Write test kernels
 
+// clang-format off
 static const char *kernelpattern_image_write_1D =
 "__kernel void sample_test( __global %s4 *source, write_only image1d_t dest )\n"
 "{\n"
@@ -174,482 +175,534 @@ static const char * kernelpattern_image_write_2D_array_depth =
 
 
 #endif
+// clang-format on
 
 #pragma mark -
 #pragma mark Utility functions
 
-static const char* get_appropriate_write_kernel(GLenum target,
-  ExplicitType type, cl_channel_order channel_order)
+static const char *get_appropriate_write_kernel(GLenum target,
+                                                ExplicitType type,
+                                                cl_channel_order channel_order)
 {
-  switch (get_base_gl_target(target)) {
-    case GL_TEXTURE_1D:
-
-      if (type == kHalf)
-        return kernelpattern_image_write_1D_half;
-      else
-        return kernelpattern_image_write_1D;
-      break;
-    case GL_TEXTURE_BUFFER:
-       if (type == kHalf)
-        return kernelpattern_image_write_1D_buffer_half;
-      else
-        return kernelpattern_image_write_1D_buffer;
-      break;
-    case GL_TEXTURE_1D_ARRAY:
-      if (type == kHalf)
-        return kernelpattern_image_write_1Darray_half;
-      else
-        return kernelpattern_image_write_1Darray;
-      break;
-    case GL_COLOR_ATTACHMENT0:
-    case GL_RENDERBUFFER:
-    case GL_TEXTURE_RECTANGLE_EXT:
-    case GL_TEXTURE_2D:
-    case GL_TEXTURE_CUBE_MAP:
+    switch (get_base_gl_target(target))
+    {
+        case GL_TEXTURE_1D:
+
+            if (type == kHalf)
+                return kernelpattern_image_write_1D_half;
+            else
+                return kernelpattern_image_write_1D;
+            break;
+        case GL_TEXTURE_BUFFER:
+            if (type == kHalf)
+                return kernelpattern_image_write_1D_buffer_half;
+            else
+                return kernelpattern_image_write_1D_buffer;
+            break;
+        case GL_TEXTURE_1D_ARRAY:
+            if (type == kHalf)
+                return kernelpattern_image_write_1Darray_half;
+            else
+                return kernelpattern_image_write_1Darray;
+            break;
+        case GL_COLOR_ATTACHMENT0:
+        case GL_RENDERBUFFER:
+        case GL_TEXTURE_RECTANGLE_EXT:
+        case GL_TEXTURE_2D:
+        case GL_TEXTURE_CUBE_MAP:
 #ifdef GL_VERSION_3_2
-      if (channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL)
-        return kernelpattern_image_write_2D_depth;
+            if (channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL)
+                return kernelpattern_image_write_2D_depth;
 #endif
-      if (type == kHalf)
-        return kernelpattern_image_write_2D_half;
-      else
-        return kernelpattern_image_write_2D;
-      break;
+            if (type == kHalf)
+                return kernelpattern_image_write_2D_half;
+            else
+                return kernelpattern_image_write_2D;
+            break;
 
-    case GL_TEXTURE_2D_ARRAY:
+        case GL_TEXTURE_2D_ARRAY:
 #ifdef GL_VERSION_3_2
-      if (channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL)
-        return kernelpattern_image_write_2D_array_depth;
+            if (channel_order == CL_DEPTH || channel_order == CL_DEPTH_STENCIL)
+                return kernelpattern_image_write_2D_array_depth;
 #endif
-      if (type == kHalf)
-        return kernelpattern_image_write_2Darray_half;
-      else
-        return kernelpattern_image_write_2Darray;
-      break;
-
-    case GL_TEXTURE_3D:
-      if (type == kHalf)
-        return kernelpattern_image_write_3D_half;
-      else
-        return kernelpattern_image_write_3D;
-      break;
-
-    default:
-      log_error("Unsupported GL tex target (%s) passed to write test: "
-        "%s (%s):%d", GetGLTargetName(target), __FUNCTION__,
-        __FILE__, __LINE__);
-      return NULL;
-  }
+            if (type == kHalf)
+                return kernelpattern_image_write_2Darray_half;
+            else
+                return kernelpattern_image_write_2Darray;
+            break;
+
+        case GL_TEXTURE_3D:
+            if (type == kHalf)
+                return kernelpattern_image_write_3D_half;
+            else
+                return kernelpattern_image_write_3D;
+            break;
+
+        default:
+            log_error("Unsupported GL tex target (%s) passed to write test: "
+                      "%s (%s):%d",
+                      GetGLTargetName(target), __FUNCTION__, __FILE__,
+                      __LINE__);
+            return NULL;
+    }
 }
 
 void set_dimensions_by_target(GLenum target, size_t *dims, size_t sizes[3],
-  size_t width, size_t height, size_t depth)
+                              size_t width, size_t height, size_t depth)
 {
-  switch (get_base_gl_target(target)) {
-    case GL_TEXTURE_1D:
-      sizes[0] = width;
-      *dims = 1;
-      break;
-
-    case GL_TEXTURE_BUFFER:
-      sizes[0] = width;
-      *dims = 1;
-      break;
-
-    case GL_TEXTURE_1D_ARRAY:
-      sizes[0] = width;
-      sizes[1] = height;
-      *dims = 2;
-      break;
-
-    case GL_COLOR_ATTACHMENT0:
-    case GL_RENDERBUFFER:
-    case GL_TEXTURE_RECTANGLE_EXT:
-    case GL_TEXTURE_2D:
-    case GL_TEXTURE_CUBE_MAP:
-
-      sizes[0] = width;
-      sizes[1] = height;
-      *dims = 2;
-      break;
-
-    case GL_TEXTURE_2D_ARRAY:
-      sizes[0] = width;
-      sizes[1] = height;
-      sizes[2] = depth;
-      *dims = 3;
-      break;
-
-    case GL_TEXTURE_3D:
-      sizes[0] = width;
-      sizes[1] = height;
-      sizes[2] = depth;
-      *dims = 3;
-      break;
-
-    default:
-      log_error("Unsupported GL tex target (%s) passed to write test: "
-        "%s (%s):%d", GetGLTargetName(target), __FUNCTION__,
-        __FILE__, __LINE__);
-  }
+    switch (get_base_gl_target(target))
+    {
+        case GL_TEXTURE_1D:
+            sizes[0] = width;
+            *dims = 1;
+            break;
+
+        case GL_TEXTURE_BUFFER:
+            sizes[0] = width;
+            *dims = 1;
+            break;
+
+        case GL_TEXTURE_1D_ARRAY:
+            sizes[0] = width;
+            sizes[1] = height;
+            *dims = 2;
+            break;
+
+        case GL_COLOR_ATTACHMENT0:
+        case GL_RENDERBUFFER:
+        case GL_TEXTURE_RECTANGLE_EXT:
+        case GL_TEXTURE_2D:
+        case GL_TEXTURE_CUBE_MAP:
+
+            sizes[0] = width;
+            sizes[1] = height;
+            *dims = 2;
+            break;
+
+        case GL_TEXTURE_2D_ARRAY:
+            sizes[0] = width;
+            sizes[1] = height;
+            sizes[2] = depth;
+            *dims = 3;
+            break;
+
+        case GL_TEXTURE_3D:
+            sizes[0] = width;
+            sizes[1] = height;
+            sizes[2] = depth;
+            *dims = 3;
+            break;
+
+        default:
+            log_error("Unsupported GL tex target (%s) passed to write test: "
+                      "%s (%s):%d",
+                      GetGLTargetName(target), __FUNCTION__, __FILE__,
+                      __LINE__);
+    }
 }
 
-int test_cl_image_write( cl_context context, cl_command_queue queue,
-  GLenum target, cl_mem clImage, size_t width, size_t height, size_t depth,
-  cl_image_format *outFormat, ExplicitType *outType, void **outSourceBuffer,
-  MTdata d, bool supports_half )
+int test_cl_image_write(cl_context context, cl_command_queue queue,
+                        GLenum target, cl_mem clImage, size_t width,
+                        size_t height, size_t depth, cl_image_format *outFormat,
+                        ExplicitType *outType, void **outSourceBuffer, MTdata d,
+                        bool supports_half)
 {
-  size_t global_dims, global_sizes[3];
-  clProgramWrapper program;
-  clKernelWrapper kernel;
-  clMemWrapper inStream;
-  char* programPtr;
-  int error;
-  char kernelSource[2048];
-
-  // What CL format did we get from the texture?
-
-  error = clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(cl_image_format),
-    outFormat, NULL);
-  test_error(error, "Unable to get the CL image format");
-
-  // Create the kernel source.  The target and the data type will influence
-  // which particular kernel we choose.
-
-  *outType = get_write_kernel_type( outFormat );
-  size_t channelSize = get_explicit_type_size(*outType);
-
-  const char* appropriateKernel = get_appropriate_write_kernel(target,
-    *outType, outFormat->image_channel_order);
-  if (*outType == kHalf && !supports_half) {
-    log_info("cl_khr_fp16 isn't supported. Skip this test.\n");
-    return 0;
-  }
+    size_t global_dims, global_sizes[3];
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper inStream;
+    char *programPtr;
+    int error;
+    char kernelSource[2048];
 
-  const char* suffix = get_kernel_suffix( outFormat );
-  const char* convert = get_write_conversion( outFormat, *outType );
+    // What CL format did we get from the texture?
 
-  sprintf(kernelSource, appropriateKernel, get_explicit_type_name( *outType ),
-    get_explicit_type_name( *outType ), suffix, convert);
+    error = clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(cl_image_format),
+                           outFormat, NULL);
+    test_error(error, "Unable to get the CL image format");
 
-  programPtr = kernelSource;
-  if( create_single_kernel_helper_with_build_options( context, &program, &kernel, 1,
-    (const char **)&programPtr, "sample_test", "" ) )
-  {
-      return -1;
-  }
+    // Create the kernel source.  The target and the data type will influence
+    // which particular kernel we choose.
 
-  // Create an appropriately-sized output buffer.
+    *outType = get_write_kernel_type(outFormat);
+    size_t channelSize = get_explicit_type_size(*outType);
 
-  // Check to see if the output buffer will fit on the device
-  size_t bytes = channelSize * 4 * width * height * depth;
-  cl_ulong alloc_size = 0;
+    const char *appropriateKernel = get_appropriate_write_kernel(
+        target, *outType, outFormat->image_channel_order);
+    if (*outType == kHalf && !supports_half)
+    {
+        log_info("cl_khr_fp16 isn't supported. Skip this test.\n");
+        return 0;
+    }
 
-  cl_device_id device = NULL;
-  error = clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE, sizeof(device), &device, NULL);
-  test_error( error, "Unable to query command queue for device" );
+    const char *suffix = get_kernel_suffix(outFormat);
+    const char *convert = get_write_conversion(outFormat, *outType);
 
-  error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(alloc_size), &alloc_size, NULL);
-  test_error( error, "Unable to device for max mem alloc size" );
+    sprintf(kernelSource, appropriateKernel, get_explicit_type_name(*outType),
+            get_explicit_type_name(*outType), suffix, convert);
 
-  if (bytes > alloc_size) {
-    log_info("  Skipping: Buffer size (%lu) is greater than CL_DEVICE_MAX_MEM_ALLOC_SIZE (%lu)\n", bytes, alloc_size);
-    *outSourceBuffer = NULL;
-    return 0;
-  }
+    programPtr = kernelSource;
+    if (create_single_kernel_helper_with_build_options(
+            context, &program, &kernel, 1, (const char **)&programPtr,
+            "sample_test", ""))
+    {
+        return -1;
+    }
 
-  *outSourceBuffer = CreateRandomData(*outType, width * height * depth * 4, d);
+    // Create an appropriately-sized output buffer.
 
-  inStream = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR,
-    channelSize * 4 * width * height * depth, *outSourceBuffer, &error );
-  test_error( error, "Unable to create output buffer" );
+    // Check to see if the output buffer will fit on the device
+    size_t bytes = channelSize * 4 * width * height * depth;
+    cl_ulong alloc_size = 0;
 
-  clSamplerWrapper sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error);
-  test_error( error, "Unable to create sampler" );
+    cl_device_id device = NULL;
+    error = clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE, sizeof(device),
+                                  &device, NULL);
+    test_error(error, "Unable to query command queue for device");
 
-  error = clSetKernelArg( kernel, 0, sizeof( inStream ), &inStream );
-  test_error( error, "Unable to set kernel arguments" );
+    error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+                            sizeof(alloc_size), &alloc_size, NULL);
+    test_error(error, "Unable to device for max mem alloc size");
 
-  error = clSetKernelArg( kernel, 1, sizeof( clImage ), &clImage );
-  test_error( error, "Unable to set kernel arguments" );
+    if (bytes > alloc_size)
+    {
+        log_info("  Skipping: Buffer size (%lu) is greater than "
+                 "CL_DEVICE_MAX_MEM_ALLOC_SIZE (%lu)\n",
+                 bytes, alloc_size);
+        *outSourceBuffer = NULL;
+        return 0;
+    }
+
+    *outSourceBuffer =
+        CreateRandomData(*outType, width * height * depth * 4, d);
+
+    inStream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+                              channelSize * 4 * width * height * depth,
+                              *outSourceBuffer, &error);
+    test_error(error, "Unable to create output buffer");
+
+    clSamplerWrapper sampler = clCreateSampler(
+        context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error);
+    test_error(error, "Unable to create sampler");
+
+    error = clSetKernelArg(kernel, 0, sizeof(inStream), &inStream);
+    test_error(error, "Unable to set kernel arguments");
 
-  // Flush and Acquire.
+    error = clSetKernelArg(kernel, 1, sizeof(clImage), &clImage);
+    test_error(error, "Unable to set kernel arguments");
 
-  glFinish();
+    // Flush and Acquire.
 
-  error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &clImage, 0, NULL, NULL);
-  test_error( error, "Unable to acquire GL obejcts");
+    glFinish();
 
-  // Execute ( letting OpenCL choose the local size )
+    error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &clImage, 0, NULL, NULL);
+    test_error(error, "Unable to acquire GL obejcts");
 
-  // Setup the global dimensions and sizes based on the target type.
-  set_dimensions_by_target(target, &global_dims, global_sizes,
-    width, height, depth);
+    // Execute ( letting OpenCL choose the local size )
 
-  error = clEnqueueNDRangeKernel( queue, kernel, global_dims, NULL,
-    global_sizes, NULL, 0, NULL, NULL );
-  test_error( error, "Unable to execute test kernel" );
+    // Setup the global dimensions and sizes based on the target type.
+    set_dimensions_by_target(target, &global_dims, global_sizes, width, height,
+                             depth);
 
-  clEventWrapper event;
-  error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &clImage, 0, NULL, &event );
-  test_error(error, "clEnqueueReleaseGLObjects failed");
+    error = clEnqueueNDRangeKernel(queue, kernel, global_dims, NULL,
+                                   global_sizes, NULL, 0, NULL, NULL);
+    test_error(error, "Unable to execute test kernel");
 
-  error = clWaitForEvents( 1, &event );
-  test_error(error, "clWaitForEvents failed");
+    clEventWrapper event;
+    error =
+        (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &clImage, 0, NULL, &event);
+    test_error(error, "clEnqueueReleaseGLObjects failed");
 
-  return 0;
+    error = clWaitForEvents(1, &event);
+    test_error(error, "clWaitForEvents failed");
+
+    return 0;
 }
 
-static int test_image_write( cl_context context, cl_command_queue queue,
-  GLenum glTarget, GLuint glTexture, size_t width, size_t height, size_t depth,
-  cl_image_format *outFormat, ExplicitType *outType, void **outSourceBuffer,
-  MTdata d, bool supports_half )
+static int test_image_write(cl_context context, cl_command_queue queue,
+                            GLenum glTarget, GLuint glTexture, size_t width,
+                            size_t height, size_t depth,
+                            cl_image_format *outFormat, ExplicitType *outType,
+                            void **outSourceBuffer, MTdata d,
+                            bool supports_half)
 {
-  int error;
-
-  // Create a CL image from the supplied GL texture
-  clMemWrapper image = (*clCreateFromGLTexture_ptr)( context, CL_MEM_WRITE_ONLY,
-    glTarget, 0, glTexture, &error );
-
-  if ( error != CL_SUCCESS ) {
-    print_error( error, "Unable to create CL image from GL texture" );
-    GLint fmt;
-    glGetTexLevelParameteriv( glTarget, 0, GL_TEXTURE_INTERNAL_FORMAT, &fmt );
-    log_error( "    Supplied GL texture was base format %s and internal "
-      "format %s\n", GetGLBaseFormatName( fmt ), GetGLFormatName( fmt ) );
-    return error;
-  }
+    int error;
+
+    // Create a CL image from the supplied GL texture
+    clMemWrapper image = (*clCreateFromGLTexture_ptr)(
+        context, CL_MEM_WRITE_ONLY, glTarget, 0, glTexture, &error);
+
+    if (error != CL_SUCCESS)
+    {
+        print_error(error, "Unable to create CL image from GL texture");
+        GLint fmt;
+        glGetTexLevelParameteriv(glTarget, 0, GL_TEXTURE_INTERNAL_FORMAT, &fmt);
+        log_error("    Supplied GL texture was base format %s and internal "
+                  "format %s\n",
+                  GetGLBaseFormatName(fmt), GetGLFormatName(fmt));
+        return error;
+    }
 
-  return test_cl_image_write( context, queue, glTarget, image,
-    width, height, depth, outFormat, outType, outSourceBuffer, d, supports_half );
+    return test_cl_image_write(context, queue, glTarget, image, width, height,
+                               depth, outFormat, outType, outSourceBuffer, d,
+                               supports_half);
 }
 
-int supportsHalf(cl_context context, bool* supports_half)
+int supportsHalf(cl_context context, bool *supports_half)
 {
-  int error;
-  cl_uint numDev;
+    int error;
+    cl_uint numDev;
 
-  error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDev, NULL);
-  test_error(error, "clGetContextInfo for CL_CONTEXT_NUM_DEVICES failed");
+    error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint),
+                             &numDev, NULL);
+    test_error(error, "clGetContextInfo for CL_CONTEXT_NUM_DEVICES failed");
 
-  cl_device_id* devices = new cl_device_id[numDev];
-  error = clGetContextInfo(context, CL_CONTEXT_DEVICES, numDev * sizeof(cl_device_id), devices, NULL);
-  test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
+    cl_device_id *devices = new cl_device_id[numDev];
+    error = clGetContextInfo(context, CL_CONTEXT_DEVICES,
+                             numDev * sizeof(cl_device_id), devices, NULL);
+    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
 
-  *supports_half = is_extension_available(devices[0], "cl_khr_fp16");
-  delete [] devices;
+    *supports_half = is_extension_available(devices[0], "cl_khr_fp16");
+    delete[] devices;
 
-  return error;
+    return error;
 }
 
-int supportsMsaa(cl_context context, bool* supports_msaa)
+int supportsMsaa(cl_context context, bool *supports_msaa)
 {
-  int error;
-  cl_uint numDev;
+    int error;
+    cl_uint numDev;
 
-  error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDev, NULL);
-  test_error(error, "clGetContextInfo for CL_CONTEXT_NUM_DEVICES failed");
+    error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint),
+                             &numDev, NULL);
+    test_error(error, "clGetContextInfo for CL_CONTEXT_NUM_DEVICES failed");
 
-  cl_device_id* devices = new cl_device_id[numDev];
-  error = clGetContextInfo(context, CL_CONTEXT_DEVICES, numDev * sizeof(cl_device_id), devices, NULL);
-  test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
+    cl_device_id *devices = new cl_device_id[numDev];
+    error = clGetContextInfo(context, CL_CONTEXT_DEVICES,
+                             numDev * sizeof(cl_device_id), devices, NULL);
+    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
 
-  *supports_msaa = is_extension_available(devices[0], "cl_khr_gl_msaa_sharing");
-  delete [] devices;
+    *supports_msaa =
+        is_extension_available(devices[0], "cl_khr_gl_msaa_sharing");
+    delete[] devices;
 
-  return error;
+    return error;
 }
 
-int supportsDepth(cl_context context, bool* supports_depth)
+int supportsDepth(cl_context context, bool *supports_depth)
 {
-  int error;
-  cl_uint numDev;
+    int error;
+    cl_uint numDev;
 
-  error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDev, NULL);
-  test_error(error, "clGetContextInfo for CL_CONTEXT_NUM_DEVICES failed");
+    error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint),
+                             &numDev, NULL);
+    test_error(error, "clGetContextInfo for CL_CONTEXT_NUM_DEVICES failed");
 
-  cl_device_id* devices = new cl_device_id[numDev];
-  error = clGetContextInfo(context, CL_CONTEXT_DEVICES, numDev * sizeof(cl_device_id), devices, NULL);
-  test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
+    cl_device_id *devices = new cl_device_id[numDev];
+    error = clGetContextInfo(context, CL_CONTEXT_DEVICES,
+                             numDev * sizeof(cl_device_id), devices, NULL);
+    test_error(error, "clGetContextInfo for CL_CONTEXT_DEVICES failed");
 
-  *supports_depth = is_extension_available(devices[0], "cl_khr_gl_depth_images");
-  delete [] devices;
+    *supports_depth =
+        is_extension_available(devices[0], "cl_khr_gl_depth_images");
+    delete[] devices;
 
-  return error;
+    return error;
 }
 
-static int test_image_format_write( cl_context context, cl_command_queue queue,
-  size_t width, size_t height, size_t depth, GLenum target, GLenum format,
-  GLenum internalFormat,  GLenum glType, ExplicitType type, MTdata d )
+static int test_image_format_write(cl_context context, cl_command_queue queue,
+                                   size_t width, size_t height, size_t depth,
+                                   GLenum target, GLenum format,
+                                   GLenum internalFormat, GLenum glType,
+                                   ExplicitType type, MTdata d)
 {
-  int error;
-  // If we're testing a half float format, then we need to determine the
-  // rounding mode of this machine.  Punt if we fail to do so.
-
-  if( type == kHalf )
-    if( DetectFloatToHalfRoundingMode(queue) )
-      return 1;
-
-  // Create an appropriate GL texture or renderbuffer, given the target.
-
-  glTextureWrapper glTexture;
-  glBufferWrapper glBuf;
-  glFramebufferWrapper glFramebuffer;
-  glRenderbufferWrapper glRenderbuffer;
-  switch (get_base_gl_target(target)) {
-    case GL_TEXTURE_1D:
-      CreateGLTexture1D( width, target, format, internalFormat, glType,
-        type, &glTexture, &error, false, d );
-      break;
-    case GL_TEXTURE_BUFFER:
-      CreateGLTextureBuffer( width, target, format, internalFormat, glType,
-        type, &glTexture, &glBuf, &error, false, d );
-      break;
-    case GL_TEXTURE_1D_ARRAY:
-      CreateGLTexture1DArray( width, height, target, format, internalFormat,
-        glType, type, &glTexture, &error, false, d );
-      break;
-    case GL_TEXTURE_RECTANGLE_EXT:
-    case GL_TEXTURE_2D:
-    case GL_TEXTURE_CUBE_MAP:
-      CreateGLTexture2D( width, height, target, format, internalFormat, glType,
-        type, &glTexture, &error, false, d );
-      break;
-    case GL_COLOR_ATTACHMENT0:
-    case GL_RENDERBUFFER:
-      CreateGLRenderbuffer(width, height, target, format, internalFormat,
-        glType, type, &glFramebuffer, &glRenderbuffer, &error, d, false);
-    case GL_TEXTURE_2D_ARRAY:
-      CreateGLTexture2DArray( width, height, depth, target, format,
-        internalFormat, glType, type, &glTexture, &error, false, d );
-      break;
-    case GL_TEXTURE_3D:
-      CreateGLTexture3D( width, height, depth, target, format,
-        internalFormat, glType, type, &glTexture, &error, d, false );
-      break;
-
-    default:
-      log_error("Unsupported GL tex target (%s) passed to write test: "
-        "%s (%s):%d", GetGLTargetName(target), __FUNCTION__,
-        __FILE__, __LINE__);
-  }
-
-  // If there was a problem during creation, make sure it isn't a known
-  // cause, and then complain.
-  if ( error == -2 ) {
-    log_info("OpenGL texture couldn't be created, because a texture is too big. Skipping test.\n");
-    return 0;
-  }
-
-  if ( error != 0 ) {
-    if ((format == GL_RGBA_INTEGER_EXT) && (!CheckGLIntegerExtensionSupport())){
-      log_info("OpenGL version does not support GL_RGBA_INTEGER_EXT. "
-        "Skipping test.\n");
-      return 0;
-    } else {
-      return error;
+    int error;
+    // If we're testing a half float format, then we need to determine the
+    // rounding mode of this machine.  Punt if we fail to do so.
+
+    if (type == kHalf)
+        if (DetectFloatToHalfRoundingMode(queue)) return 1;
+
+    // Create an appropriate GL texture or renderbuffer, given the target.
+
+    glTextureWrapper glTexture;
+    glBufferWrapper glBuf;
+    glFramebufferWrapper glFramebuffer;
+    glRenderbufferWrapper glRenderbuffer;
+    switch (get_base_gl_target(target))
+    {
+        case GL_TEXTURE_1D:
+            CreateGLTexture1D(width, target, format, internalFormat, glType,
+                              type, &glTexture, &error, false, d);
+            break;
+        case GL_TEXTURE_BUFFER:
+            CreateGLTextureBuffer(width, target, format, internalFormat, glType,
+                                  type, &glTexture, &glBuf, &error, false, d);
+            break;
+        case GL_TEXTURE_1D_ARRAY:
+            CreateGLTexture1DArray(width, height, target, format,
+                                   internalFormat, glType, type, &glTexture,
+                                   &error, false, d);
+            break;
+        case GL_TEXTURE_RECTANGLE_EXT:
+        case GL_TEXTURE_2D:
+        case GL_TEXTURE_CUBE_MAP:
+            CreateGLTexture2D(width, height, target, format, internalFormat,
+                              glType, type, &glTexture, &error, false, d);
+            break;
+        case GL_COLOR_ATTACHMENT0:
+        case GL_RENDERBUFFER:
+            CreateGLRenderbuffer(width, height, target, format, internalFormat,
+                                 glType, type, &glFramebuffer, &glRenderbuffer,
+                                 &error, d, false);
+        case GL_TEXTURE_2D_ARRAY:
+            CreateGLTexture2DArray(width, height, depth, target, format,
+                                   internalFormat, glType, type, &glTexture,
+                                   &error, false, d);
+            break;
+        case GL_TEXTURE_3D:
+            CreateGLTexture3D(width, height, depth, target, format,
+                              internalFormat, glType, type, &glTexture, &error,
+                              d, false);
+            break;
+
+        default:
+            log_error("Unsupported GL tex target (%s) passed to write test: "
+                      "%s (%s):%d",
+                      GetGLTargetName(target), __FUNCTION__, __FILE__,
+                      __LINE__);
     }
-  }
-
-  // Run and get the results
-  cl_image_format clFormat;
-  ExplicitType sourceType;
-  ExplicitType validationType;
-  void *outSourceBuffer = NULL;
-
-  GLenum globj = glTexture;
-  if (target == GL_RENDERBUFFER || target == GL_COLOR_ATTACHMENT0) {
-    globj = glRenderbuffer;
-  }
-
-  bool supports_half = false;
-  error = supportsHalf(context, &supports_half);
-  if( error != 0 )
-    return error;
 
-  error = test_image_write( context, queue, target, globj, width, height,
-    depth, &clFormat, &sourceType, (void **)&outSourceBuffer, d, supports_half );
+    // If there was a problem during creation, make sure it isn't a known
+    // cause, and then complain.
+    if (error == -2)
+    {
+        log_info("OpenGL texture couldn't be created, because a texture is too "
+                 "big. Skipping test.\n");
+        return 0;
+    }
 
-  if( error != 0 || ((sourceType == kHalf ) && !supports_half)) {
-    if (outSourceBuffer)
-      free(outSourceBuffer);
-    return error;
-  }
+    if (error != 0)
+    {
+        if ((format == GL_RGBA_INTEGER_EXT)
+            && (!CheckGLIntegerExtensionSupport()))
+        {
+            log_info("OpenGL version does not support GL_RGBA_INTEGER_EXT. "
+                     "Skipping test.\n");
+            return 0;
+        }
+        else
+        {
+            return error;
+        }
+    }
 
-  if (!outSourceBuffer)
-    return 0;
+    // Run and get the results
+    cl_image_format clFormat;
+    ExplicitType sourceType;
+    ExplicitType validationType;
+    void *outSourceBuffer = NULL;
+
+    GLenum globj = glTexture;
+    if (target == GL_RENDERBUFFER || target == GL_COLOR_ATTACHMENT0)
+    {
+        globj = glRenderbuffer;
+    }
+
+    bool supports_half = false;
+    error = supportsHalf(context, &supports_half);
+    if (error != 0) return error;
+
+    error = test_image_write(context, queue, target, globj, width, height,
+                             depth, &clFormat, &sourceType,
+                             (void **)&outSourceBuffer, d, supports_half);
+
+    if (error != 0 || ((sourceType == kHalf) && !supports_half))
+    {
+        if (outSourceBuffer) free(outSourceBuffer);
+        return error;
+    }
+
+    if (!outSourceBuffer) return 0;
+
+    // If actual source type was half, convert to float for validation.
+
+    if (sourceType == kHalf)
+        validationType = kFloat;
+    else
+        validationType = sourceType;
 
-  // If actual source type was half, convert to float for validation.
-
-  if ( sourceType == kHalf )
-    validationType = kFloat;
-  else
-    validationType = sourceType;
-
-  BufferOwningPtr<char> validationSource;
-
-  if ( clFormat.image_channel_data_type == CL_UNORM_INT_101010 )
-  {
-    validationSource.reset( outSourceBuffer );
-  }
-  else
-  {
-    validationSource.reset( convert_to_expected( outSourceBuffer,
-      width * height * depth, sourceType, validationType, get_channel_order_channel_count(clFormat.image_channel_order) ) );
-    free(outSourceBuffer);
-  }
-
-  log_info( "- Write for %s [%4ld x %4ld x %4ld] : GL Texture : %s : %s : %s =>"
-    " CL Image : %s : %s \n",
-    GetGLTargetName(target),
-    width, height, depth,
-    GetGLFormatName( format ),
-    GetGLFormatName( internalFormat ),
-    GetGLTypeName( glType),
-    GetChannelOrderName( clFormat.image_channel_order ),
-    GetChannelTypeName( clFormat.image_channel_data_type ));
-
-  // Read the results from the GL texture.
-
-  ExplicitType readType = type;
-  BufferOwningPtr<char> glResults( ReadGLTexture(
-    target, glTexture, glBuf, width, format,
-    internalFormat, glType, readType, /* unused */ 1, 1 ) );
-  if( glResults == NULL )
-    return -1;
-
-  // We have to convert our input buffer to the returned type, so we can validate.
-  BufferOwningPtr<char> convertedGLResults;
-  if ( clFormat.image_channel_data_type != CL_UNORM_INT_101010 )
-  {
-    convertedGLResults.reset( convert_to_expected(
-      glResults, width * height * depth, readType, validationType, get_channel_order_channel_count(clFormat.image_channel_order), glType ));
-  }
-
-  // Validate.
-
-  int valid = 0;
-  if (convertedGLResults) {
-    if( sourceType == kFloat || sourceType == kHalf )
+    BufferOwningPtr<char> validationSource;
+
+    if (clFormat.image_channel_data_type == CL_UNORM_INT_101010)
     {
-      if ( clFormat.image_channel_data_type == CL_UNORM_INT_101010 )
-      {
-        valid = validate_float_results_rgb_101010( validationSource, glResults, width, height, depth, 1 );
-      }
-      else
-      {
-        valid = validate_float_results( validationSource, convertedGLResults,
-          width, height, depth, 1, get_channel_order_channel_count(clFormat.image_channel_order) );
-      }
+        validationSource.reset(outSourceBuffer);
     }
     else
     {
-      valid = validate_integer_results( validationSource, convertedGLResults,
-        width, height, depth, 1, get_explicit_type_size( readType ) );
+        validationSource.reset(convert_to_expected(
+            outSourceBuffer, width * height * depth, sourceType, validationType,
+            get_channel_order_channel_count(clFormat.image_channel_order)));
+        free(outSourceBuffer);
+    }
+
+    log_info(
+        "- Write for %s [%4ld x %4ld x %4ld] : GL Texture : %s : %s : %s =>"
+        " CL Image : %s : %s \n",
+        GetGLTargetName(target), width, height, depth, GetGLFormatName(format),
+        GetGLFormatName(internalFormat), GetGLTypeName(glType),
+        GetChannelOrderName(clFormat.image_channel_order),
+        GetChannelTypeName(clFormat.image_channel_data_type));
+
+    // Read the results from the GL texture.
+
+    ExplicitType readType = type;
+    BufferOwningPtr<char> glResults(
+        ReadGLTexture(target, glTexture, glBuf, width, format, internalFormat,
+                      glType, readType, /* unused */ 1, 1));
+    if (glResults == NULL) return -1;
+
+    // We have to convert our input buffer to the returned type, so we can
+    // validate.
+    BufferOwningPtr<char> convertedGLResults;
+    if (clFormat.image_channel_data_type != CL_UNORM_INT_101010)
+    {
+        convertedGLResults.reset(convert_to_expected(
+            glResults, width * height * depth, readType, validationType,
+            get_channel_order_channel_count(clFormat.image_channel_order),
+            glType));
+    }
+
+    // Validate.
+
+    int valid = 0;
+    if (convertedGLResults)
+    {
+        if (sourceType == kFloat || sourceType == kHalf)
+        {
+            if (clFormat.image_channel_data_type == CL_UNORM_INT_101010)
+            {
+                valid = validate_float_results_rgb_101010(
+                    validationSource, glResults, width, height, depth, 1);
+            }
+            else
+            {
+                valid =
+                    validate_float_results(validationSource, convertedGLResults,
+                                           width, height, depth, 1,
+                                           get_channel_order_channel_count(
+                                               clFormat.image_channel_order));
+            }
+        }
+        else
+        {
+            valid = validate_integer_results(
+                validationSource, convertedGLResults, width, height, depth, 1,
+                get_explicit_type_size(readType));
+        }
     }
-  }
 
-  return valid;
+    return valid;
 }
 
 #pragma mark -
@@ -664,152 +717,180 @@ int test_images_write_common(cl_device_id device, cl_context context,
                              size_t nformats, GLenum *targets, size_t ntargets,
                              sizevec_t *sizes, size_t nsizes)
 {
-  int err = 0;
-  int error = 0;
-  RandomSeed seed(gRandomSeed);
+    int err = 0;
+    int error = 0;
+    RandomSeed seed(gRandomSeed);
 
-  // First, ensure this device supports images.
+    // First, ensure this device supports images.
 
-  if (checkForImageSupport(device)) {
-    log_info("Device does not support images.  Skipping test.\n");
-    return 0;
-  }
-
-  // Get the value of CL_DEVICE_MAX_MEM_ALLOC_SIZE
-  cl_ulong max_individual_allocation_size = 0;
-  err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
-                        sizeof(max_individual_allocation_size),
-                        &max_individual_allocation_size, NULL);
-  if (err) {
-    log_error("ERROR: clGetDeviceInfo failed for CL_DEVICE_MAX_MEM_ALLOC_SIZE.\n");
-    error++;
-    return error;
-  }
-
-  size_t total_allocation_size;
-  size_t fidx, tidx, sidx;
-
-  for ( fidx = 0; fidx < nformats; fidx++ ) {
-    for ( tidx = 0; tidx < ntargets; tidx++ ) {
-
-      // Texture buffer only takes an internal format, so the level data passed
-      // by the test and used for verification must match the internal format
-      if ((targets[tidx] == GL_TEXTURE_BUFFER) && (GetGLFormat(formats[ fidx ].internal) != formats[fidx].formattype))
-        continue;
-
-      if ( formats[ fidx ].datatype == GL_UNSIGNED_INT_2_10_10_10_REV )
-      {
-        // Check if the RGB 101010 format is supported
-        if ( is_rgb_101010_supported( context, targets[ tidx ] ) == 0 )
-          continue; // skip
-      }
-
-      if (formats[ fidx ].datatype == GL_UNSIGNED_INT_24_8)
-      {
-        //check if a implementation supports writing to the depth stencil formats
-        cl_image_format imageFormat = { CL_DEPTH_STENCIL, CL_UNORM_INT24 };
-        if (!is_image_format_supported(context, CL_MEM_WRITE_ONLY, (targets[tidx] == GL_TEXTURE_2D || targets[tidx] == GL_TEXTURE_RECTANGLE) ? CL_MEM_OBJECT_IMAGE2D: CL_MEM_OBJECT_IMAGE2D_ARRAY, &imageFormat))
-          continue;
-      }
-
-      if (formats[ fidx ].datatype == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
-      {
-        //check if a implementation supports writing to the depth stencil formats
-        cl_image_format imageFormat = { CL_DEPTH_STENCIL, CL_FLOAT};
-        if (!is_image_format_supported(context, CL_MEM_WRITE_ONLY, (targets[tidx] == GL_TEXTURE_2D || targets[tidx] == GL_TEXTURE_RECTANGLE) ? CL_MEM_OBJECT_IMAGE2D: CL_MEM_OBJECT_IMAGE2D_ARRAY, &imageFormat))
-          continue;
-      }
-
-      if (targets[tidx] != GL_TEXTURE_BUFFER)
-        log_info( "Testing image write for GL format %s : %s : %s : %s\n",
-                 GetGLTargetName( targets[ tidx ] ),
-                 GetGLFormatName( formats[ fidx ].internal ),
-                 GetGLBaseFormatName( formats[ fidx ].formattype ),
-                 GetGLTypeName( formats[ fidx ].datatype ) );
-      else
-        log_info( "Testing image write for GL format %s : %s\n",
-                 GetGLTargetName( targets[ tidx ] ),
-                 GetGLFormatName( formats[ fidx ].internal ));
-
-
-      for (sidx = 0; sidx < nsizes; sidx++) {
-
-        // All tested formats are 4-channel formats
-        total_allocation_size =
-           sizes[sidx].width * sizes[sidx].height * sizes[sidx].depth *
-           4 * get_explicit_type_size( formats[ fidx ].type );
-
-        if (total_allocation_size > max_individual_allocation_size) {
-          log_info( "The requested allocation size (%gMB) is larger than the "
-                    "maximum individual allocation size (%gMB)\n",
-                    total_allocation_size/(1024.0*1024.0),
-                    max_individual_allocation_size/(1024.0*1024.0));
-          log_info( "Skipping write test for %s : %s : %s : %s "
-                    " and size (%ld, %ld, %ld)\n",
-                    GetGLTargetName( targets[ tidx ] ),
-                    GetGLFormatName( formats[ fidx ].internal ),
-                    GetGLBaseFormatName( formats[ fidx ].formattype ),
-                    GetGLTypeName( formats[ fidx ].datatype ),
-                    sizes[sidx].width,
-                    sizes[sidx].height,
-                    sizes[sidx].depth);
-          continue;
-        }
-#ifdef GL_VERSION_3_2
-        if (get_base_gl_target(targets[ tidx ]) == GL_TEXTURE_2D_MULTISAMPLE ||
-            get_base_gl_target(targets[ tidx ]) == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)
-        {
-            bool supports_msaa;
-            int errorInGetInfo = supportsMsaa(context, &supports_msaa);
-            if (errorInGetInfo != 0) return errorInGetInfo;
-            if (!supports_msaa) return 0;
-        }
-        if (formats[ fidx ].formattype == GL_DEPTH_COMPONENT ||
-            formats[ fidx ].formattype == GL_DEPTH_STENCIL)
-        {
-            bool supports_depth;
-            int errorInGetInfo = supportsDepth(context, &supports_depth);
-            if (errorInGetInfo != 0) return errorInGetInfo;
-            if (!supports_depth) return 0;
-        }
-#endif
+    if (checkForImageSupport(device))
+    {
+        log_info("Device does not support images.  Skipping test.\n");
+        return 0;
+    }
+
+    // Get the value of CL_DEVICE_MAX_MEM_ALLOC_SIZE
+    cl_ulong max_individual_allocation_size = 0;
+    err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+                          sizeof(max_individual_allocation_size),
+                          &max_individual_allocation_size, NULL);
+    if (err)
+    {
+        log_error("ERROR: clGetDeviceInfo failed for "
+                  "CL_DEVICE_MAX_MEM_ALLOC_SIZE.\n");
+        error++;
+        return error;
+    }
+
+    size_t total_allocation_size;
+    size_t fidx, tidx, sidx;
 
-        if( test_image_format_write( context, queue,
-                                     sizes[sidx].width,
-                                     sizes[sidx].height,
-                                     sizes[sidx].depth,
-                                     targets[ tidx ],
-                                     formats[ fidx ].formattype,
-                                     formats[ fidx ].internal,
-                                     formats[ fidx ].datatype,
-                                     formats[ fidx ].type, seed ) )
+    for (fidx = 0; fidx < nformats; fidx++)
+    {
+        for (tidx = 0; tidx < ntargets; tidx++)
         {
-          log_error( "ERROR: Image write test failed for %s : %s : %s : %s "
-            " and size (%ld, %ld, %ld)\n\n",
-            GetGLTargetName( targets[ tidx ] ),
-            GetGLFormatName( formats[ fidx ].internal ),
-            GetGLBaseFormatName( formats[ fidx ].formattype ),
-            GetGLTypeName( formats[ fidx ].datatype ),
-            sizes[sidx].width,
-            sizes[sidx].height,
-            sizes[sidx].depth);
-
-          error++;
-          break;    // Skip other sizes for this combination
-        }
-      }
 
-      // If we passed all sizes (check versus size loop count):
+            // Texture buffer only takes an internal format, so the level data
+            // passed by the test and used for verification must match the
+            // internal format
+            if ((targets[tidx] == GL_TEXTURE_BUFFER)
+                && (GetGLFormat(formats[fidx].internal)
+                    != formats[fidx].formattype))
+                continue;
+
+            if (formats[fidx].datatype == GL_UNSIGNED_INT_2_10_10_10_REV)
+            {
+                // Check if the RGB 101010 format is supported
+                if (is_rgb_101010_supported(context, targets[tidx]) == 0)
+                    continue; // skip
+            }
+
+            if (formats[fidx].datatype == GL_UNSIGNED_INT_24_8)
+            {
+                // check if a implementation supports writing to the depth
+                // stencil formats
+                cl_image_format imageFormat = { CL_DEPTH_STENCIL,
+                                                CL_UNORM_INT24 };
+                if (!is_image_format_supported(
+                        context, CL_MEM_WRITE_ONLY,
+                        (targets[tidx] == GL_TEXTURE_2D
+                         || targets[tidx] == GL_TEXTURE_RECTANGLE)
+                            ? CL_MEM_OBJECT_IMAGE2D
+                            : CL_MEM_OBJECT_IMAGE2D_ARRAY,
+                        &imageFormat))
+                    continue;
+            }
+
+            if (formats[fidx].datatype == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
+            {
+                // check if a implementation supports writing to the depth
+                // stencil formats
+                cl_image_format imageFormat = { CL_DEPTH_STENCIL, CL_FLOAT };
+                if (!is_image_format_supported(
+                        context, CL_MEM_WRITE_ONLY,
+                        (targets[tidx] == GL_TEXTURE_2D
+                         || targets[tidx] == GL_TEXTURE_RECTANGLE)
+                            ? CL_MEM_OBJECT_IMAGE2D
+                            : CL_MEM_OBJECT_IMAGE2D_ARRAY,
+                        &imageFormat))
+                    continue;
+            }
+
+            if (targets[tidx] != GL_TEXTURE_BUFFER)
+                log_info(
+                    "Testing image write for GL format %s : %s : %s : %s\n",
+                    GetGLTargetName(targets[tidx]),
+                    GetGLFormatName(formats[fidx].internal),
+                    GetGLBaseFormatName(formats[fidx].formattype),
+                    GetGLTypeName(formats[fidx].datatype));
+            else
+                log_info("Testing image write for GL format %s : %s\n",
+                         GetGLTargetName(targets[tidx]),
+                         GetGLFormatName(formats[fidx].internal));
+
+
+            for (sidx = 0; sidx < nsizes; sidx++)
+            {
+
+                // All tested formats are 4-channel formats
+                total_allocation_size = sizes[sidx].width * sizes[sidx].height
+                    * sizes[sidx].depth * 4
+                    * get_explicit_type_size(formats[fidx].type);
+
+                if (total_allocation_size > max_individual_allocation_size)
+                {
+                    log_info("The requested allocation size (%gMB) is larger "
+                             "than the "
+                             "maximum individual allocation size (%gMB)\n",
+                             total_allocation_size / (1024.0 * 1024.0),
+                             max_individual_allocation_size
+                                 / (1024.0 * 1024.0));
+                    log_info("Skipping write test for %s : %s : %s : %s "
+                             " and size (%ld, %ld, %ld)\n",
+                             GetGLTargetName(targets[tidx]),
+                             GetGLFormatName(formats[fidx].internal),
+                             GetGLBaseFormatName(formats[fidx].formattype),
+                             GetGLTypeName(formats[fidx].datatype),
+                             sizes[sidx].width, sizes[sidx].height,
+                             sizes[sidx].depth);
+                    continue;
+                }
+#ifdef GL_VERSION_3_2
+                if (get_base_gl_target(targets[tidx])
+                        == GL_TEXTURE_2D_MULTISAMPLE
+                    || get_base_gl_target(targets[tidx])
+                        == GL_TEXTURE_2D_MULTISAMPLE_ARRAY)
+                {
+                    bool supports_msaa;
+                    int errorInGetInfo = supportsMsaa(context, &supports_msaa);
+                    if (errorInGetInfo != 0) return errorInGetInfo;
+                    if (!supports_msaa) return 0;
+                }
+                if (formats[fidx].formattype == GL_DEPTH_COMPONENT
+                    || formats[fidx].formattype == GL_DEPTH_STENCIL)
+                {
+                    bool supports_depth;
+                    int errorInGetInfo =
+                        supportsDepth(context, &supports_depth);
+                    if (errorInGetInfo != 0) return errorInGetInfo;
+                    if (!supports_depth) return 0;
+                }
+#endif
 
-      if (sidx == nsizes) {
-        log_info( "passed: Image write for GL format  %s : %s : %s : %s\n\n",
-          GetGLTargetName( targets[ tidx ] ),
-          GetGLFormatName( formats[ fidx ].internal ),
-          GetGLBaseFormatName( formats[ fidx ].formattype ),
-          GetGLTypeName( formats[ fidx ].datatype ) );
-      }
+                if (test_image_format_write(
+                        context, queue, sizes[sidx].width, sizes[sidx].height,
+                        sizes[sidx].depth, targets[tidx],
+                        formats[fidx].formattype, formats[fidx].internal,
+                        formats[fidx].datatype, formats[fidx].type, seed))
+                {
+                    log_error(
+                        "ERROR: Image write test failed for %s : %s : %s : %s "
+                        " and size (%ld, %ld, %ld)\n\n",
+                        GetGLTargetName(targets[tidx]),
+                        GetGLFormatName(formats[fidx].internal),
+                        GetGLBaseFormatName(formats[fidx].formattype),
+                        GetGLTypeName(formats[fidx].datatype),
+                        sizes[sidx].width, sizes[sidx].height,
+                        sizes[sidx].depth);
+
+                    error++;
+                    break; // Skip other sizes for this combination
+                }
+            }
+
+            // If we passed all sizes (check versus size loop count):
+
+            if (sidx == nsizes)
+            {
+                log_info(
+                    "passed: Image write for GL format  %s : %s : %s : %s\n\n",
+                    GetGLTargetName(targets[tidx]),
+                    GetGLFormatName(formats[fidx].internal),
+                    GetGLBaseFormatName(formats[fidx].formattype),
+                    GetGLTypeName(formats[fidx].datatype));
+            }
+        }
     }
-  }
 
-  return error;
+    return error;
 }
diff --git a/test_conformance/gl/test_renderbuffer.cpp b/test_conformance/gl/test_renderbuffer.cpp
index d75b4e84..422b5a3d 100644
--- a/test_conformance/gl/test_renderbuffer.cpp
+++ b/test_conformance/gl/test_renderbuffer.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -15,93 +15,102 @@
 //
 #include "testBase.h"
 
-#if defined( __APPLE__ )
-    #include <OpenGL/glu.h>
+#if defined(__APPLE__)
+#include <OpenGL/glu.h>
 #else
-    #include <GL/glu.h>
-    #include <CL/cl_gl.h>
+#include <GL/glu.h>
+#include <CL/cl_gl.h>
 #endif
 
-#if defined (__linux__)
-GLboolean
-gluCheckExtension(const GLubyte *extension, const GLubyte *extensions)
+#if defined(__linux__)
+GLboolean gluCheckExtension(const GLubyte *extension, const GLubyte *extensions)
 {
-  const GLubyte *start;
-  GLubyte *where, *terminator;
-
-  /* Extension names should not have spaces. */
-  where = (GLubyte *) strchr((const char*)extension, ' ');
-  if (where || *extension == '\0')
+    const GLubyte *start;
+    GLubyte *where, *terminator;
+
+    /* Extension names should not have spaces. */
+    where = (GLubyte *)strchr((const char *)extension, ' ');
+    if (where || *extension == '\0') return 0;
+    /* It takes a bit of care to be fool-proof about parsing the
+       OpenGL extensions string. Don't be fooled by sub-strings,
+       etc. */
+    start = extensions;
+    for (;;)
+    {
+        where = (GLubyte *)strstr((const char *)start, (const char *)extension);
+        if (!where) break;
+        terminator = where + strlen((const char *)extension);
+        if (where == start || *(where - 1) == ' ')
+            if (*terminator == ' ' || *terminator == '\0') return 1;
+        start = terminator;
+    }
     return 0;
-  /* It takes a bit of care to be fool-proof about parsing the
-     OpenGL extensions string. Don't be fooled by sub-strings,
-     etc. */
-  start = extensions;
-  for (;;) {
-    where = (GLubyte *) strstr((const char *) start, (const char*) extension);
-    if (!where)
-      break;
-    terminator = where + strlen((const char*) extension);
-    if (where == start || *(where - 1) == ' ')
-      if (*terminator == ' ' || *terminator == '\0')
-        return 1;
-    start = terminator;
-  }
-  return 0;
 }
 #endif
 
 
 // This is defined in the write common code:
-extern int test_cl_image_write( cl_context context, cl_command_queue queue,
-  GLenum target, cl_mem clImage, size_t width, size_t height, size_t depth,
-  cl_image_format *outFormat, ExplicitType *outType, void **outSourceBuffer,
-  MTdata d, bool supports_half );
-
-extern int test_cl_image_read( cl_context context, cl_command_queue queue,
-  GLenum gl_target, cl_mem image, size_t width, size_t height, size_t depth, size_t sampleNum,
-  cl_image_format *outFormat, ExplicitType *outType, void **outResultBuffer );
-
-extern int supportsHalf(cl_context context, bool* supports_half);
-
-static int test_attach_renderbuffer_read_image( cl_context context, cl_command_queue queue, GLenum glTarget, GLuint glRenderbuffer,
-                    size_t imageWidth, size_t imageHeight, cl_image_format *outFormat, ExplicitType *outType, void **outResultBuffer )
+extern int test_cl_image_write(cl_context context, cl_command_queue queue,
+                               GLenum target, cl_mem clImage, size_t width,
+                               size_t height, size_t depth,
+                               cl_image_format *outFormat,
+                               ExplicitType *outType, void **outSourceBuffer,
+                               MTdata d, bool supports_half);
+
+extern int test_cl_image_read(cl_context context, cl_command_queue queue,
+                              GLenum gl_target, cl_mem image, size_t width,
+                              size_t height, size_t depth, size_t sampleNum,
+                              cl_image_format *outFormat, ExplicitType *outType,
+                              void **outResultBuffer);
+
+extern int supportsHalf(cl_context context, bool *supports_half);
+
+static int test_attach_renderbuffer_read_image(
+    cl_context context, cl_command_queue queue, GLenum glTarget,
+    GLuint glRenderbuffer, size_t imageWidth, size_t imageHeight,
+    cl_image_format *outFormat, ExplicitType *outType, void **outResultBuffer)
 {
     int error;
 
     // Create a CL image from the supplied GL renderbuffer
-    cl_mem image = (*clCreateFromGLRenderbuffer_ptr)( context, CL_MEM_READ_ONLY, glRenderbuffer, &error );
-    if( error != CL_SUCCESS )
+    cl_mem image = (*clCreateFromGLRenderbuffer_ptr)(context, CL_MEM_READ_ONLY,
+                                                     glRenderbuffer, &error);
+    if (error != CL_SUCCESS)
     {
-        print_error( error, "Unable to create CL image from GL renderbuffer" );
+        print_error(error, "Unable to create CL image from GL renderbuffer");
         return error;
     }
 
-    return test_cl_image_read( context, queue, glTarget, image, imageWidth,
-    imageHeight, 1, 1, outFormat, outType, outResultBuffer );
+    return test_cl_image_read(context, queue, glTarget, image, imageWidth,
+                              imageHeight, 1, 1, outFormat, outType,
+                              outResultBuffer);
 }
 
-int test_renderbuffer_read_image( cl_context context, cl_command_queue queue,
-                            GLsizei width, GLsizei height, GLenum attachment,
-                            GLenum format, GLenum internalFormat,
-                            GLenum glType, ExplicitType type, MTdata d )
+int test_renderbuffer_read_image(cl_context context, cl_command_queue queue,
+                                 GLsizei width, GLsizei height,
+                                 GLenum attachment, GLenum format,
+                                 GLenum internalFormat, GLenum glType,
+                                 ExplicitType type, MTdata d)
 {
     int error;
 
-    if( type == kHalf )
-        if( DetectFloatToHalfRoundingMode(queue) )
-            return 1;
+    if (type == kHalf)
+        if (DetectFloatToHalfRoundingMode(queue)) return 1;
 
     // Create the GL renderbuffer
     glFramebufferWrapper glFramebuffer;
     glRenderbufferWrapper glRenderbuffer;
-    void *tmp = CreateGLRenderbuffer( width, height, attachment, format, internalFormat, glType, type, &glFramebuffer, &glRenderbuffer, &error, d, true );
+    void *tmp = CreateGLRenderbuffer(
+        width, height, attachment, format, internalFormat, glType, type,
+        &glFramebuffer, &glRenderbuffer, &error, d, true);
     BufferOwningPtr<char> inputBuffer(tmp);
-    if( error != 0 )
+    if (error != 0)
     {
-        if ((format == GL_RGBA_INTEGER_EXT) && (!CheckGLIntegerExtensionSupport()))
+        if ((format == GL_RGBA_INTEGER_EXT)
+            && (!CheckGLIntegerExtensionSupport()))
         {
-            log_info("OpenGL version does not support GL_RGBA_INTEGER_EXT. Skipping test.\n");
+            log_info("OpenGL version does not support GL_RGBA_INTEGER_EXT. "
+                     "Skipping test.\n");
             return 0;
         }
         else
@@ -114,14 +123,18 @@ int test_renderbuffer_read_image( cl_context context, cl_command_queue queue,
     cl_image_format clFormat;
     ExplicitType actualType;
     char *outBuffer;
-    error = test_attach_renderbuffer_read_image( context, queue, attachment, glRenderbuffer, width, height, &clFormat, &actualType, (void **)&outBuffer );
-    if( error != 0 )
-        return error;
+    error = test_attach_renderbuffer_read_image(
+        context, queue, attachment, glRenderbuffer, width, height, &clFormat,
+        &actualType, (void **)&outBuffer);
+    if (error != 0) return error;
     BufferOwningPtr<char> actualResults(outBuffer);
 
-    log_info( "- Read [%4d x %4d] : GL renderbuffer : %s : %s : %s => CL Image : %s : %s \n", width, height,
-                    GetGLFormatName( format ), GetGLFormatName( internalFormat ), GetGLTypeName( glType),
-                    GetChannelOrderName( clFormat.image_channel_order ), GetChannelTypeName( clFormat.image_channel_data_type ));
+    log_info("- Read [%4d x %4d] : GL renderbuffer : %s : %s : %s => CL Image "
+             ": %s : %s \n",
+             width, height, GetGLFormatName(format),
+             GetGLFormatName(internalFormat), GetGLTypeName(glType),
+             GetChannelOrderName(clFormat.image_channel_order),
+             GetChannelTypeName(clFormat.image_channel_data_type));
 
 #ifdef DEBUG
     log_info("- start read GL data -- \n");
@@ -129,63 +142,76 @@ int test_renderbuffer_read_image( cl_context context, cl_command_queue queue,
     log_info("- end read GL data -- \n");
 #endif
 
-    // We have to convert our input buffer to the returned type, so we can validate.
-    BufferOwningPtr<char> convertedInput(convert_to_expected( inputBuffer, width * height, type, actualType, get_channel_order_channel_count(clFormat.image_channel_order) ));
+    // We have to convert our input buffer to the returned type, so we can
+    // validate.
+    BufferOwningPtr<char> convertedInput(convert_to_expected(
+        inputBuffer, width * height, type, actualType,
+        get_channel_order_channel_count(clFormat.image_channel_order)));
 
 #ifdef DEBUG
     log_info("- start input data -- \n");
-    DumpGLBuffer(GetGLTypeForExplicitType(actualType), width, height, convertedInput);
+    DumpGLBuffer(GetGLTypeForExplicitType(actualType), width, height,
+                 convertedInput);
     log_info("- end input data -- \n");
 #endif
 
 #ifdef DEBUG
     log_info("- start converted data -- \n");
-    DumpGLBuffer(GetGLTypeForExplicitType(actualType), width, height, actualResults);
+    DumpGLBuffer(GetGLTypeForExplicitType(actualType), width, height,
+                 actualResults);
     log_info("- end converted data -- \n");
 #endif
 
     // Now we validate
     int valid = 0;
-    if(convertedInput) {
-        if( actualType == kFloat )
-            valid = validate_float_results( convertedInput, actualResults, width, height, 1, get_channel_order_channel_count(clFormat.image_channel_order) );
+    if (convertedInput)
+    {
+        if (actualType == kFloat)
+            valid = validate_float_results(
+                convertedInput, actualResults, width, height, 1,
+                get_channel_order_channel_count(clFormat.image_channel_order));
         else
-            valid = validate_integer_results( convertedInput, actualResults, width, height, 1, get_explicit_type_size( actualType ) );
+            valid = validate_integer_results(
+                convertedInput, actualResults, width, height, 1,
+                get_explicit_type_size(actualType));
     }
 
     return valid;
 }
 
-int test_renderbuffer_read( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
+int test_renderbuffer_read(cl_device_id device, cl_context context,
+                           cl_command_queue queue, int numElements)
 {
     GLenum attachments[] = { GL_COLOR_ATTACHMENT0_EXT };
 
-    struct {
+    struct
+    {
         GLenum internal;
         GLenum format;
         GLenum datatype;
         ExplicitType type;
 
     } formats[] = {
-        { GL_RGBA,         GL_BGRA,             GL_UNSIGNED_INT_8_8_8_8_REV, kUChar },
-        { GL_RGBA,         GL_RGBA,             GL_UNSIGNED_INT_8_8_8_8_REV, kUChar },
-        { GL_RGBA8,        GL_RGBA,             GL_UNSIGNED_BYTE,            kUChar },
-        { GL_RGBA16,       GL_RGBA,             GL_UNSIGNED_SHORT,           kUShort },
+        { GL_RGBA, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar },
+        { GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar },
+        { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, kUChar },
+        { GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, kUShort },
 
-// Renderbuffers with integer formats do not seem to work reliably across
-// platforms/implementations. Disabling this in version 1.0 of CL conformance tests.
+    // Renderbuffers with integer formats do not seem to work reliably across
+    // platforms/implementations. Disabling this in version 1.0 of CL
+    // conformance tests.
 
 #ifdef TEST_INTEGER_FORMATS
 
-        { GL_RGBA8I_EXT,   GL_RGBA_INTEGER_EXT, GL_BYTE,                     kChar },
-        { GL_RGBA16I_EXT,  GL_RGBA_INTEGER_EXT, GL_SHORT,                    kShort },
-        { GL_RGBA32I_EXT,  GL_RGBA_INTEGER_EXT, GL_INT,                      kInt },
-        { GL_RGBA8UI_EXT,  GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE,            kUChar },
-        { GL_RGBA16UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_SHORT,           kUShort },
-        { GL_RGBA32UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_INT,             kUInt },
+        { GL_RGBA8I_EXT, GL_RGBA_INTEGER_EXT, GL_BYTE, kChar },
+        { GL_RGBA16I_EXT, GL_RGBA_INTEGER_EXT, GL_SHORT, kShort },
+        { GL_RGBA32I_EXT, GL_RGBA_INTEGER_EXT, GL_INT, kInt },
+        { GL_RGBA8UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE, kUChar },
+        { GL_RGBA16UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_SHORT, kUShort },
+        { GL_RGBA32UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_INT, kUInt },
 #endif
-        { GL_RGBA32F_ARB,  GL_RGBA,             GL_FLOAT,                    kFloat },
-        { GL_RGBA16F_ARB,  GL_RGBA,             GL_HALF_FLOAT,               kHalf }
+        { GL_RGBA32F_ARB, GL_RGBA, GL_FLOAT, kFloat },
+        { GL_RGBA16F_ARB, GL_RGBA, GL_HALF_FLOAT, kHalf }
     };
 
     size_t fmtIdx, attIdx;
@@ -195,66 +221,70 @@ int test_renderbuffer_read( cl_device_id device, cl_context context, cl_command_
 #else
     size_t iter = 6;
 #endif
-    RandomSeed seed( gRandomSeed );
+    RandomSeed seed(gRandomSeed);
 
-  // Check if images are supported
-  if (checkForImageSupport(device)) {
-    log_info("Device does not support images. Skipping test.\n");
-    return 0;
-  }
+    // Check if images are supported
+    if (checkForImageSupport(device))
+    {
+        log_info("Device does not support images. Skipping test.\n");
+        return 0;
+    }
 
-    if( !gluCheckExtension( (const GLubyte *)"GL_EXT_framebuffer_object", glGetString( GL_EXTENSIONS ) ) )
+    if (!gluCheckExtension((const GLubyte *)"GL_EXT_framebuffer_object",
+                           glGetString(GL_EXTENSIONS)))
     {
-        log_info( "Renderbuffers are not supported by this OpenGL implementation; skipping test\n" );
+        log_info("Renderbuffers are not supported by this OpenGL "
+                 "implementation; skipping test\n");
         return 0;
     }
 
     // Loop through a set of GL formats, testing a set of sizes against each one
-    for( fmtIdx = 0; fmtIdx < sizeof( formats ) / sizeof( formats[ 0 ] ); fmtIdx++ )
+    for (fmtIdx = 0; fmtIdx < sizeof(formats) / sizeof(formats[0]); fmtIdx++)
     {
-        for( attIdx = 0; attIdx < sizeof( attachments ) / sizeof( attachments[ 0 ] ); attIdx++ )
+        for (attIdx = 0; attIdx < sizeof(attachments) / sizeof(attachments[0]);
+             attIdx++)
         {
             size_t i;
 
-            log_info( "Testing renderbuffer read for %s : %s : %s : %s\n",
-                GetGLAttachmentName( attachments[ attIdx ] ),
-                GetGLFormatName( formats[ fmtIdx ].internal ),
-                GetGLBaseFormatName( formats[ fmtIdx ].format ),
-                GetGLTypeName( formats[ fmtIdx ].datatype ) );
+            log_info("Testing renderbuffer read for %s : %s : %s : %s\n",
+                     GetGLAttachmentName(attachments[attIdx]),
+                     GetGLFormatName(formats[fmtIdx].internal),
+                     GetGLBaseFormatName(formats[fmtIdx].format),
+                     GetGLTypeName(formats[fmtIdx].datatype));
 
-            for( i = 0; i < iter; i++ )
+            for (i = 0; i < iter; i++)
             {
-                GLsizei width = random_in_range( 16, 512, seed );
-                GLsizei height = random_in_range( 16, 512, seed );
+                GLsizei width = random_in_range(16, 512, seed);
+                GLsizei height = random_in_range(16, 512, seed);
 #ifdef DEBUG
                 width = height = 4;
 #endif
 
-                if( test_renderbuffer_read_image( context, queue, width, height,
-                                                  attachments[ attIdx ],
-                                                  formats[ fmtIdx ].format,
-                                                  formats[ fmtIdx ].internal,
-                                                  formats[ fmtIdx ].datatype,
-                                                  formats[ fmtIdx ].type, seed ) )
+                if (test_renderbuffer_read_image(
+                        context, queue, width, height, attachments[attIdx],
+                        formats[fmtIdx].format, formats[fmtIdx].internal,
+                        formats[fmtIdx].datatype, formats[fmtIdx].type, seed))
 
                 {
-                    log_error( "ERROR: Renderbuffer read test failed for %s : %s : %s : %s\n\n",
-                                GetGLAttachmentName( attachments[ attIdx ] ),
-                                GetGLFormatName( formats[ fmtIdx ].internal ),
-                                GetGLBaseFormatName( formats[ fmtIdx ].format ),
-                                GetGLTypeName( formats[ fmtIdx ].datatype ) );
+                    log_error("ERROR: Renderbuffer read test failed for %s : "
+                              "%s : %s : %s\n\n",
+                              GetGLAttachmentName(attachments[attIdx]),
+                              GetGLFormatName(formats[fmtIdx].internal),
+                              GetGLBaseFormatName(formats[fmtIdx].format),
+                              GetGLTypeName(formats[fmtIdx].datatype));
 
                     error++;
-                    break;    // Skip other sizes for this combination
+                    break; // Skip other sizes for this combination
                 }
             }
-            if( i == iter )
+            if (i == iter)
             {
-                log_info( "passed: Renderbuffer read test passed for %s : %s : %s : %s\n\n",
-                          GetGLAttachmentName( attachments[ attIdx ] ),
-                          GetGLFormatName( formats[ fmtIdx ].internal ),
-                          GetGLBaseFormatName( formats[ fmtIdx ].format ),
-                          GetGLTypeName( formats[ fmtIdx ].datatype ) );
+                log_info("passed: Renderbuffer read test passed for %s : %s : "
+                         "%s : %s\n\n",
+                         GetGLAttachmentName(attachments[attIdx]),
+                         GetGLFormatName(formats[fmtIdx].internal),
+                         GetGLBaseFormatName(formats[fmtIdx].format),
+                         GetGLTypeName(formats[fmtIdx].datatype));
             }
         }
     }
@@ -265,43 +295,52 @@ int test_renderbuffer_read( cl_device_id device, cl_context context, cl_command_
 
 #pragma mark -------------------- Write tests -------------------------
 
-int test_attach_renderbuffer_write_to_image( cl_context context, cl_command_queue queue, GLenum glTarget, GLuint glRenderbuffer,
-                     size_t imageWidth, size_t imageHeight, cl_image_format *outFormat, ExplicitType *outType, MTdata d, void **outSourceBuffer, bool supports_half )
+int test_attach_renderbuffer_write_to_image(
+    cl_context context, cl_command_queue queue, GLenum glTarget,
+    GLuint glRenderbuffer, size_t imageWidth, size_t imageHeight,
+    cl_image_format *outFormat, ExplicitType *outType, MTdata d,
+    void **outSourceBuffer, bool supports_half)
 {
     int error;
 
     // Create a CL image from the supplied GL renderbuffer
-    clMemWrapper image = (*clCreateFromGLRenderbuffer_ptr)( context, CL_MEM_WRITE_ONLY, glRenderbuffer, &error );
-    if( error != CL_SUCCESS )
+    clMemWrapper image = (*clCreateFromGLRenderbuffer_ptr)(
+        context, CL_MEM_WRITE_ONLY, glRenderbuffer, &error);
+    if (error != CL_SUCCESS)
     {
-        print_error( error, "Unable to create CL image from GL renderbuffer" );
+        print_error(error, "Unable to create CL image from GL renderbuffer");
         return error;
     }
 
-    return test_cl_image_write( context, queue, glTarget, image, imageWidth,
-    imageHeight, 1, outFormat, outType, outSourceBuffer, d, supports_half );
+    return test_cl_image_write(context, queue, glTarget, image, imageWidth,
+                               imageHeight, 1, outFormat, outType,
+                               outSourceBuffer, d, supports_half);
 }
 
-int test_renderbuffer_image_write( cl_context context, cl_command_queue queue,
-                                   GLsizei width, GLsizei height, GLenum attachment,
-                                   GLenum format, GLenum internalFormat,
-                                     GLenum glType, ExplicitType type, MTdata d )
+int test_renderbuffer_image_write(cl_context context, cl_command_queue queue,
+                                  GLsizei width, GLsizei height,
+                                  GLenum attachment, GLenum format,
+                                  GLenum internalFormat, GLenum glType,
+                                  ExplicitType type, MTdata d)
 {
     int error;
 
-    if( type == kHalf )
-        if( DetectFloatToHalfRoundingMode(queue) )
-            return 1;
+    if (type == kHalf)
+        if (DetectFloatToHalfRoundingMode(queue)) return 1;
 
     // Create the GL renderbuffer
     glFramebufferWrapper glFramebuffer;
     glRenderbufferWrapper glRenderbuffer;
-    CreateGLRenderbuffer( width, height, attachment, format, internalFormat, glType, type, &glFramebuffer, &glRenderbuffer, &error, d, false );
-    if( error != 0 )
+    CreateGLRenderbuffer(width, height, attachment, format, internalFormat,
+                         glType, type, &glFramebuffer, &glRenderbuffer, &error,
+                         d, false);
+    if (error != 0)
     {
-        if ((format == GL_RGBA_INTEGER_EXT) && (!CheckGLIntegerExtensionSupport()))
+        if ((format == GL_RGBA_INTEGER_EXT)
+            && (!CheckGLIntegerExtensionSupport()))
         {
-            log_info("OpenGL version does not support GL_RGBA_INTEGER_EXT. Skipping test.\n");
+            log_info("OpenGL version does not support GL_RGBA_INTEGER_EXT. "
+                     "Skipping test.\n");
             return 0;
         }
         else
@@ -318,27 +357,34 @@ int test_renderbuffer_image_write( cl_context context, cl_command_queue queue,
 
     bool supports_half = false;
     error = supportsHalf(context, &supports_half);
-    if( error != 0 )
-        return error;
+    if (error != 0) return error;
 
-    error = test_attach_renderbuffer_write_to_image( context, queue, attachment, glRenderbuffer, width, height, &clFormat, &sourceType, d, (void **)&outSourceBuffer, supports_half );
-    if( error != 0 || ((sourceType == kHalf ) && !supports_half))
-        return error;
+    error = test_attach_renderbuffer_write_to_image(
+        context, queue, attachment, glRenderbuffer, width, height, &clFormat,
+        &sourceType, d, (void **)&outSourceBuffer, supports_half);
+    if (error != 0 || ((sourceType == kHalf) && !supports_half)) return error;
 
     // If actual source type was half, convert to float for validation.
-    if( sourceType == kHalf )
+    if (sourceType == kHalf)
         validationType = kFloat;
     else
         validationType = sourceType;
 
-    BufferOwningPtr<char> validationSource( convert_to_expected( outSourceBuffer, width * height, sourceType, validationType, get_channel_order_channel_count(clFormat.image_channel_order) ) );
+    BufferOwningPtr<char> validationSource(convert_to_expected(
+        outSourceBuffer, width * height, sourceType, validationType,
+        get_channel_order_channel_count(clFormat.image_channel_order)));
 
-    log_info( "- Write [%4d x %4d] : GL Renderbuffer : %s : %s : %s => CL Image : %s : %s \n", width, height,
-                    GetGLFormatName( format ), GetGLFormatName( internalFormat ), GetGLTypeName( glType),
-                    GetChannelOrderName( clFormat.image_channel_order ), GetChannelTypeName( clFormat.image_channel_data_type ));
+    log_info("- Write [%4d x %4d] : GL Renderbuffer : %s : %s : %s => CL Image "
+             ": %s : %s \n",
+             width, height, GetGLFormatName(format),
+             GetGLFormatName(internalFormat), GetGLTypeName(glType),
+             GetChannelOrderName(clFormat.image_channel_order),
+             GetChannelTypeName(clFormat.image_channel_data_type));
 
     // Now read the results from the GL renderbuffer
-    BufferOwningPtr<char> resultData( ReadGLRenderbuffer( glFramebuffer, glRenderbuffer, attachment, format, internalFormat, glType, type, width, height ) );
+    BufferOwningPtr<char> resultData(
+        ReadGLRenderbuffer(glFramebuffer, glRenderbuffer, attachment, format,
+                           internalFormat, glType, type, width, height));
 
 #ifdef DEBUG
     log_info("- start result data -- \n");
@@ -346,63 +392,76 @@ int test_renderbuffer_image_write( cl_context context, cl_command_queue queue,
     log_info("- end result data -- \n");
 #endif
 
-    // We have to convert our input buffer to the returned type, so we can validate.
-    BufferOwningPtr<char> convertedData( convert_to_expected( resultData, width * height, type, validationType, get_channel_order_channel_count(clFormat.image_channel_order) ) );
+    // We have to convert our input buffer to the returned type, so we can
+    // validate.
+    BufferOwningPtr<char> convertedData(convert_to_expected(
+        resultData, width * height, type, validationType,
+        get_channel_order_channel_count(clFormat.image_channel_order)));
 
 #ifdef DEBUG
     log_info("- start input data -- \n");
-    DumpGLBuffer(GetGLTypeForExplicitType(validationType), width, height, validationSource);
+    DumpGLBuffer(GetGLTypeForExplicitType(validationType), width, height,
+                 validationSource);
     log_info("- end input data -- \n");
 #endif
 
 #ifdef DEBUG
     log_info("- start converted data -- \n");
-    DumpGLBuffer(GetGLTypeForExplicitType(validationType), width, height, convertedData);
+    DumpGLBuffer(GetGLTypeForExplicitType(validationType), width, height,
+                 convertedData);
     log_info("- end converted data -- \n");
 #endif
 
     // Now we validate
     int valid = 0;
-    if(convertedData) {
-        if( sourceType == kFloat || sourceType == kHalf )
-            valid = validate_float_results( validationSource, convertedData, width, height, 1, get_channel_order_channel_count(clFormat.image_channel_order) );
+    if (convertedData)
+    {
+        if (sourceType == kFloat || sourceType == kHalf)
+            valid = validate_float_results(
+                validationSource, convertedData, width, height, 1,
+                get_channel_order_channel_count(clFormat.image_channel_order));
         else
-            valid = validate_integer_results( validationSource, convertedData, width, height, 1, get_explicit_type_size( type ) );
+            valid = validate_integer_results(validationSource, convertedData,
+                                             width, height, 1,
+                                             get_explicit_type_size(type));
     }
 
     return valid;
 }
 
-int test_renderbuffer_write( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
+int test_renderbuffer_write(cl_device_id device, cl_context context,
+                            cl_command_queue queue, int numElements)
 {
     GLenum attachments[] = { GL_COLOR_ATTACHMENT0_EXT };
 
-    struct {
+    struct
+    {
         GLenum internal;
         GLenum format;
         GLenum datatype;
         ExplicitType type;
 
     } formats[] = {
-        { GL_RGBA,         GL_BGRA,             GL_UNSIGNED_INT_8_8_8_8_REV, kUChar },
-        { GL_RGBA,         GL_RGBA,             GL_UNSIGNED_INT_8_8_8_8_REV, kUChar },
-        { GL_RGBA8,        GL_RGBA,             GL_UNSIGNED_BYTE,            kUChar },
-        { GL_RGBA16,       GL_RGBA,             GL_UNSIGNED_SHORT,           kUShort },
+        { GL_RGBA, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar },
+        { GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar },
+        { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, kUChar },
+        { GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, kUShort },
 
-// Renderbuffers with integer formats do not seem to work reliably across
-// platforms/implementations. Disabling this in version 1.0 of CL conformance tests.
+    // Renderbuffers with integer formats do not seem to work reliably across
+    // platforms/implementations. Disabling this in version 1.0 of CL
+    // conformance tests.
 
 #ifdef TEST_INTEGER_FORMATS
 
-        { GL_RGBA8I_EXT,   GL_RGBA_INTEGER_EXT, GL_BYTE,                     kChar },
-        { GL_RGBA16I_EXT,  GL_RGBA_INTEGER_EXT, GL_SHORT,                    kShort },
-        { GL_RGBA32I_EXT,  GL_RGBA_INTEGER_EXT, GL_INT,                      kInt },
-        { GL_RGBA8UI_EXT,  GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE,            kUChar },
-        { GL_RGBA16UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_SHORT,           kUShort },
-        { GL_RGBA32UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_INT,             kUInt },
+        { GL_RGBA8I_EXT, GL_RGBA_INTEGER_EXT, GL_BYTE, kChar },
+        { GL_RGBA16I_EXT, GL_RGBA_INTEGER_EXT, GL_SHORT, kShort },
+        { GL_RGBA32I_EXT, GL_RGBA_INTEGER_EXT, GL_INT, kInt },
+        { GL_RGBA8UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE, kUChar },
+        { GL_RGBA16UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_SHORT, kUShort },
+        { GL_RGBA32UI_EXT, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_INT, kUInt },
 #endif
-        { GL_RGBA32F_ARB,  GL_RGBA,             GL_FLOAT,                    kFloat },
-        { GL_RGBA16F_ARB,  GL_RGBA,             GL_HALF_FLOAT,               kHalf }
+        { GL_RGBA32F_ARB, GL_RGBA, GL_FLOAT, kFloat },
+        { GL_RGBA16F_ARB, GL_RGBA, GL_HALF_FLOAT, kHalf }
     };
 
     size_t fmtIdx, attIdx;
@@ -411,64 +470,68 @@ int test_renderbuffer_write( cl_device_id device, cl_context context, cl_command
 #ifdef DEBUG
     iter = 1;
 #endif
-    RandomSeed seed( gRandomSeed );
+    RandomSeed seed(gRandomSeed);
 
-  // Check if images are supported
-  if (checkForImageSupport(device)) {
-    log_info("Device does not support images. Skipping test.\n");
-    return 0;
-  }
+    // Check if images are supported
+    if (checkForImageSupport(device))
+    {
+        log_info("Device does not support images. Skipping test.\n");
+        return 0;
+    }
 
-    if( !gluCheckExtension( (const GLubyte *)"GL_EXT_framebuffer_object", glGetString( GL_EXTENSIONS ) ) )
+    if (!gluCheckExtension((const GLubyte *)"GL_EXT_framebuffer_object",
+                           glGetString(GL_EXTENSIONS)))
     {
-        log_info( "Renderbuffers are not supported by this OpenGL implementation; skipping test\n" );
+        log_info("Renderbuffers are not supported by this OpenGL "
+                 "implementation; skipping test\n");
         return 0;
     }
 
     // Loop through a set of GL formats, testing a set of sizes against each one
-    for( fmtIdx = 0; fmtIdx < sizeof( formats ) / sizeof( formats[ 0 ] ); fmtIdx++ )
+    for (fmtIdx = 0; fmtIdx < sizeof(formats) / sizeof(formats[0]); fmtIdx++)
     {
-        for( attIdx = 0; attIdx < sizeof( attachments ) / sizeof( attachments[ 0 ] ); attIdx++ )
+        for (attIdx = 0; attIdx < sizeof(attachments) / sizeof(attachments[0]);
+             attIdx++)
         {
-            log_info( "Testing Renderbuffer write test for %s : %s : %s : %s\n",
-                GetGLAttachmentName( attachments[ attIdx ] ),
-                GetGLFormatName( formats[ fmtIdx ].internal ),
-                GetGLBaseFormatName( formats[ fmtIdx ].format ),
-                GetGLTypeName( formats[ fmtIdx ].datatype ) );
+            log_info("Testing Renderbuffer write test for %s : %s : %s : %s\n",
+                     GetGLAttachmentName(attachments[attIdx]),
+                     GetGLFormatName(formats[fmtIdx].internal),
+                     GetGLBaseFormatName(formats[fmtIdx].format),
+                     GetGLTypeName(formats[fmtIdx].datatype));
 
             size_t i;
-            for( i = 0; i < iter; i++ )
+            for (i = 0; i < iter; i++)
             {
-                GLsizei width = random_in_range( 16, 512, seed );
-                GLsizei height = random_in_range( 16, 512, seed );
+                GLsizei width = random_in_range(16, 512, seed);
+                GLsizei height = random_in_range(16, 512, seed);
 #ifdef DEBUG
                 width = height = 4;
 #endif
 
-                if( test_renderbuffer_image_write( context, queue, width, height,
-                                                   attachments[ attIdx ],
-                                                   formats[ fmtIdx ].format,
-                                                   formats[ fmtIdx ].internal,
-                                                   formats[ fmtIdx ].datatype,
-                                                   formats[ fmtIdx ].type, seed ) )
+                if (test_renderbuffer_image_write(
+                        context, queue, width, height, attachments[attIdx],
+                        formats[fmtIdx].format, formats[fmtIdx].internal,
+                        formats[fmtIdx].datatype, formats[fmtIdx].type, seed))
                 {
-                    log_error( "ERROR: Renderbuffer write test failed for %s : %s : %s : %s\n\n",
-                          GetGLAttachmentName( attachments[ attIdx ] ),
-                          GetGLFormatName( formats[ fmtIdx ].internal ),
-                          GetGLBaseFormatName( formats[ fmtIdx ].format ),
-                          GetGLTypeName( formats[ fmtIdx ].datatype ) );
+                    log_error("ERROR: Renderbuffer write test failed for %s : "
+                              "%s : %s : %s\n\n",
+                              GetGLAttachmentName(attachments[attIdx]),
+                              GetGLFormatName(formats[fmtIdx].internal),
+                              GetGLBaseFormatName(formats[fmtIdx].format),
+                              GetGLTypeName(formats[fmtIdx].datatype));
 
                     error++;
-                    break;    // Skip other sizes for this combination
+                    break; // Skip other sizes for this combination
                 }
             }
-            if( i == iter )
+            if (i == iter)
             {
-                log_info( "passed: Renderbuffer write test passed for %s : %s : %s : %s\n\n",
-                          GetGLAttachmentName( attachments[ attIdx ] ),
-                          GetGLFormatName( formats[ fmtIdx ].internal ),
-                          GetGLBaseFormatName( formats[ fmtIdx ].format ),
-                          GetGLTypeName( formats[ fmtIdx ].datatype ) );
+                log_info("passed: Renderbuffer write test passed for %s : %s : "
+                         "%s : %s\n\n",
+                         GetGLAttachmentName(attachments[attIdx]),
+                         GetGLFormatName(formats[fmtIdx].internal),
+                         GetGLBaseFormatName(formats[fmtIdx].format),
+                         GetGLTypeName(formats[fmtIdx].datatype));
             }
         }
     }
diff --git a/test_conformance/gl/test_renderbuffer_info.cpp b/test_conformance/gl/test_renderbuffer_info.cpp
index bb5ce848..d14b6032 100644
--- a/test_conformance/gl/test_renderbuffer_info.cpp
+++ b/test_conformance/gl/test_renderbuffer_info.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -14,118 +14,125 @@
 // limitations under the License.
 //
 #include "testBase.h"
-#if defined( __APPLE__ )
+#if defined(__APPLE__)
 #include <OpenGL/glu.h>
 #else
 #include <GL/glu.h>
 #include <CL/cl_gl.h>
 #endif
 
-static int test_renderbuffer_object_info( cl_context context, cl_command_queue queue,
-                                          GLsizei width, GLsizei height, GLenum attachment,
-                                          GLenum format, GLenum internalFormat,
-                                          GLenum glType, ExplicitType type, MTdata d )
+static int test_renderbuffer_object_info(cl_context context,
+                                         cl_command_queue queue, GLsizei width,
+                                         GLsizei height, GLenum attachment,
+                                         GLenum format, GLenum internalFormat,
+                                         GLenum glType, ExplicitType type,
+                                         MTdata d)
 {
     int error;
 
-    if( type == kHalf )
-        if( DetectFloatToHalfRoundingMode(queue) )
-            return 1;
+    if (type == kHalf)
+        if (DetectFloatToHalfRoundingMode(queue)) return 1;
 
     // Create the GL render buffer
     glFramebufferWrapper glFramebuffer;
     glRenderbufferWrapper glRenderbuffer;
-    BufferOwningPtr<char> inputBuffer(CreateGLRenderbuffer( width, height, attachment, format, internalFormat, glType, type, &glFramebuffer, &glRenderbuffer, &error, d, true ));
-    if( error != 0 )
-        return error;
+    BufferOwningPtr<char> inputBuffer(CreateGLRenderbuffer(
+        width, height, attachment, format, internalFormat, glType, type,
+        &glFramebuffer, &glRenderbuffer, &error, d, true));
+    if (error != 0) return error;
 
-    clMemWrapper image = (*clCreateFromGLRenderbuffer_ptr)(context, CL_MEM_READ_ONLY, glRenderbuffer, &error);
+    clMemWrapper image = (*clCreateFromGLRenderbuffer_ptr)(
+        context, CL_MEM_READ_ONLY, glRenderbuffer, &error);
     test_error(error, "clCreateFromGLRenderbuffer failed");
 
-    log_info( "- Given a GL format of %s, input type was %s, size was %d x %d\n",
-              GetGLFormatName( internalFormat ),
-              get_explicit_type_name( type ), (int)width, (int)height );
+    log_info("- Given a GL format of %s, input type was %s, size was %d x %d\n",
+             GetGLFormatName(internalFormat), get_explicit_type_name(type),
+             (int)width, (int)height);
 
     // Verify the expected information here.
-    return CheckGLObjectInfo(image, CL_GL_OBJECT_RENDERBUFFER, (GLuint)glRenderbuffer, internalFormat, 0);
+    return CheckGLObjectInfo(image, CL_GL_OBJECT_RENDERBUFFER,
+                             (GLuint)glRenderbuffer, internalFormat, 0);
 }
 
-int test_renderbuffer_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
+int test_renderbuffer_getinfo(cl_device_id device, cl_context context,
+                              cl_command_queue queue, int numElements)
 {
     GLenum attachments[] = { GL_COLOR_ATTACHMENT0_EXT };
 
-    struct {
+    struct
+    {
         GLenum internal;
         GLenum format;
         GLenum datatype;
         ExplicitType type;
 
-    } formats[] = {
-        { GL_RGBA,         GL_RGBA,             GL_UNSIGNED_BYTE,             kUChar },
-        { GL_RGBA8,        GL_RGBA,             GL_UNSIGNED_BYTE,            kUChar },
-        { GL_RGBA16,       GL_RGBA,             GL_UNSIGNED_SHORT,           kUShort },
-        { GL_RGBA32F_ARB,  GL_RGBA,             GL_FLOAT,                    kFloat },
-        { GL_RGBA16F_ARB,  GL_RGBA,             GL_HALF_FLOAT,               kHalf }
-    };
+    } formats[] = { { GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE, kUChar },
+                    { GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, kUChar },
+                    { GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, kUShort },
+                    { GL_RGBA32F_ARB, GL_RGBA, GL_FLOAT, kFloat },
+                    { GL_RGBA16F_ARB, GL_RGBA, GL_HALF_FLOAT, kHalf } };
 
     size_t fmtIdx, tgtIdx;
     int error = 0;
     size_t iter = 6;
     RandomSeed seed(gRandomSeed);
 
-  // Check if images are supported
-  if (checkForImageSupport(device)) {
-    log_info("Device does not support images. Skipping test.\n");
-    return 0;
-  }
+    // Check if images are supported
+    if (checkForImageSupport(device))
+    {
+        log_info("Device does not support images. Skipping test.\n");
+        return 0;
+    }
 
-    if( !gluCheckExtension( (const GLubyte *)"GL_EXT_framebuffer_object", glGetString( GL_EXTENSIONS ) ) )
+    if (!gluCheckExtension((const GLubyte *)"GL_EXT_framebuffer_object",
+                           glGetString(GL_EXTENSIONS)))
     {
-        log_info( "Renderbuffers are not supported by this OpenGL implementation; skipping test\n" );
+        log_info("Renderbuffers are not supported by this OpenGL "
+                 "implementation; skipping test\n");
         return 0;
     }
 
     // Loop through a set of GL formats, testing a set of sizes against each one
-    for( fmtIdx = 0; fmtIdx < sizeof( formats ) / sizeof( formats[ 0 ] ); fmtIdx++ )
+    for (fmtIdx = 0; fmtIdx < sizeof(formats) / sizeof(formats[0]); fmtIdx++)
     {
-        for( tgtIdx = 0; tgtIdx < sizeof( attachments ) / sizeof( attachments[ 0 ] ); tgtIdx++ )
+        for (tgtIdx = 0; tgtIdx < sizeof(attachments) / sizeof(attachments[0]);
+             tgtIdx++)
         {
-            log_info( "Testing Renderbuffer object info for %s : %s : %s\n",
-                GetGLFormatName( formats[ fmtIdx ].internal ),
-                GetGLBaseFormatName( formats[ fmtIdx ].format ),
-                GetGLTypeName( formats[ fmtIdx ].datatype ) );
+            log_info("Testing Renderbuffer object info for %s : %s : %s\n",
+                     GetGLFormatName(formats[fmtIdx].internal),
+                     GetGLBaseFormatName(formats[fmtIdx].format),
+                     GetGLTypeName(formats[fmtIdx].datatype));
 
             size_t i;
-            for( i = 0; i < iter; i++ )
+            for (i = 0; i < iter; i++)
             {
-                GLsizei width = random_in_range( 16, 512, seed );
-                GLsizei height = random_in_range( 16, 512, seed );
-
-                if( test_renderbuffer_object_info( context, queue, (int)width, (int)height,
-                                                   attachments[ tgtIdx ],
-                                                   formats[ fmtIdx ].format,
-                                                   formats[ fmtIdx ].internal,
-                                                   formats[ fmtIdx ].datatype,
-                                                   formats[ fmtIdx ].type, seed ) )
+                GLsizei width = random_in_range(16, 512, seed);
+                GLsizei height = random_in_range(16, 512, seed);
+
+                if (test_renderbuffer_object_info(
+                        context, queue, (int)width, (int)height,
+                        attachments[tgtIdx], formats[fmtIdx].format,
+                        formats[fmtIdx].internal, formats[fmtIdx].datatype,
+                        formats[fmtIdx].type, seed))
                 {
-                    log_error( "ERROR: Renderbuffer write test failed for GL format %s : %s\n\n",
-                        GetGLFormatName( formats[ fmtIdx ].internal ),
-                        GetGLTypeName( formats[ fmtIdx ].datatype ) );
+                    log_error("ERROR: Renderbuffer write test failed for GL "
+                              "format %s : %s\n\n",
+                              GetGLFormatName(formats[fmtIdx].internal),
+                              GetGLTypeName(formats[fmtIdx].datatype));
 
                     error++;
-                    break;    // Skip other sizes for this combination
+                    break; // Skip other sizes for this combination
                 }
             }
-            if( i == iter )
+            if (i == iter)
             {
-                log_info( "passed: Renderbuffer write test passed for GL format %s : %s\n\n",
-                    GetGLFormatName( formats[ fmtIdx ].internal ),
-                    GetGLTypeName( formats[ fmtIdx ].datatype ) );
-
+                log_info("passed: Renderbuffer write test passed for GL format "
+                         "%s : %s\n\n",
+                         GetGLFormatName(formats[fmtIdx].internal),
+                         GetGLTypeName(formats[fmtIdx].datatype));
             }
         }
     }
 
     return error;
 }
-
diff --git a/test_conformance/gles/main.cpp b/test_conformance/gles/main.cpp
index 60e020d8..0327b70e 100644
--- a/test_conformance/gles/main.cpp
+++ b/test_conformance/gles/main.cpp
@@ -266,9 +266,13 @@ int main(int argc, const char *argv[])
     }
 
         // Note: don't use the entire harness, because we have a different way of obtaining the device (via the context)
-        error = parseAndCallCommandLineTests( argc_tmp, argv_tmp, deviceIDs[i], test_num, test_list, true, 0, 1024 );
-        if( error != 0 )
-          break;
+    test_harness_config config{};
+    config.forceNoContextCreation = true;
+    config.numElementsToUse = 1024;
+    config.queueProps = 0;
+    error = parseAndCallCommandLineTests(argc_tmp, argv_tmp, deviceIDs[i],
+                                         test_num, test_list, config);
+    if (error != 0) break;
     }
 
     // Clean-up.
@@ -338,7 +342,12 @@ int main(int argc, const char *argv[])
         goto cleanup;
 #else
         // Note: don't use the entire harness, because we have a different way of obtaining the device (via the context)
-        error = parseAndCallCommandLineTests( argc_, argv_, deviceIDs[ i ], test_num32, test_list32, true, 0, 1024 );
+        test_harness_config config{};
+        config.forceNoContextCreation = true;
+        config.numElementsToUse = 1024;
+        config.queueProps = 0;
+        error = parseAndCallCommandLineTests(argc_, argv_, deviceIDs[i],
+                                             test_num32, test_list32, config);
         if( error != 0 )
           break;
 #endif
diff --git a/test_conformance/gles/testBase.h b/test_conformance/gles/testBase.h
index 09e8a836..861b8a0f 100644
--- a/test_conformance/gles/testBase.h
+++ b/test_conformance/gles/testBase.h
@@ -38,7 +38,6 @@
 
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
-#include "harness/threadTesting.h"
 #include "harness/typeWrappers.h"
 #include "harness/conversions.h"
 #include "harness/mt19937.h"
diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp
index 591470f0..efaceaf7 100644
--- a/test_conformance/half/Test_vStoreHalf.cpp
+++ b/test_conformance/half/Test_vStoreHalf.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -70,8 +70,7 @@ typedef struct CheckResultInfoD_
     int vsz;
 } CheckResultInfoD;
 
-static cl_int
-ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
+static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
 {
     ComputeReferenceInfoF *cri = (ComputeReferenceInfoF *)userInfo;
     cl_uint lim = cri->lim;
@@ -83,10 +82,10 @@ ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
     cl_ulong i = cri->i + off;
     cl_uint j;
 
-    if (off + count > lim)
-        count = lim - off;
+    if (off + count > lim) count = lim - off;
 
-    for (j = 0; j < count; ++j) {
+    for (j = 0; j < count; ++j)
+    {
         x[j] = as_float((cl_uint)(i + j));
         r[j] = f(x[j]);
     }
@@ -94,8 +93,7 @@ ReferenceF(cl_uint jid, cl_uint tid, void *userInfo)
     return 0;
 }
 
-static cl_int
-CheckF(cl_uint jid, cl_uint tid, void *userInfo)
+static cl_int CheckF(cl_uint jid, cl_uint tid, void *userInfo)
 {
     CheckResultInfoF *cri = (CheckResultInfoF *)userInfo;
     cl_uint lim = cri->lim;
@@ -106,33 +104,33 @@ CheckF(cl_uint jid, cl_uint tid, void *userInfo)
     const cl_ushort *s = cri->s + off;
     f2h f = cri->f;
     cl_uint j;
-    cl_ushort correct2 = f( 0.0f);
+    cl_ushort correct2 = f(0.0f);
     cl_ushort correct3 = f(-0.0f);
     cl_int ret = 0;
 
-    if (off + count > lim)
-        count = lim - off;
+    if (off + count > lim) count = lim - off;
 
-    if (!memcmp(r, s, count*sizeof(cl_ushort)))
-        return 0;
+    if (!memcmp(r, s, count * sizeof(cl_ushort))) return 0;
 
-    for (j = 0; j < count; j++) {
+    for (j = 0; j < count; j++)
+    {
         if (s[j] == r[j]) continue;
 
         // Pass any NaNs
-        if ((s[j] & 0x7fff) > 0x7c00 && (r[j] & 0x7fff) > 0x7c00 )
-            continue;
+        if ((s[j] & 0x7fff) > 0x7c00 && (r[j] & 0x7fff) > 0x7c00) continue;
 
         // retry per section 6.5.3.3
         if (IsFloatSubnormal(x[j]) && (s[j] == correct2 || s[j] == correct3))
             continue;
 
         // if reference result is subnormal, pass any zero
-        if (gIsEmbedded && IsHalfSubnormal(r[j]) && (s[j] == 0x0000 || s[j] == 0x8000))
+        if (gIsEmbedded && IsHalfSubnormal(r[j])
+            && (s[j] == 0x0000 || s[j] == 0x8000))
             continue;
 
-        vlog_error("\nFailure at [%u] with %.6a: *0x%04x vs 0x%04x,  vector_size = %d, address_space = %s\n",
-                   j+off, x[j], r[j], s[j], cri->vsz, cri->aspace);
+        vlog_error("\nFailure at [%u] with %.6a: *0x%04x vs 0x%04x,  "
+                   "vector_size = %d, address_space = %s\n",
+                   j + off, x[j], r[j], s[j], cri->vsz, cri->aspace);
 
         ret = 1;
         break;
@@ -141,8 +139,7 @@ CheckF(cl_uint jid, cl_uint tid, void *userInfo)
     return ret;
 }
 
-static cl_int
-ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
+static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
 {
     ComputeReferenceInfoD *cri = (ComputeReferenceInfoD *)userInfo;
     cl_uint lim = cri->lim;
@@ -154,10 +151,10 @@ ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
     cl_uint j;
     cl_ulong i = cri->i + off;
 
-    if (off + count > lim)
-        count = lim - off;
+    if (off + count > lim) count = lim - off;
 
-    for (j = 0; j < count; ++j) {
+    for (j = 0; j < count; ++j)
+    {
         x[j] = as_double(DoubleFromUInt((cl_uint)(i + j)));
         r[j] = f(x[j]);
     }
@@ -165,8 +162,7 @@ ReferenceD(cl_uint jid, cl_uint tid, void *userInfo)
     return 0;
 }
 
-static cl_int
-CheckD(cl_uint jid, cl_uint tid, void *userInfo)
+static cl_int CheckD(cl_uint jid, cl_uint tid, void *userInfo)
 {
     CheckResultInfoD *cri = (CheckResultInfoD *)userInfo;
     cl_uint lim = cri->lim;
@@ -177,35 +173,35 @@ CheckD(cl_uint jid, cl_uint tid, void *userInfo)
     const cl_ushort *s = cri->s + off;
     d2h f = cri->f;
     cl_uint j;
-    cl_ushort correct2 = f( 0.0);
+    cl_ushort correct2 = f(0.0);
     cl_ushort correct3 = f(-0.0);
     cl_int ret = 0;
 
-    if (off + count > lim)
-        count = lim - off;
+    if (off + count > lim) count = lim - off;
 
-    if (!memcmp(r, s, count*sizeof(cl_ushort)))
-        return 0;
+    if (!memcmp(r, s, count * sizeof(cl_ushort))) return 0;
 
-    for (j = 0; j < count; j++) {
+    for (j = 0; j < count; j++)
+    {
         if (s[j] == r[j]) continue;
 
         // Pass any NaNs
-        if ((s[j] & 0x7fff) > 0x7c00 && (r[j] & 0x7fff) > 0x7c00)
-            continue;
+        if ((s[j] & 0x7fff) > 0x7c00 && (r[j] & 0x7fff) > 0x7c00) continue;
 
         if (IsDoubleSubnormal(x[j]) && (s[j] == correct2 || s[j] == correct3))
             continue;
 
         // if reference result is subnormal, pass any zero result
-        if (gIsEmbedded && IsHalfSubnormal(r[j]) && (s[j] == 0x0000 || s[j] == 0x8000))
+        if (gIsEmbedded && IsHalfSubnormal(r[j])
+            && (s[j] == 0x0000 || s[j] == 0x8000))
             continue;
 
-        vlog_error("\nFailure at [%u] with %.13la: *0x%04x vs 0x%04x, vector_size = %d, address space = %s (double precision)\n",
-                   j+off, x[j], r[j], s[j], cri->vsz, cri->aspace);
+        vlog_error("\nFailure at [%u] with %.13la: *0x%04x vs 0x%04x, "
+                   "vector_size = %d, address space = %s (double precision)\n",
+                   j + off, x[j], r[j], s[j], cri->vsz, cri->aspace);
 
         ret = 1;
-    break;
+        break;
     }
 
     return ret;
@@ -251,100 +247,129 @@ static cl_half double2half_rtn(double f)
     return cl_half_from_double(f, CL_HALF_RTN);
 }
 
-int test_vstore_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+int test_vstore_half(cl_device_id deviceID, cl_context context,
+                     cl_command_queue queue, int num_elements)
 {
     switch (get_default_rounding_mode(deviceID))
     {
         case CL_FP_ROUND_TO_ZERO:
-            return Test_vStoreHalf_private(deviceID, float2half_rtz, double2half_rte, "");
-        case 0:
-            return -1;
+            return Test_vStoreHalf_private(deviceID, float2half_rtz,
+                                           double2half_rte, "");
+        case 0: return -1;
         default:
-            return Test_vStoreHalf_private(deviceID, float2half_rte, double2half_rte, "");
+            return Test_vStoreHalf_private(deviceID, float2half_rte,
+                                           double2half_rte, "");
     }
 }
 
-int test_vstore_half_rte( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+int test_vstore_half_rte(cl_device_id deviceID, cl_context context,
+                         cl_command_queue queue, int num_elements)
 {
-    return Test_vStoreHalf_private(deviceID, float2half_rte, double2half_rte, "_rte");
+    return Test_vStoreHalf_private(deviceID, float2half_rte, double2half_rte,
+                                   "_rte");
 }
 
-int test_vstore_half_rtz( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+int test_vstore_half_rtz(cl_device_id deviceID, cl_context context,
+                         cl_command_queue queue, int num_elements)
 {
-    return Test_vStoreHalf_private(deviceID, float2half_rtz, double2half_rtz, "_rtz");
+    return Test_vStoreHalf_private(deviceID, float2half_rtz, double2half_rtz,
+                                   "_rtz");
 }
 
-int test_vstore_half_rtp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+int test_vstore_half_rtp(cl_device_id deviceID, cl_context context,
+                         cl_command_queue queue, int num_elements)
 {
-    return Test_vStoreHalf_private(deviceID, float2half_rtp, double2half_rtp, "_rtp");
+    return Test_vStoreHalf_private(deviceID, float2half_rtp, double2half_rtp,
+                                   "_rtp");
 }
 
-int test_vstore_half_rtn( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+int test_vstore_half_rtn(cl_device_id deviceID, cl_context context,
+                         cl_command_queue queue, int num_elements)
 {
-    return Test_vStoreHalf_private(deviceID, float2half_rtn, double2half_rtn, "_rtn");
+    return Test_vStoreHalf_private(deviceID, float2half_rtn, double2half_rtn,
+                                   "_rtn");
 }
 
-int test_vstorea_half( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+int test_vstorea_half(cl_device_id deviceID, cl_context context,
+                      cl_command_queue queue, int num_elements)
 {
     switch (get_default_rounding_mode(deviceID))
     {
         case CL_FP_ROUND_TO_ZERO:
-            return Test_vStoreaHalf_private(deviceID,float2half_rtz, double2half_rte, "");
-        case 0:
-            return -1;
+            return Test_vStoreaHalf_private(deviceID, float2half_rtz,
+                                            double2half_rte, "");
+        case 0: return -1;
         default:
-            return Test_vStoreaHalf_private(deviceID, float2half_rte, double2half_rte, "");
+            return Test_vStoreaHalf_private(deviceID, float2half_rte,
+                                            double2half_rte, "");
     }
 }
 
-int test_vstorea_half_rte( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+int test_vstorea_half_rte(cl_device_id deviceID, cl_context context,
+                          cl_command_queue queue, int num_elements)
 {
-    return Test_vStoreaHalf_private(deviceID, float2half_rte, double2half_rte, "_rte");
+    return Test_vStoreaHalf_private(deviceID, float2half_rte, double2half_rte,
+                                    "_rte");
 }
 
-int test_vstorea_half_rtz( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+int test_vstorea_half_rtz(cl_device_id deviceID, cl_context context,
+                          cl_command_queue queue, int num_elements)
 {
-    return Test_vStoreaHalf_private(deviceID, float2half_rtz, double2half_rtz, "_rtz");
+    return Test_vStoreaHalf_private(deviceID, float2half_rtz, double2half_rtz,
+                                    "_rtz");
 }
 
-int test_vstorea_half_rtp( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+int test_vstorea_half_rtp(cl_device_id deviceID, cl_context context,
+                          cl_command_queue queue, int num_elements)
 {
-    return Test_vStoreaHalf_private(deviceID, float2half_rtp, double2half_rtp, "_rtp");
+    return Test_vStoreaHalf_private(deviceID, float2half_rtp, double2half_rtp,
+                                    "_rtp");
 }
 
-int test_vstorea_half_rtn( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements )
+int test_vstorea_half_rtn(cl_device_id deviceID, cl_context context,
+                          cl_command_queue queue, int num_elements)
 {
-    return Test_vStoreaHalf_private(deviceID, float2half_rtn, double2half_rtn, "_rtn");
+    return Test_vStoreaHalf_private(deviceID, float2half_rtn, double2half_rtn,
+                                    "_rtn");
 }
 
 #pragma mark -
 
-int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleReferenceFunc, const char *roundName )
+int Test_vStoreHalf_private(cl_device_id device, f2h referenceFunc,
+                            d2h doubleReferenceFunc, const char *roundName)
 {
     int vectorSize, error;
-    cl_program  programs[kVectorSizeCount+kStrangeVectorSizeCount][3];
-    cl_kernel   kernels[kVectorSizeCount+kStrangeVectorSizeCount][3];
-
-    uint64_t time[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
-    uint64_t min_time[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
-    memset( min_time, -1, sizeof( min_time ) );
-    cl_program  doublePrograms[kVectorSizeCount+kStrangeVectorSizeCount][3];
-    cl_kernel   doubleKernels[kVectorSizeCount+kStrangeVectorSizeCount][3];
-    uint64_t doubleTime[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
-    uint64_t min_double_time[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
-    memset( min_double_time, -1, sizeof( min_double_time ) );
-
-    bool aligned= false;
-
-    for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
+    cl_program programs[kVectorSizeCount + kStrangeVectorSizeCount][3];
+    cl_kernel kernels[kVectorSizeCount + kStrangeVectorSizeCount][3];
+    cl_program resetProgram;
+    cl_kernel resetKernel;
+
+    uint64_t time[kVectorSizeCount + kStrangeVectorSizeCount] = { 0 };
+    uint64_t min_time[kVectorSizeCount + kStrangeVectorSizeCount] = { 0 };
+    memset(min_time, -1, sizeof(min_time));
+    cl_program doublePrograms[kVectorSizeCount + kStrangeVectorSizeCount][3];
+    cl_kernel doubleKernels[kVectorSizeCount + kStrangeVectorSizeCount][3];
+    uint64_t doubleTime[kVectorSizeCount + kStrangeVectorSizeCount] = { 0 };
+    uint64_t min_double_time[kVectorSizeCount + kStrangeVectorSizeCount] = {
+        0
+    };
+    memset(min_double_time, -1, sizeof(min_double_time));
+
+    bool aligned = false;
+
+    for (vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest;
+         vectorSize++)
     {
-        const char *source[] = {
-            "__kernel void test( __global float", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n"
-            "{\n"
-            "   size_t i = get_global_id(0);\n"
-            "   vstore_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], i, f );\n"
-            "}\n"
-        };
+        const char *source[] = { "__kernel void test( __global float",
+                                 vector_size_name_extensions[vectorSize],
+                                 " *p, __global half *f )\n"
+                                 "{\n"
+                                 "   size_t i = get_global_id(0);\n"
+                                 "   vstore_half",
+                                 vector_size_name_extensions[vectorSize],
+                                 roundName,
+                                 "( p[i], i, f );\n"
+                                 "}\n" };
 
         const char *source_v3[] = {
             "__kernel void test( __global float *p, __global half *f,\n"
@@ -356,21 +381,29 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
             "   if(last_i == i && extra_last_thread != 0) {\n"
             "     adjust = 3-extra_last_thread;\n"
             "   } "
-            "   vstore_half3",roundName,"( vload3(i, p-adjust), i, f-adjust );\n"
+            "   vstore_half3",
+            roundName,
+            "( vload3(i, p-adjust), i, f-adjust );\n"
             "}\n"
         };
 
         const char *source_private_store[] = {
-            "__kernel void test( __global float", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n"
+            "__kernel void test( __global float",
+            vector_size_name_extensions[vectorSize],
+            " *p, __global half *f )\n"
             "{\n"
             "   __private ushort data[16];\n"
             "   size_t i = get_global_id(0);\n"
             "   size_t offset = 0;\n"
             "   size_t vecsize = vec_step(p[i]);\n"
-            "   vstore_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], 0, (__private half *)(&data[0]) );\n"
+            "   vstore_half",
+            vector_size_name_extensions[vectorSize],
+            roundName,
+            "( p[i], 0, (__private half *)(&data[0]) );\n"
             "   for(offset = 0; offset < vecsize; offset++)\n"
             "   {\n"
-            "       vstore_half(vload_half(offset, (__private half *)data), 0, &f[vecsize*i+offset]);\n"
+            "       vstore_half(vload_half(offset, (__private half *)data), 0, "
+            "&f[vecsize*i+offset]);\n"
             "   }\n"
             "}\n"
         };
@@ -388,10 +421,13 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
             "   if(last_i == i && extra_last_thread != 0) {\n"
             "     adjust = 3-extra_last_thread;\n"
             "   } "
-            "   vstore_half3",roundName,"( vload3(i, p-adjust), 0, (__private half *)(&data[0]) );\n"
+            "   vstore_half3",
+            roundName,
+            "( vload3(i, p-adjust), 0, (__private half *)(&data[0]) );\n"
             "   for(offset = 0; offset < 3; offset++)\n"
             "   {\n"
-            "       vstore_half(vload_half(offset, (__private half *) data), 0, &f[3*i+offset-adjust]);\n"
+            "       vstore_half(vload_half(offset, (__private half *) data), "
+            "0, &f[3*i+offset-adjust]);\n"
             "   }\n"
             "}\n"
         };
@@ -401,17 +437,26 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
 
 
         const char *source_local_store[] = {
-            "__kernel void test( __global float", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n"
+            "__kernel void test( __global float",
+            vector_size_name_extensions[vectorSize],
+            " *p, __global half *f )\n"
             "{\n"
-            "   __local ushort data[16*", local_buf_size, "];\n"
+            "   __local ushort data[16*",
+            local_buf_size,
+            "];\n"
             "   size_t i = get_global_id(0);\n"
             "   size_t lid = get_local_id(0);\n"
             "   size_t lsize = get_local_size(0);\n"
             "   size_t vecsize = vec_step(p[0]);\n"
             "   event_t async_event;\n"
-            "   vstore_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], lid, (__local half *)(&data[0]) );\n"
+            "   vstore_half",
+            vector_size_name_extensions[vectorSize],
+            roundName,
+            "( p[i], lid, (__local half *)(&data[0]) );\n"
             "   barrier( CLK_LOCAL_MEM_FENCE ); \n"
-            "   async_event = async_work_group_copy((__global ushort *)f+vecsize*(i-lid), (__local ushort *)(&data[0]), vecsize*lsize, 0);\n" // investigate later
+            "   async_event = async_work_group_copy((__global ushort "
+            "*)f+vecsize*(i-lid), (__local ushort *)(&data[0]), vecsize*lsize, "
+            "0);\n" // investigate later
             "   wait_group_events(1, &async_event);\n"
             "}\n"
         };
@@ -450,25 +495,36 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
 
         const char *double_source[] = {
             "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-            "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n"
+            "__kernel void test( __global double",
+            vector_size_name_extensions[vectorSize],
+            " *p, __global half *f )\n"
             "{\n"
             "   size_t i = get_global_id(0);\n"
-            "   vstore_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], i, f );\n"
+            "   vstore_half",
+            vector_size_name_extensions[vectorSize],
+            roundName,
+            "( p[i], i, f );\n"
             "}\n"
         };
 
         const char *double_source_private_store[] = {
             "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-            "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n"
+            "__kernel void test( __global double",
+            vector_size_name_extensions[vectorSize],
+            " *p, __global half *f )\n"
             "{\n"
             "   __private ushort data[16];\n"
             "   size_t i = get_global_id(0);\n"
             "   size_t offset = 0;\n"
             "   size_t vecsize = vec_step(p[i]);\n"
-            "   vstore_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], 0, (__private half *)(&data[0]) );\n"
+            "   vstore_half",
+            vector_size_name_extensions[vectorSize],
+            roundName,
+            "( p[i], 0, (__private half *)(&data[0]) );\n"
             "   for(offset = 0; offset < vecsize; offset++)\n"
             "   {\n"
-            "       vstore_half(vload_half(offset, (__private half *)data), 0, &f[vecsize*i+offset]);\n"
+            "       vstore_half(vload_half(offset, (__private half *)data), 0, "
+            "&f[vecsize*i+offset]);\n"
             "   }\n"
             "}\n"
         };
@@ -476,17 +532,26 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
 
         const char *double_source_local_store[] = {
             "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-            "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n"
+            "__kernel void test( __global double",
+            vector_size_name_extensions[vectorSize],
+            " *p, __global half *f )\n"
             "{\n"
-            "   __local ushort data[16*", local_buf_size, "];\n"
+            "   __local ushort data[16*",
+            local_buf_size,
+            "];\n"
             "   size_t i = get_global_id(0);\n"
             "   size_t lid = get_local_id(0);\n"
             "   size_t vecsize = vec_step(p[0]);\n"
             "   size_t lsize = get_local_size(0);\n"
             "   event_t async_event;\n"
-            "   vstore_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], lid, (__local half *)(&data[0]) );\n"
+            "   vstore_half",
+            vector_size_name_extensions[vectorSize],
+            roundName,
+            "( p[i], lid, (__local half *)(&data[0]) );\n"
             "   barrier( CLK_LOCAL_MEM_FENCE ); \n"
-            "   async_event = async_work_group_copy((__global ushort *)(f+vecsize*(i-lid)), (__local ushort *)(&data[0]), vecsize*lsize, 0);\n" // investigate later
+            "   async_event = async_work_group_copy((__global ushort "
+            "*)(f+vecsize*(i-lid)), (__local ushort *)(&data[0]), "
+            "vecsize*lsize, 0);\n" // investigate later
             "   wait_group_events(1, &async_event);\n"
             "}\n"
         };
@@ -503,7 +568,9 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
             "   if(last_i == i && extra_last_thread != 0) {\n"
             "     adjust = 3-extra_last_thread;\n"
             "   } "
-            "   vstore_half3",roundName,"( vload3(i,p-adjust), i, f -adjust);\n"
+            "   vstore_half3",
+            roundName,
+            "( vload3(i,p-adjust), i, f -adjust);\n"
             "}\n"
         };
 
@@ -520,10 +587,13 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
             "   if(last_i == i && extra_last_thread != 0) {\n"
             "     adjust = 3-extra_last_thread;\n"
             "   } "
-            "   vstore_half3",roundName,"( vload3(i, p-adjust), 0, (__private half *)(&data[0]) );\n"
+            "   vstore_half3",
+            roundName,
+            "( vload3(i, p-adjust), 0, (__private half *)(&data[0]) );\n"
             "   for(offset = 0; offset < 3; offset++)\n"
             "   {\n"
-            "       vstore_half(vload_half(offset, (__private half *)data), 0, &f[3*i+offset-adjust]);\n"
+            "       vstore_half(vload_half(offset, (__private half *)data), 0, "
+            "&f[3*i+offset-adjust]);\n"
             "   }\n"
             "}\n"
         };
@@ -562,151 +632,235 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
         };
 
 
-        if(g_arrVecSizes[vectorSize] == 3) {
-            programs[vectorSize][0] = MakeProgram( device, source_v3, sizeof(source_v3) / sizeof( source_v3[0]) );
-        } else {
-            programs[vectorSize][0] = MakeProgram( device, source, sizeof(source) / sizeof( source[0]) );
+        if (g_arrVecSizes[vectorSize] == 3)
+        {
+            programs[vectorSize][0] = MakeProgram(
+                device, source_v3, sizeof(source_v3) / sizeof(source_v3[0]));
         }
-        if( NULL == programs[ vectorSize ][0] )
+        else
+        {
+            programs[vectorSize][0] =
+                MakeProgram(device, source, sizeof(source) / sizeof(source[0]));
+        }
+        if (NULL == programs[vectorSize][0])
         {
             gFailCount++;
             return -1;
         }
 
-        kernels[ vectorSize ][0] = clCreateKernel( programs[ vectorSize ][0], "test", &error );
-        if( NULL == kernels[vectorSize][0] )
+        kernels[vectorSize][0] =
+            clCreateKernel(programs[vectorSize][0], "test", &error);
+        if (NULL == kernels[vectorSize][0])
         {
             gFailCount++;
-            vlog_error( "\t\tFAILED -- Failed to create kernel. (%d)\n", error );
+            vlog_error("\t\tFAILED -- Failed to create kernel. (%d)\n", error);
             return error;
         }
 
-        if(g_arrVecSizes[vectorSize] == 3) {
-            programs[vectorSize][1] = MakeProgram( device, source_private_store_v3, sizeof(source_private_store_v3) / sizeof( source_private_store_v3[0]) );
-        } else {
-            programs[vectorSize][1] = MakeProgram( device, source_private_store, sizeof(source_private_store) / sizeof( source_private_store[0]) );
+        if (g_arrVecSizes[vectorSize] == 3)
+        {
+            programs[vectorSize][1] =
+                MakeProgram(device, source_private_store_v3,
+                            sizeof(source_private_store_v3)
+                                / sizeof(source_private_store_v3[0]));
+        }
+        else
+        {
+            programs[vectorSize][1] = MakeProgram(
+                device, source_private_store,
+                sizeof(source_private_store) / sizeof(source_private_store[0]));
         }
-        if( NULL == programs[ vectorSize ][1] )
+        if (NULL == programs[vectorSize][1])
         {
             gFailCount++;
             return -1;
         }
 
-        kernels[ vectorSize ][1] = clCreateKernel( programs[ vectorSize ][1], "test", &error );
-        if( NULL == kernels[vectorSize][1] )
+        kernels[vectorSize][1] =
+            clCreateKernel(programs[vectorSize][1], "test", &error);
+        if (NULL == kernels[vectorSize][1])
         {
             gFailCount++;
-            vlog_error( "\t\tFAILED -- Failed to create private kernel. (%d)\n", error );
+            vlog_error("\t\tFAILED -- Failed to create private kernel. (%d)\n",
+                       error);
             return error;
         }
 
-        if(g_arrVecSizes[vectorSize] == 3) {
-            programs[vectorSize][2] = MakeProgram( device, source_local_store_v3, sizeof(source_local_store_v3) / sizeof( source_local_store_v3[0]) );
-            if(  NULL == programs[ vectorSize ][2] )
+        if (g_arrVecSizes[vectorSize] == 3)
+        {
+            programs[vectorSize][2] =
+                MakeProgram(device, source_local_store_v3,
+                            sizeof(source_local_store_v3)
+                                / sizeof(source_local_store_v3[0]));
+            if (NULL == programs[vectorSize][2])
             {
                 unsigned q;
-                for ( q= 0; q < sizeof( source_local_store_v3) / sizeof( source_local_store_v3[0]); q++)
+                for (q = 0; q < sizeof(source_local_store_v3)
+                         / sizeof(source_local_store_v3[0]);
+                     q++)
                     vlog_error("%s", source_local_store_v3[q]);
 
                 gFailCount++;
                 return -1;
-
             }
-        } else {
-            programs[vectorSize][2] = MakeProgram( device, source_local_store, sizeof(source_local_store) / sizeof( source_local_store[0]) );
-            if( NULL == programs[ vectorSize ][2] )
+        }
+        else
+        {
+            programs[vectorSize][2] = MakeProgram(
+                device, source_local_store,
+                sizeof(source_local_store) / sizeof(source_local_store[0]));
+            if (NULL == programs[vectorSize][2])
             {
                 unsigned q;
-                for ( q= 0; q < sizeof( source_local_store) / sizeof( source_local_store[0]); q++)
+                for (q = 0; q < sizeof(source_local_store)
+                         / sizeof(source_local_store[0]);
+                     q++)
                     vlog_error("%s", source_local_store[q]);
 
                 gFailCount++;
                 return -1;
-
             }
         }
 
-        kernels[ vectorSize ][2] = clCreateKernel( programs[ vectorSize ][2], "test", &error );
-        if( NULL == kernels[vectorSize][2] )
+        kernels[vectorSize][2] =
+            clCreateKernel(programs[vectorSize][2], "test", &error);
+        if (NULL == kernels[vectorSize][2])
         {
             gFailCount++;
-            vlog_error( "\t\tFAILED -- Failed to create local kernel. (%d)\n", error );
+            vlog_error("\t\tFAILED -- Failed to create local kernel. (%d)\n",
+                       error);
             return error;
         }
 
-        if( gTestDouble )
+        if (gTestDouble)
         {
-            if(g_arrVecSizes[vectorSize] == 3) {
-                doublePrograms[vectorSize][0] = MakeProgram( device, double_source_v3, sizeof(double_source_v3) / sizeof( double_source_v3[0]) );
-            } else {
-                doublePrograms[vectorSize][0] = MakeProgram( device, double_source, sizeof(double_source) / sizeof( double_source[0]) );
+            if (g_arrVecSizes[vectorSize] == 3)
+            {
+                doublePrograms[vectorSize][0] = MakeProgram(
+                    device, double_source_v3,
+                    sizeof(double_source_v3) / sizeof(double_source_v3[0]));
+            }
+            else
+            {
+                doublePrograms[vectorSize][0] = MakeProgram(
+                    device, double_source,
+                    sizeof(double_source) / sizeof(double_source[0]));
             }
-            if( NULL == doublePrograms[ vectorSize ][0] )
+            if (NULL == doublePrograms[vectorSize][0])
             {
                 gFailCount++;
                 return -1;
             }
 
-            doubleKernels[ vectorSize ][0] = clCreateKernel( doublePrograms[ vectorSize ][0], "test", &error );
-            if( NULL == kernels[vectorSize][0] )
+            doubleKernels[vectorSize][0] =
+                clCreateKernel(doublePrograms[vectorSize][0], "test", &error);
+            if (NULL == kernels[vectorSize][0])
             {
                 gFailCount++;
-                vlog_error( "\t\tFAILED -- Failed to create double kernel. (%d)\n", error );
+                vlog_error(
+                    "\t\tFAILED -- Failed to create double kernel. (%d)\n",
+                    error);
                 return error;
             }
 
-            if(g_arrVecSizes[vectorSize] == 3)
-                doublePrograms[vectorSize][1] = MakeProgram( device, double_source_private_store_v3, sizeof(double_source_private_store_v3) / sizeof( double_source_private_store_v3[0]) );
+            if (g_arrVecSizes[vectorSize] == 3)
+                doublePrograms[vectorSize][1] = MakeProgram(
+                    device, double_source_private_store_v3,
+                    sizeof(double_source_private_store_v3)
+                        / sizeof(double_source_private_store_v3[0]));
             else
-                doublePrograms[vectorSize][1] = MakeProgram( device, double_source_private_store, sizeof(double_source_private_store) / sizeof( double_source_private_store[0]) );
+                doublePrograms[vectorSize][1] =
+                    MakeProgram(device, double_source_private_store,
+                                sizeof(double_source_private_store)
+                                    / sizeof(double_source_private_store[0]));
 
-            if( NULL == doublePrograms[ vectorSize ][1] )
+            if (NULL == doublePrograms[vectorSize][1])
             {
                 gFailCount++;
                 return -1;
             }
 
-            doubleKernels[ vectorSize ][1] = clCreateKernel( doublePrograms[ vectorSize ][1], "test", &error );
-            if( NULL == kernels[vectorSize][1] )
+            doubleKernels[vectorSize][1] =
+                clCreateKernel(doublePrograms[vectorSize][1], "test", &error);
+            if (NULL == kernels[vectorSize][1])
             {
                 gFailCount++;
-                vlog_error( "\t\tFAILED -- Failed to create double private kernel. (%d)\n", error );
+                vlog_error("\t\tFAILED -- Failed to create double private "
+                           "kernel. (%d)\n",
+                           error);
                 return error;
             }
 
-            if(g_arrVecSizes[vectorSize] == 3) {
-                doublePrograms[vectorSize][2] = MakeProgram( device, double_source_local_store_v3, sizeof(double_source_local_store_v3) / sizeof( double_source_local_store_v3[0]) );
-            } else {
-                doublePrograms[vectorSize][2] = MakeProgram( device, double_source_local_store, sizeof(double_source_local_store) / sizeof( double_source_local_store[0]) );
+            if (g_arrVecSizes[vectorSize] == 3)
+            {
+                doublePrograms[vectorSize][2] =
+                    MakeProgram(device, double_source_local_store_v3,
+                                sizeof(double_source_local_store_v3)
+                                    / sizeof(double_source_local_store_v3[0]));
+            }
+            else
+            {
+                doublePrograms[vectorSize][2] =
+                    MakeProgram(device, double_source_local_store,
+                                sizeof(double_source_local_store)
+                                    / sizeof(double_source_local_store[0]));
             }
-            if( NULL == doublePrograms[ vectorSize ][2] )
+            if (NULL == doublePrograms[vectorSize][2])
             {
                 gFailCount++;
                 return -1;
             }
 
-            doubleKernels[ vectorSize ][2] = clCreateKernel( doublePrograms[ vectorSize ][2], "test", &error );
-            if( NULL == kernels[vectorSize][2] )
+            doubleKernels[vectorSize][2] =
+                clCreateKernel(doublePrograms[vectorSize][2], "test", &error);
+            if (NULL == kernels[vectorSize][2])
             {
                 gFailCount++;
-                vlog_error( "\t\tFAILED -- Failed to create double local kernel. (%d)\n", error );
+                vlog_error("\t\tFAILED -- Failed to create double local "
+                           "kernel. (%d)\n",
+                           error);
                 return error;
             }
         }
     } // end for vector size
 
+    const char *reset[] = {
+        "__kernel void reset( __global float *p, __global ushort *f,\n"
+        "                   uint extra_last_thread)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   *(f + i) = 0xdead;"
+        "}\n"
+    };
+
+    if (!gHostReset)
+    {
+        resetProgram =
+            MakeProgram(device, reset, sizeof(reset) / sizeof(reset[0]));
+        if (NULL == resetProgram)
+        {
+            gFailCount++;
+            return -1;
+        }
+        resetKernel = clCreateKernel(resetProgram, "reset", &error);
+        if (NULL == resetKernel)
+        {
+            gFailCount++;
+            return -1;
+        }
+    }
+
     // Figure out how many elements are in a work block
     size_t elementSize = std::max(sizeof(cl_ushort), sizeof(float));
     size_t blockCount = BUFFER_SIZE / elementSize; // elementSize is power of 2
-    uint64_t lastCase = 1ULL << (8*sizeof(float)); // number of floats.
+    uint64_t lastCase = 1ULL << (8 * sizeof(float)); // number of floats.
     size_t stride = blockCount;
 
     if (gWimpyMode)
         stride = (uint64_t)blockCount * (uint64_t)gWimpyReductionFactor;
 
     // we handle 64-bit types a bit differently.
-    if( lastCase == 0 )
-        lastCase = 0x100000000ULL;
+    if (lastCase == 0) lastCase = 0x100000000ULL;
 
     uint64_t i, j;
     error = 0;
@@ -746,7 +900,7 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
     dchk.lim = blockCount;
     dchk.count = (blockCount + threadCount - 1) / threadCount;
 
-    for( i = 0; i < lastCase; i += stride )
+    for (i = 0; i < lastCase; i += stride)
     {
         count = (cl_uint)std::min((uint64_t)blockCount, lastCase - i);
         fref.i = i;
@@ -755,50 +909,71 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
         // Compute the input and reference
         ThreadPool_Do(ReferenceF, threadCount, &fref);
 
-        error = clEnqueueWriteBuffer(gQueue, gInBuffer_single, CL_FALSE, 0, count * sizeof(float ), gIn_single, 0, NULL, NULL);
-        if (error) {
-            vlog_error( "Failure in clWriteBuffer\n" );
+        error = clEnqueueWriteBuffer(gQueue, gInBuffer_single, CL_FALSE, 0,
+                                     count * sizeof(float), gIn_single, 0, NULL,
+                                     NULL);
+        if (error)
+        {
+            vlog_error("Failure in clWriteBuffer\n");
             gFailCount++;
             goto exit;
         }
 
-        if (gTestDouble) {
+        if (gTestDouble)
+        {
             ThreadPool_Do(ReferenceD, threadCount, &dref);
 
-            error = clEnqueueWriteBuffer(gQueue, gInBuffer_double, CL_FALSE, 0, count * sizeof(double ), gIn_double, 0, NULL, NULL);
-            if (error) {
-                vlog_error( "Failure in clWriteBuffer\n" );
+            error = clEnqueueWriteBuffer(gQueue, gInBuffer_double, CL_FALSE, 0,
+                                         count * sizeof(double), gIn_double, 0,
+                                         NULL, NULL);
+            if (error)
+            {
+                vlog_error("Failure in clWriteBuffer\n");
                 gFailCount++;
                 goto exit;
             }
         }
 
-        for (vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) {
+        for (vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest;
+             vectorSize++)
+        {
             // Loop through vector sizes
             fchk.vsz = g_arrVecSizes[vectorSize];
             dchk.vsz = g_arrVecSizes[vectorSize];
 
-            for ( addressSpace = 0; addressSpace < 3; addressSpace++) {
+            for (addressSpace = 0; addressSpace < 3; addressSpace++)
+            {
                 // Loop over address spaces
                 fchk.aspace = addressSpaceNames[addressSpace];
                 dchk.aspace = addressSpaceNames[addressSpace];
 
-                cl_uint pattern = 0xdeaddead;
-                memset_pattern4( gOut_half, &pattern, BUFFER_SIZE/2);
+                if (!gHostReset)
+                {
+                    error = RunKernel(device, resetKernel, gInBuffer_single,
+                                      gOutBuffer_half, count, 0);
+                }
+                else
+                {
+                    cl_uint pattern = 0xdeaddead;
+                    memset_pattern4(gOut_half, &pattern, BUFFER_SIZE / 2);
 
-                error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE,
-                                             0, count * sizeof(cl_half),
-                                             gOut_half, 0, NULL, NULL);
-                if (error) {
-                    vlog_error( "Failure in clWriteArray\n" );
+                    error = clEnqueueWriteBuffer(
+                        gQueue, gOutBuffer_half, CL_FALSE, 0,
+                        count * sizeof(cl_half), gOut_half, 0, NULL, NULL);
+                }
+                if (error)
+                {
+                    vlog_error("Failure in clWriteArray\n");
                     gFailCount++;
                     goto exit;
                 }
 
-                error = RunKernel(device, kernels[vectorSize][addressSpace], gInBuffer_single, gOutBuffer_half,
-                                       numVecs(count, vectorSize, aligned) ,
+                error = RunKernel(device, kernels[vectorSize][addressSpace],
+                                  gInBuffer_single, gOutBuffer_half,
+                                  numVecs(count, vectorSize, aligned),
                                   runsOverBy(count, vectorSize, aligned));
-                if (error) {
+                if (error)
+                {
                     gFailCount++;
                     goto exit;
                 }
@@ -806,34 +981,51 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
                 error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0,
                                             count * sizeof(cl_half), gOut_half,
                                             0, NULL, NULL);
-                if (error) {
-                    vlog_error( "Failure in clReadArray\n" );
+                if (error)
+                {
+                    vlog_error("Failure in clReadArray\n");
                     gFailCount++;
                     goto exit;
                 }
 
                 error = ThreadPool_Do(CheckF, threadCount, &fchk);
-                if (error) {
-                            gFailCount++;
-                            goto exit;
-                        }
+                if (error)
+                {
+                    gFailCount++;
+                    goto exit;
+                }
 
-                if (gTestDouble) {
-                    memset_pattern4( gOut_half, &pattern, BUFFER_SIZE/2);
+                if (gTestDouble)
+                {
 
-                    error = clEnqueueWriteBuffer(
-                        gQueue, gOutBuffer_half, CL_FALSE, 0,
-                        count * sizeof(cl_half), gOut_half, 0, NULL, NULL);
-                    if (error) {
-                        vlog_error( "Failure in clWriteArray\n" );
+                    if (!gHostReset)
+                    {
+                        error = RunKernel(device, resetKernel, gInBuffer_double,
+                                          gOutBuffer_half, count, 0);
+                    }
+                    else
+                    {
+                        cl_uint pattern = 0xdeaddead;
+                        memset_pattern4(gOut_half, &pattern, BUFFER_SIZE / 2);
+
+                        error = clEnqueueWriteBuffer(
+                            gQueue, gOutBuffer_half, CL_FALSE, 0,
+                            count * sizeof(cl_half), gOut_half, 0, NULL, NULL);
+                    }
+                    if (error)
+                    {
+                        vlog_error("Failure in clWriteArray\n");
                         gFailCount++;
                         goto exit;
                     }
 
-                    error = RunKernel(device, doubleKernels[vectorSize][addressSpace], gInBuffer_double, gOutBuffer_half,
+                    error = RunKernel(device,
+                                      doubleKernels[vectorSize][addressSpace],
+                                      gInBuffer_double, gOutBuffer_half,
                                       numVecs(count, vectorSize, aligned),
                                       runsOverBy(count, vectorSize, aligned));
-                    if (error) {
+                    if (error)
+                    {
                         gFailCount++;
                         goto exit;
                     }
@@ -841,148 +1033,185 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR
                     error = clEnqueueReadBuffer(
                         gQueue, gOutBuffer_half, CL_TRUE, 0,
                         count * sizeof(cl_half), gOut_half, 0, NULL, NULL);
-                    if (error) {
-                        vlog_error( "Failure in clReadArray\n" );
+                    if (error)
+                    {
+                        vlog_error("Failure in clReadArray\n");
                         gFailCount++;
                         goto exit;
                     }
 
+
                     error = ThreadPool_Do(CheckD, threadCount, &dchk);
-                    if (error) {
-                                gFailCount++;
-                                goto exit;
-                            }
-                        }
+                    if (error)
+                    {
+                        gFailCount++;
+                        goto exit;
                     }
                 }
+            }
+        }
 
-        if( ((i+blockCount) & ~printMask) == (i+blockCount) )
+        if (((i + blockCount) & ~printMask) == (i + blockCount))
         {
-            vlog( "." );
-            fflush( stdout );
+            vlog(".");
+            fflush(stdout);
         }
-    }  // end last case
+    } // end last case
 
     loopCount = count == blockCount ? 1 : 100;
-    if( gReportTimes )
+    if (gReportTimes)
     {
-        //Init the input stream
+        // Init the input stream
         cl_float *p = (cl_float *)gIn_single;
-        for( j = 0; j < count; j++ )
-            p[j] = (float)((double) (rand() - RAND_MAX/2) / (RAND_MAX/2));
+        for (j = 0; j < count; j++)
+            p[j] = (float)((double)(rand() - RAND_MAX / 2) / (RAND_MAX / 2));
 
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer_single, CL_TRUE, 0, count * sizeof( float ), gIn_single, 0, NULL, NULL)) )
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer_single, CL_TRUE, 0,
+                                          count * sizeof(float), gIn_single, 0,
+                                          NULL, NULL)))
         {
-            vlog_error( "Failure in clWriteArray\n" );
+            vlog_error("Failure in clWriteArray\n");
             gFailCount++;
             goto exit;
         }
 
-        if( gTestDouble )
+        if (gTestDouble)
         {
-            //Init the input stream
+            // Init the input stream
             cl_double *q = (cl_double *)gIn_double;
-            for( j = 0; j < count; j++ )
-                q[j] = ((double) (rand() - RAND_MAX/2) / (RAND_MAX/2));
+            for (j = 0; j < count; j++)
+                q[j] = ((double)(rand() - RAND_MAX / 2) / (RAND_MAX / 2));
 
-            if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer_double, CL_TRUE, 0, count * sizeof( double ), gIn_double, 0, NULL, NULL)) )
+            if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer_double, CL_TRUE,
+                                              0, count * sizeof(double),
+                                              gIn_double, 0, NULL, NULL)))
             {
-                vlog_error( "Failure in clWriteArray\n" );
+                vlog_error("Failure in clWriteArray\n");
                 gFailCount++;
                 goto exit;
             }
         }
 
-        //Run again for timing
-        for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
+        // Run again for timing
+        for (vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest;
+             vectorSize++)
         {
             uint64_t bestTime = -1ULL;
-            for( j = 0; j < loopCount; j++ )
+            for (j = 0; j < loopCount; j++)
             {
                 uint64_t startTime = ReadTime();
 
 
-                if( (error = RunKernel(device, kernels[vectorSize][0], gInBuffer_single, gOutBuffer_half, numVecs(count, vectorSize, aligned) ,
-                                       runsOverBy(count, vectorSize, aligned)) ) )
+                if ((error = RunKernel(device, kernels[vectorSize][0],
+                                       gInBuffer_single, gOutBuffer_half,
+                                       numVecs(count, vectorSize, aligned),
+                                       runsOverBy(count, vectorSize, aligned))))
                 {
                     gFailCount++;
                     goto exit;
                 }
 
-                if( (error = clFinish(gQueue)) )
+                if ((error = clFinish(gQueue)))
                 {
-                    vlog_error( "Failure in clFinish\n" );
+                    vlog_error("Failure in clFinish\n");
                     gFailCount++;
                     goto exit;
                 }
                 uint64_t currentTime = ReadTime() - startTime;
-                if( currentTime < bestTime )
-                    bestTime = currentTime;
-                time[ vectorSize ] += currentTime;
+                if (currentTime < bestTime) bestTime = currentTime;
+                time[vectorSize] += currentTime;
             }
-            if( bestTime < min_time[ vectorSize ] )
-                min_time[ vectorSize ] = bestTime ;
+            if (bestTime < min_time[vectorSize])
+                min_time[vectorSize] = bestTime;
 
-            if( gTestDouble )
+            if (gTestDouble)
             {
                 bestTime = -1ULL;
-                for( j = 0; j < loopCount; j++ )
+                for (j = 0; j < loopCount; j++)
                 {
                     uint64_t startTime = ReadTime();
-                    if( (error = RunKernel(device, doubleKernels[vectorSize][0], gInBuffer_double, gOutBuffer_half, numVecs(count, vectorSize, aligned) ,
-                                           runsOverBy(count, vectorSize, aligned)) ) )
+                    if ((error =
+                             RunKernel(device, doubleKernels[vectorSize][0],
+                                       gInBuffer_double, gOutBuffer_half,
+                                       numVecs(count, vectorSize, aligned),
+                                       runsOverBy(count, vectorSize, aligned))))
                     {
                         gFailCount++;
                         goto exit;
                     }
 
-                    if( (error = clFinish(gQueue)) )
+                    if ((error = clFinish(gQueue)))
                     {
-                        vlog_error( "Failure in clFinish\n" );
+                        vlog_error("Failure in clFinish\n");
                         gFailCount++;
                         goto exit;
                     }
                     uint64_t currentTime = ReadTime() - startTime;
-                    if( currentTime < bestTime )
-                        bestTime = currentTime;
-                    doubleTime[ vectorSize ] += currentTime;
+                    if (currentTime < bestTime) bestTime = currentTime;
+                    doubleTime[vectorSize] += currentTime;
                 }
-                if( bestTime < min_double_time[ vectorSize ] )
-                    min_double_time[ vectorSize ] = bestTime;
+                if (bestTime < min_double_time[vectorSize])
+                    min_double_time[vectorSize] = bestTime;
             }
         }
     }
 
-    if( gReportTimes )
+    if (gReportTimes)
     {
-        for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
-            vlog_perf( SubtractTime( time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) (count * loopCount), 0,
-                      "average us/elem", "vStoreHalf%s avg. (%s vector size: %d)", roundName, addressSpaceNames[0], (g_arrVecSizes[vectorSize]) );
-        for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
-            vlog_perf( SubtractTime( min_time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) count, 0,
-                      "best us/elem", "vStoreHalf%s best (%s vector size: %d)", roundName, addressSpaceNames[0], (g_arrVecSizes[vectorSize])  );
-        if( gTestDouble )
+        for (vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest;
+             vectorSize++)
+            vlog_perf(SubtractTime(time[vectorSize], 0) * 1e6 * gDeviceFrequency
+                          * gComputeDevices / (double)(count * loopCount),
+                      0, "average us/elem",
+                      "vStoreHalf%s avg. (%s vector size: %d)", roundName,
+                      addressSpaceNames[0], (g_arrVecSizes[vectorSize]));
+        for (vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest;
+             vectorSize++)
+            vlog_perf(SubtractTime(min_time[vectorSize], 0) * 1e6
+                          * gDeviceFrequency * gComputeDevices / (double)count,
+                      0, "best us/elem",
+                      "vStoreHalf%s best (%s vector size: %d)", roundName,
+                      addressSpaceNames[0], (g_arrVecSizes[vectorSize]));
+        if (gTestDouble)
         {
-            for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
-                vlog_perf( SubtractTime( doubleTime[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) (count * loopCount), 0,
-                          "average us/elem (double)", "vStoreHalf%s avg. d (%s vector size: %d)", roundName, addressSpaceNames[0],  (g_arrVecSizes[vectorSize])  );
-            for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
-                vlog_perf( SubtractTime( min_double_time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) count, 0,
-                          "best us/elem (double)", "vStoreHalf%s best d (%s vector size: %d)", roundName, addressSpaceNames[0], (g_arrVecSizes[vectorSize]) );
+            for (vectorSize = kMinVectorSize;
+                 vectorSize < kLastVectorSizeToTest; vectorSize++)
+                vlog_perf(SubtractTime(doubleTime[vectorSize], 0) * 1e6
+                              * gDeviceFrequency * gComputeDevices
+                              / (double)(count * loopCount),
+                          0, "average us/elem (double)",
+                          "vStoreHalf%s avg. d (%s vector size: %d)", roundName,
+                          addressSpaceNames[0], (g_arrVecSizes[vectorSize]));
+            for (vectorSize = kMinVectorSize;
+                 vectorSize < kLastVectorSizeToTest; vectorSize++)
+                vlog_perf(SubtractTime(min_double_time[vectorSize], 0) * 1e6
+                              * gDeviceFrequency * gComputeDevices
+                              / (double)count,
+                          0, "best us/elem (double)",
+                          "vStoreHalf%s best d (%s vector size: %d)", roundName,
+                          addressSpaceNames[0], (g_arrVecSizes[vectorSize]));
         }
     }
 
 exit:
-    //clean up
-    for( vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
+    // clean up
+    if (!gHostReset)
+    {
+        clReleaseKernel(resetKernel);
+        clReleaseProgram(resetProgram);
+    }
+
+    for (vectorSize = kMinVectorSize; vectorSize < kLastVectorSizeToTest;
+         vectorSize++)
     {
-        for ( addressSpace = 0; addressSpace < 3; addressSpace++) {
-            clReleaseKernel( kernels[ vectorSize ][ addressSpace ] );
-            clReleaseProgram( programs[ vectorSize ][ addressSpace ] );
-            if( gTestDouble )
+        for (addressSpace = 0; addressSpace < 3; addressSpace++)
+        {
+            clReleaseKernel(kernels[vectorSize][addressSpace]);
+            clReleaseProgram(programs[vectorSize][addressSpace]);
+            if (gTestDouble)
             {
-                clReleaseKernel( doubleKernels[ vectorSize ][addressSpace] );
-                clReleaseProgram( doublePrograms[ vectorSize ][addressSpace] );
+                clReleaseKernel(doubleKernels[vectorSize][addressSpace]);
+                clReleaseProgram(doublePrograms[vectorSize][addressSpace]);
             }
         }
     }
@@ -990,321 +1219,495 @@ exit:
     return error;
 }
 
-int Test_vStoreaHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleReferenceFunc, const char *roundName )
+int Test_vStoreaHalf_private(cl_device_id device, f2h referenceFunc,
+                             d2h doubleReferenceFunc, const char *roundName)
 {
     int vectorSize, error;
-    cl_program  programs[kVectorSizeCount+kStrangeVectorSizeCount][3];
-    cl_kernel   kernels[kVectorSizeCount+kStrangeVectorSizeCount][3];
-
-    uint64_t time[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
-    uint64_t min_time[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
-    memset( min_time, -1, sizeof( min_time ) );
-    cl_program  doublePrograms[kVectorSizeCount+kStrangeVectorSizeCount][3];
-    cl_kernel   doubleKernels[kVectorSizeCount+kStrangeVectorSizeCount][3];
-    uint64_t doubleTime[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
-    uint64_t min_double_time[kVectorSizeCount+kStrangeVectorSizeCount] = {0};
-    memset( min_double_time, -1, sizeof( min_double_time ) );
+    cl_program programs[kVectorSizeCount + kStrangeVectorSizeCount][3];
+    cl_kernel kernels[kVectorSizeCount + kStrangeVectorSizeCount][3];
+    cl_program resetProgram;
+    cl_kernel resetKernel;
+
+    uint64_t time[kVectorSizeCount + kStrangeVectorSizeCount] = { 0 };
+    uint64_t min_time[kVectorSizeCount + kStrangeVectorSizeCount] = { 0 };
+    memset(min_time, -1, sizeof(min_time));
+    cl_program doublePrograms[kVectorSizeCount + kStrangeVectorSizeCount][3];
+    cl_kernel doubleKernels[kVectorSizeCount + kStrangeVectorSizeCount][3];
+    uint64_t doubleTime[kVectorSizeCount + kStrangeVectorSizeCount] = { 0 };
+    uint64_t min_double_time[kVectorSizeCount + kStrangeVectorSizeCount] = {
+        0
+    };
+    memset(min_double_time, -1, sizeof(min_double_time));
 
     bool aligned = true;
 
     int minVectorSize = kMinVectorSize;
     // There is no aligned scalar vstorea_half
-    if( 0 == minVectorSize )
-        minVectorSize = 1;
+    if (0 == minVectorSize) minVectorSize = 1;
 
-    //Loop over vector sizes
-    for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
+    // Loop over vector sizes
+    for (vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest;
+         vectorSize++)
     {
-        const char *source[] = {
-            "__kernel void test( __global float", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n"
-            "{\n"
-            "   size_t i = get_global_id(0);\n"
-            "   vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], i, f );\n"
-            "}\n"
-        };
+        const char *source[] = { "__kernel void test( __global float",
+                                 vector_size_name_extensions[vectorSize],
+                                 " *p, __global half *f )\n"
+                                 "{\n"
+                                 "   size_t i = get_global_id(0);\n"
+                                 "   vstorea_half",
+                                 vector_size_name_extensions[vectorSize],
+                                 roundName,
+                                 "( p[i], i, f );\n"
+                                 "}\n" };
 
         const char *source_v3[] = {
             "__kernel void test( __global float3 *p, __global half *f )\n"
             "{\n"
             "   size_t i = get_global_id(0);\n"
-            "   vstorea_half3",roundName,"( p[i], i, f );\n"
-            "   vstore_half",roundName,"( ((__global  float *)p)[4*i+3], 4*i+3, f);\n"
+            "   vstorea_half3",
+            roundName,
+            "( p[i], i, f );\n"
+            "   vstore_half",
+            roundName,
+            "( ((__global  float *)p)[4*i+3], 4*i+3, f);\n"
             "}\n"
         };
 
         const char *source_private[] = {
-            "__kernel void test( __global float", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n"
+            "__kernel void test( __global float",
+            vector_size_name_extensions[vectorSize],
+            " *p, __global half *f )\n"
             "{\n"
-            "   __private float", vector_size_name_extensions[vectorSize], " data;\n"
+            "   __private float",
+            vector_size_name_extensions[vectorSize],
+            " data;\n"
             "   size_t i = get_global_id(0);\n"
             "   data = p[i];\n"
-            "   vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( data, i, f );\n"
+            "   vstorea_half",
+            vector_size_name_extensions[vectorSize],
+            roundName,
+            "( data, i, f );\n"
             "}\n"
         };
 
         const char *source_private_v3[] = {
             "__kernel void test( __global float3 *p, __global half *f )\n"
             "{\n"
-            "   __private float", vector_size_name_extensions[vectorSize], " data;\n"
+            "   __private float",
+            vector_size_name_extensions[vectorSize],
+            " data;\n"
             "   size_t i = get_global_id(0);\n"
             "   data = p[i];\n"
-            "   vstorea_half3",roundName,"( data, i, f );\n"
-            "   vstore_half",roundName,"( ((__global  float *)p)[4*i+3], 4*i+3, f);\n"
+            "   vstorea_half3",
+            roundName,
+            "( data, i, f );\n"
+            "   vstore_half",
+            roundName,
+            "( ((__global  float *)p)[4*i+3], 4*i+3, f);\n"
             "}\n"
         };
 
         char local_buf_size[10];
         sprintf(local_buf_size, "%lld", (uint64_t)gWorkGroupSize);
-        const char *source_local[] = {
-            "__kernel void test( __global float", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n"
-            "{\n"
-            "   __local float", vector_size_name_extensions[vectorSize], " data[", local_buf_size, "];\n"
-            "   size_t i = get_global_id(0);\n"
-            "   size_t lid = get_local_id(0);\n"
-            "   data[lid] = p[i];\n"
-            "   vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( data[lid], i, f );\n"
-            "}\n"
-        };
+        const char *source_local[] = { "__kernel void test( __global float",
+                                       vector_size_name_extensions[vectorSize],
+                                       " *p, __global half *f )\n"
+                                       "{\n"
+                                       "   __local float",
+                                       vector_size_name_extensions[vectorSize],
+                                       " data[",
+                                       local_buf_size,
+                                       "];\n"
+                                       "   size_t i = get_global_id(0);\n"
+                                       "   size_t lid = get_local_id(0);\n"
+                                       "   data[lid] = p[i];\n"
+                                       "   vstorea_half",
+                                       vector_size_name_extensions[vectorSize],
+                                       roundName,
+                                       "( data[lid], i, f );\n"
+                                       "}\n" };
 
         const char *source_local_v3[] = {
-            "__kernel void test( __global float", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n"
+            "__kernel void test( __global float",
+            vector_size_name_extensions[vectorSize],
+            " *p, __global half *f )\n"
             "{\n"
-            "   __local float", vector_size_name_extensions[vectorSize], " data[", local_buf_size, "];\n"
+            "   __local float",
+            vector_size_name_extensions[vectorSize],
+            " data[",
+            local_buf_size,
+            "];\n"
             "   size_t i = get_global_id(0);\n"
             "   size_t lid = get_local_id(0);\n"
             "   data[lid] = p[i];\n"
-            "   vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( data[lid], i, f );\n"
-            "   vstore_half",roundName,"( ((__global float *)p)[4*i+3], 4*i+3, f);\n"
+            "   vstorea_half",
+            vector_size_name_extensions[vectorSize],
+            roundName,
+            "( data[lid], i, f );\n"
+            "   vstore_half",
+            roundName,
+            "( ((__global float *)p)[4*i+3], 4*i+3, f);\n"
             "}\n"
         };
 
         const char *double_source[] = {
             "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-            "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n"
+            "__kernel void test( __global double",
+            vector_size_name_extensions[vectorSize],
+            " *p, __global half *f )\n"
             "{\n"
             "   size_t i = get_global_id(0);\n"
-            "   vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], i, f );\n"
+            "   vstorea_half",
+            vector_size_name_extensions[vectorSize],
+            roundName,
+            "( p[i], i, f );\n"
             "}\n"
         };
 
         const char *double_source_v3[] = {
             "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-            "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n"
+            "__kernel void test( __global double",
+            vector_size_name_extensions[vectorSize],
+            " *p, __global half *f )\n"
             "{\n"
             "   size_t i = get_global_id(0);\n"
-            "   vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( p[i], i, f );\n"
-            "   vstore_half",roundName,"( ((__global double *)p)[4*i+3], 4*i+3, f);\n"
+            "   vstorea_half",
+            vector_size_name_extensions[vectorSize],
+            roundName,
+            "( p[i], i, f );\n"
+            "   vstore_half",
+            roundName,
+            "( ((__global double *)p)[4*i+3], 4*i+3, f);\n"
             "}\n"
         };
 
         const char *double_source_private[] = {
             "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-            "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n"
+            "__kernel void test( __global double",
+            vector_size_name_extensions[vectorSize],
+            " *p, __global half *f )\n"
             "{\n"
-            "   __private double", vector_size_name_extensions[vectorSize], " data;\n"
+            "   __private double",
+            vector_size_name_extensions[vectorSize],
+            " data;\n"
             "   size_t i = get_global_id(0);\n"
             "   data = p[i];\n"
-            "   vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( data, i, f );\n"
+            "   vstorea_half",
+            vector_size_name_extensions[vectorSize],
+            roundName,
+            "( data, i, f );\n"
             "}\n"
         };
 
         const char *double_source_private_v3[] = {
             "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-            "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n"
+            "__kernel void test( __global double",
+            vector_size_name_extensions[vectorSize],
+            " *p, __global half *f )\n"
             "{\n"
-            "   __private double", vector_size_name_extensions[vectorSize], " data;\n"
+            "   __private double",
+            vector_size_name_extensions[vectorSize],
+            " data;\n"
             "   size_t i = get_global_id(0);\n"
             "   data = p[i];\n"
-            "   vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( data, i, f );\n"
-            "   vstore_half",roundName,"( ((__global  double *)p)[4*i+3], 4*i+3, f);\n"
+            "   vstorea_half",
+            vector_size_name_extensions[vectorSize],
+            roundName,
+            "( data, i, f );\n"
+            "   vstore_half",
+            roundName,
+            "( ((__global  double *)p)[4*i+3], 4*i+3, f);\n"
             "}\n"
         };
 
         const char *double_source_local[] = {
             "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-            "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n"
+            "__kernel void test( __global double",
+            vector_size_name_extensions[vectorSize],
+            " *p, __global half *f )\n"
             "{\n"
-            "   __local double", vector_size_name_extensions[vectorSize], " data[", local_buf_size, "];\n"
+            "   __local double",
+            vector_size_name_extensions[vectorSize],
+            " data[",
+            local_buf_size,
+            "];\n"
             "   size_t i = get_global_id(0);\n"
             "   size_t lid = get_local_id(0);\n"
             "   data[lid] = p[i];\n"
-            "   vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( data[lid], i, f );\n"
+            "   vstorea_half",
+            vector_size_name_extensions[vectorSize],
+            roundName,
+            "( data[lid], i, f );\n"
             "}\n"
         };
 
         const char *double_source_local_v3[] = {
             "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-            "__kernel void test( __global double", vector_size_name_extensions[vectorSize]," *p, __global half *f )\n"
+            "__kernel void test( __global double",
+            vector_size_name_extensions[vectorSize],
+            " *p, __global half *f )\n"
             "{\n"
-            "   __local double", vector_size_name_extensions[vectorSize], " data[", local_buf_size, "];\n"
+            "   __local double",
+            vector_size_name_extensions[vectorSize],
+            " data[",
+            local_buf_size,
+            "];\n"
             "   size_t i = get_global_id(0);\n"
             "   size_t lid = get_local_id(0);\n"
             "   data[lid] = p[i];\n"
-            "   vstorea_half",vector_size_name_extensions[vectorSize],roundName,"( data[lid], i, f );\n"
-            "   vstore_half",roundName,"( ((__global double *)p)[4*i+3], 4*i+3, f);\n"
+            "   vstorea_half",
+            vector_size_name_extensions[vectorSize],
+            roundName,
+            "( data[lid], i, f );\n"
+            "   vstore_half",
+            roundName,
+            "( ((__global double *)p)[4*i+3], 4*i+3, f);\n"
             "}\n"
         };
 
-        if(g_arrVecSizes[vectorSize] == 3) {
-            programs[vectorSize][0] = MakeProgram( device, source_v3, sizeof(source_v3) / sizeof( source_v3[0]) );
-            if( NULL == programs[ vectorSize ][0] )
+        if (g_arrVecSizes[vectorSize] == 3)
+        {
+            programs[vectorSize][0] = MakeProgram(
+                device, source_v3, sizeof(source_v3) / sizeof(source_v3[0]));
+            if (NULL == programs[vectorSize][0])
             {
                 gFailCount++;
                 return -1;
             }
-        } else {
-            programs[vectorSize][0] = MakeProgram( device, source, sizeof(source) / sizeof( source[0]) );
-            if( NULL == programs[ vectorSize ][0] )
+        }
+        else
+        {
+            programs[vectorSize][0] =
+                MakeProgram(device, source, sizeof(source) / sizeof(source[0]));
+            if (NULL == programs[vectorSize][0])
             {
                 gFailCount++;
                 return -1;
             }
         }
 
-        kernels[ vectorSize ][0] = clCreateKernel( programs[ vectorSize ][0], "test", &error );
-        if( NULL == kernels[vectorSize][0] )
+        kernels[vectorSize][0] =
+            clCreateKernel(programs[vectorSize][0], "test", &error);
+        if (NULL == kernels[vectorSize][0])
         {
             gFailCount++;
-            vlog_error( "\t\tFAILED -- Failed to create kernel. (%d)\n", error );
+            vlog_error("\t\tFAILED -- Failed to create kernel. (%d)\n", error);
             return error;
         }
 
-        if(g_arrVecSizes[vectorSize] == 3) {
-            programs[vectorSize][1] = MakeProgram( device, source_private_v3, sizeof(source_private_v3) / sizeof( source_private_v3[0]) );
-            if( NULL == programs[ vectorSize ][1] )
+        if (g_arrVecSizes[vectorSize] == 3)
+        {
+            programs[vectorSize][1] = MakeProgram(
+                device, source_private_v3,
+                sizeof(source_private_v3) / sizeof(source_private_v3[0]));
+            if (NULL == programs[vectorSize][1])
             {
                 gFailCount++;
                 return -1;
             }
-        } else {
-            programs[vectorSize][1] = MakeProgram( device, source_private, sizeof(source_private) / sizeof( source_private[0]) );
-            if( NULL == programs[ vectorSize ][1] )
+        }
+        else
+        {
+            programs[vectorSize][1] =
+                MakeProgram(device, source_private,
+                            sizeof(source_private) / sizeof(source_private[0]));
+            if (NULL == programs[vectorSize][1])
             {
                 gFailCount++;
                 return -1;
             }
         }
 
-        kernels[ vectorSize ][1] = clCreateKernel( programs[ vectorSize ][1], "test", &error );
-        if( NULL == kernels[vectorSize][1] )
+        kernels[vectorSize][1] =
+            clCreateKernel(programs[vectorSize][1], "test", &error);
+        if (NULL == kernels[vectorSize][1])
         {
             gFailCount++;
-            vlog_error( "\t\tFAILED -- Failed to create private kernel. (%d)\n", error );
+            vlog_error("\t\tFAILED -- Failed to create private kernel. (%d)\n",
+                       error);
             return error;
         }
 
-        if(g_arrVecSizes[vectorSize] == 3) {
-            programs[vectorSize][2] = MakeProgram( device, source_local_v3, sizeof(source_local_v3) / sizeof( source_local_v3[0]) );
-            if( NULL == programs[ vectorSize ][2] )
+        if (g_arrVecSizes[vectorSize] == 3)
+        {
+            programs[vectorSize][2] = MakeProgram(
+                device, source_local_v3,
+                sizeof(source_local_v3) / sizeof(source_local_v3[0]));
+            if (NULL == programs[vectorSize][2])
             {
                 gFailCount++;
                 return -1;
             }
-        } else {
-            programs[vectorSize][2] = MakeProgram( device, source_local, sizeof(source_local) / sizeof( source_local[0]) );
-            if( NULL == programs[ vectorSize ][2] )
+        }
+        else
+        {
+            programs[vectorSize][2] =
+                MakeProgram(device, source_local,
+                            sizeof(source_local) / sizeof(source_local[0]));
+            if (NULL == programs[vectorSize][2])
             {
                 gFailCount++;
                 return -1;
             }
         }
 
-        kernels[ vectorSize ][2] = clCreateKernel( programs[ vectorSize ][2], "test", &error );
-        if( NULL == kernels[vectorSize][2] )
+        kernels[vectorSize][2] =
+            clCreateKernel(programs[vectorSize][2], "test", &error);
+        if (NULL == kernels[vectorSize][2])
         {
             gFailCount++;
-            vlog_error( "\t\tFAILED -- Failed to create local kernel. (%d)\n", error );
+            vlog_error("\t\tFAILED -- Failed to create local kernel. (%d)\n",
+                       error);
             return error;
         }
 
-        if( gTestDouble )
+        if (gTestDouble)
         {
-            if(g_arrVecSizes[vectorSize] == 3) {
-                doublePrograms[vectorSize][0] = MakeProgram( device, double_source_v3, sizeof(double_source_v3) / sizeof( double_source_v3[0]) );
-                if( NULL == doublePrograms[ vectorSize ][0] )
+            if (g_arrVecSizes[vectorSize] == 3)
+            {
+                doublePrograms[vectorSize][0] = MakeProgram(
+                    device, double_source_v3,
+                    sizeof(double_source_v3) / sizeof(double_source_v3[0]));
+                if (NULL == doublePrograms[vectorSize][0])
                 {
                     gFailCount++;
                     return -1;
                 }
-            } else {
-                doublePrograms[vectorSize][0] = MakeProgram( device, double_source, sizeof(double_source) / sizeof( double_source[0]) );
-                if( NULL == doublePrograms[ vectorSize ][0] )
+            }
+            else
+            {
+                doublePrograms[vectorSize][0] = MakeProgram(
+                    device, double_source,
+                    sizeof(double_source) / sizeof(double_source[0]));
+                if (NULL == doublePrograms[vectorSize][0])
                 {
                     gFailCount++;
                     return -1;
                 }
             }
 
-            doubleKernels[ vectorSize ][0] = clCreateKernel( doublePrograms[ vectorSize ][0], "test", &error );
-            if( NULL == kernels[vectorSize][0] )
+            doubleKernels[vectorSize][0] =
+                clCreateKernel(doublePrograms[vectorSize][0], "test", &error);
+            if (NULL == kernels[vectorSize][0])
             {
                 gFailCount++;
-                vlog_error( "\t\tFAILED -- Failed to create double kernel. (%d)\n", error );
+                vlog_error(
+                    "\t\tFAILED -- Failed to create double kernel. (%d)\n",
+                    error);
                 return error;
             }
 
-            if(g_arrVecSizes[vectorSize] == 3) {
-                doublePrograms[vectorSize][1] = MakeProgram( device, double_source_private_v3, sizeof(double_source_private_v3) / sizeof( double_source_private_v3[0]) );
-                if( NULL == doublePrograms[ vectorSize ][1] )
+            if (g_arrVecSizes[vectorSize] == 3)
+            {
+                doublePrograms[vectorSize][1] =
+                    MakeProgram(device, double_source_private_v3,
+                                sizeof(double_source_private_v3)
+                                    / sizeof(double_source_private_v3[0]));
+                if (NULL == doublePrograms[vectorSize][1])
                 {
                     gFailCount++;
                     return -1;
                 }
-            } else {
-                doublePrograms[vectorSize][1] = MakeProgram( device, double_source_private, sizeof(double_source_private) / sizeof( double_source_private[0]) );
-                if( NULL == doublePrograms[ vectorSize ][1] )
+            }
+            else
+            {
+                doublePrograms[vectorSize][1] =
+                    MakeProgram(device, double_source_private,
+                                sizeof(double_source_private)
+                                    / sizeof(double_source_private[0]));
+                if (NULL == doublePrograms[vectorSize][1])
                 {
                     gFailCount++;
                     return -1;
                 }
             }
 
-            doubleKernels[ vectorSize ][1] = clCreateKernel( doublePrograms[ vectorSize ][1], "test", &error );
-            if( NULL == kernels[vectorSize][1] )
+            doubleKernels[vectorSize][1] =
+                clCreateKernel(doublePrograms[vectorSize][1], "test", &error);
+            if (NULL == kernels[vectorSize][1])
             {
                 gFailCount++;
-                vlog_error( "\t\tFAILED -- Failed to create double private kernel. (%d)\n", error );
+                vlog_error("\t\tFAILED -- Failed to create double private "
+                           "kernel. (%d)\n",
+                           error);
                 return error;
             }
 
-            if(g_arrVecSizes[vectorSize] == 3) {
-                doublePrograms[vectorSize][2] = MakeProgram( device, double_source_local_v3, sizeof(double_source_local_v3) / sizeof( double_source_local_v3[0]) );
-                if( NULL == doublePrograms[ vectorSize ][2] )
+            if (g_arrVecSizes[vectorSize] == 3)
+            {
+                doublePrograms[vectorSize][2] =
+                    MakeProgram(device, double_source_local_v3,
+                                sizeof(double_source_local_v3)
+                                    / sizeof(double_source_local_v3[0]));
+                if (NULL == doublePrograms[vectorSize][2])
                 {
                     gFailCount++;
                     return -1;
                 }
-            } else {
-                doublePrograms[vectorSize][2] = MakeProgram( device, double_source_local, sizeof(double_source_local) / sizeof( double_source_local[0]) );
-                if( NULL == doublePrograms[ vectorSize ][2] )
+            }
+            else
+            {
+                doublePrograms[vectorSize][2] =
+                    MakeProgram(device, double_source_local,
+                                sizeof(double_source_local)
+                                    / sizeof(double_source_local[0]));
+                if (NULL == doublePrograms[vectorSize][2])
                 {
                     gFailCount++;
                     return -1;
                 }
             }
 
-            doubleKernels[ vectorSize ][2] = clCreateKernel( doublePrograms[ vectorSize ][2], "test", &error );
-            if( NULL == kernels[vectorSize][2] )
+            doubleKernels[vectorSize][2] =
+                clCreateKernel(doublePrograms[vectorSize][2], "test", &error);
+            if (NULL == kernels[vectorSize][2])
             {
                 gFailCount++;
-                vlog_error( "\t\tFAILED -- Failed to create double local kernel. (%d)\n", error );
+                vlog_error("\t\tFAILED -- Failed to create double local "
+                           "kernel. (%d)\n",
+                           error);
                 return error;
             }
         }
     }
 
+    const char *reset[] = {
+        "__kernel void reset( __global float *p, __global ushort *f,\n"
+        "                   uint extra_last_thread)\n"
+        "{\n"
+        "   size_t i = get_global_id(0);\n"
+        "   *(f + i) = 0xdead;"
+        "}\n"
+    };
+
+    if (!gHostReset)
+    {
+        resetProgram =
+            MakeProgram(device, reset, sizeof(reset) / sizeof(reset[0]));
+        if (NULL == resetProgram)
+        {
+            gFailCount++;
+            return -1;
+        }
+        resetKernel = clCreateKernel(resetProgram, "reset", &error);
+        if (NULL == resetKernel)
+        {
+            gFailCount++;
+            return -1;
+        }
+    }
+
     // Figure out how many elements are in a work block
     size_t elementSize = std::max(sizeof(cl_ushort), sizeof(float));
     size_t blockCount = BUFFER_SIZE / elementSize;
-    uint64_t lastCase = 1ULL << (8*sizeof(float));
+    uint64_t lastCase = 1ULL << (8 * sizeof(float));
     size_t stride = blockCount;
 
     if (gWimpyMode)
         stride = (uint64_t)blockCount * (uint64_t)gWimpyReductionFactor;
 
     // we handle 64-bit types a bit differently.
-    if( lastCase == 0 )
-        lastCase = 0x100000000ULL;
+    if (lastCase == 0) lastCase = 0x100000000ULL;
     uint64_t i, j;
     error = 0;
     uint64_t printMask = (lastCase >> 4) - 1;
@@ -1343,7 +1746,7 @@ int Test_vStoreaHalf_private( cl_device_id device, f2h referenceFunc, d2h double
     dchk.lim = blockCount;
     dchk.count = (blockCount + threadCount - 1) / threadCount;
 
-    for( i = 0; i < (uint64_t)lastCase; i += stride )
+    for (i = 0; i < (uint64_t)lastCase; i += stride)
     {
         count = (cl_uint)std::min((uint64_t)blockCount, lastCase - i);
         fref.i = i;
@@ -1352,50 +1755,71 @@ int Test_vStoreaHalf_private( cl_device_id device, f2h referenceFunc, d2h double
         // Create the input and reference
         ThreadPool_Do(ReferenceF, threadCount, &fref);
 
-        error = clEnqueueWriteBuffer(gQueue, gInBuffer_single, CL_FALSE, 0, count * sizeof(float ), gIn_single, 0, NULL, NULL);
-        if (error) {
-            vlog_error( "Failure in clWriteArray\n" );
+        error = clEnqueueWriteBuffer(gQueue, gInBuffer_single, CL_FALSE, 0,
+                                     count * sizeof(float), gIn_single, 0, NULL,
+                                     NULL);
+        if (error)
+        {
+            vlog_error("Failure in clWriteArray\n");
             gFailCount++;
             goto exit;
         }
 
-        if (gTestDouble) {
+        if (gTestDouble)
+        {
             ThreadPool_Do(ReferenceD, threadCount, &dref);
 
-            error = clEnqueueWriteBuffer(gQueue, gInBuffer_double, CL_FALSE, 0, count * sizeof(double ), gIn_double, 0, NULL, NULL);
-            if (error) {
-                vlog_error( "Failure in clWriteArray\n" );
+            error = clEnqueueWriteBuffer(gQueue, gInBuffer_double, CL_FALSE, 0,
+                                         count * sizeof(double), gIn_double, 0,
+                                         NULL, NULL);
+            if (error)
+            {
+                vlog_error("Failure in clWriteArray\n");
                 gFailCount++;
                 goto exit;
             }
         }
 
-        for (vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) {
+        for (vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest;
+             vectorSize++)
+        {
             // Loop over vector legths
             fchk.vsz = g_arrVecSizes[vectorSize];
             dchk.vsz = g_arrVecSizes[vectorSize];
 
-            for ( addressSpace = 0; addressSpace < 3; addressSpace++) {
+            for (addressSpace = 0; addressSpace < 3; addressSpace++)
+            {
                 // Loop over address spaces
                 fchk.aspace = addressSpaceNames[addressSpace];
                 dchk.aspace = addressSpaceNames[addressSpace];
 
-                cl_uint pattern = 0xdeaddead;
-                memset_pattern4(gOut_half, &pattern, BUFFER_SIZE/2);
+                if (!gHostReset)
+                {
+                    error = RunKernel(device, resetKernel, gInBuffer_single,
+                                      gOutBuffer_half, count, 0);
+                }
+                else
+                {
+                    cl_uint pattern = 0xdeaddead;
+                    memset_pattern4(gOut_half, &pattern, BUFFER_SIZE / 2);
 
-                error = clEnqueueWriteBuffer(gQueue, gOutBuffer_half, CL_FALSE,
-                                             0, count * sizeof(cl_half),
-                                             gOut_half, 0, NULL, NULL);
-                if (error) {
-                    vlog_error( "Failure in clWriteArray\n" );
+                    error = clEnqueueWriteBuffer(
+                        gQueue, gOutBuffer_half, CL_FALSE, 0,
+                        count * sizeof(cl_half), gOut_half, 0, NULL, NULL);
+                }
+                if (error)
+                {
+                    vlog_error("Failure in clWriteArray\n");
                     gFailCount++;
                     goto exit;
                 }
 
-                error = RunKernel(device, kernels[vectorSize][addressSpace], gInBuffer_single, gOutBuffer_half,
+                error = RunKernel(device, kernels[vectorSize][addressSpace],
+                                  gInBuffer_single, gOutBuffer_half,
                                   numVecs(count, vectorSize, aligned),
                                   runsOverBy(count, vectorSize, aligned));
-                if (error) {
+                if (error)
+                {
                     gFailCount++;
                     goto exit;
                 }
@@ -1403,34 +1827,51 @@ int Test_vStoreaHalf_private( cl_device_id device, f2h referenceFunc, d2h double
                 error = clEnqueueReadBuffer(gQueue, gOutBuffer_half, CL_TRUE, 0,
                                             count * sizeof(cl_half), gOut_half,
                                             0, NULL, NULL);
-                if (error) {
-                    vlog_error( "Failure in clReadArray\n" );
+                if (error)
+                {
+                    vlog_error("Failure in clReadArray\n");
                     gFailCount++;
                     goto exit;
                 }
 
                 error = ThreadPool_Do(CheckF, threadCount, &fchk);
-                if (error) {
-                            gFailCount++;
-                            goto exit;
-                        }
+                if (error)
+                {
+                    gFailCount++;
+                    goto exit;
+                }
 
-                if (gTestDouble) {
-                    memset_pattern4(gOut_half, &pattern, BUFFER_SIZE/2);
+                if (gTestDouble)
+                {
 
-                    error = clEnqueueWriteBuffer(
-                        gQueue, gOutBuffer_half, CL_FALSE, 0,
-                        count * sizeof(cl_half), gOut_half, 0, NULL, NULL);
-                    if (error) {
-                        vlog_error( "Failure in clWriteArray\n" );
+                    if (!gHostReset)
+                    {
+                        error = RunKernel(device, resetKernel, gInBuffer_single,
+                                          gOutBuffer_half, count, 0);
+                    }
+                    else
+                    {
+                        cl_uint pattern = 0xdeaddead;
+                        memset_pattern4(gOut_half, &pattern, BUFFER_SIZE / 2);
+
+                        error = clEnqueueWriteBuffer(
+                            gQueue, gOutBuffer_half, CL_FALSE, 0,
+                            count * sizeof(cl_half), gOut_half, 0, NULL, NULL);
+                    }
+                    if (error)
+                    {
+                        vlog_error("Failure in clWriteArray\n");
                         gFailCount++;
                         goto exit;
                     }
 
-                    error = RunKernel(device, doubleKernels[vectorSize][addressSpace], gInBuffer_double, gOutBuffer_half,
+                    error = RunKernel(device,
+                                      doubleKernels[vectorSize][addressSpace],
+                                      gInBuffer_double, gOutBuffer_half,
                                       numVecs(count, vectorSize, aligned),
                                       runsOverBy(count, vectorSize, aligned));
-                    if (error) {
+                    if (error)
+                    {
                         gFailCount++;
                         goto exit;
                     }
@@ -1438,149 +1879,186 @@ int Test_vStoreaHalf_private( cl_device_id device, f2h referenceFunc, d2h double
                     error = clEnqueueReadBuffer(
                         gQueue, gOutBuffer_half, CL_TRUE, 0,
                         count * sizeof(cl_half), gOut_half, 0, NULL, NULL);
-                    if (error) {
-                        vlog_error( "Failure in clReadArray\n" );
+                    if (error)
+                    {
+                        vlog_error("Failure in clReadArray\n");
                         gFailCount++;
                         goto exit;
                     }
 
                     error = ThreadPool_Do(CheckD, threadCount, &dchk);
-                    if (error) {
-                                gFailCount++;
-                                goto exit;
-                            }
-                        }
+                    if (error)
+                    {
+                        gFailCount++;
+                        goto exit;
                     }
-        }  // end for vector size
+                }
+            }
+        } // end for vector size
 
-        if( ((i+blockCount) & ~printMask) == (i+blockCount) ) {
-            vlog( "." );
-            fflush( stdout );
+        if (((i + blockCount) & ~printMask) == (i + blockCount))
+        {
+            vlog(".");
+            fflush(stdout);
         }
-    }  // for end lastcase
+    } // for end lastcase
 
     loopCount = count == blockCount ? 1 : 100;
-    if( gReportTimes )
+    if (gReportTimes)
     {
-        //Init the input stream
+        // Init the input stream
         cl_float *p = (cl_float *)gIn_single;
-        for( j = 0; j < count; j++ )
-            p[j] = (float)((double) (rand() - RAND_MAX/2) / (RAND_MAX/2));
+        for (j = 0; j < count; j++)
+            p[j] = (float)((double)(rand() - RAND_MAX / 2) / (RAND_MAX / 2));
 
-        if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer_single, CL_TRUE, 0, count * sizeof( float ), gIn_single, 0, NULL, NULL)) )
+        if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer_single, CL_TRUE, 0,
+                                          count * sizeof(float), gIn_single, 0,
+                                          NULL, NULL)))
         {
-            vlog_error( "Failure in clWriteArray\n" );
+            vlog_error("Failure in clWriteArray\n");
             gFailCount++;
             goto exit;
         }
 
-        if( gTestDouble )
+        if (gTestDouble)
         {
-            //Init the input stream
+            // Init the input stream
             cl_double *q = (cl_double *)gIn_double;
-            for( j = 0; j < count; j++ )
-                q[j] = ((double) (rand() - RAND_MAX/2) / (RAND_MAX/2));
+            for (j = 0; j < count; j++)
+                q[j] = ((double)(rand() - RAND_MAX / 2) / (RAND_MAX / 2));
 
-            if( (error = clEnqueueWriteBuffer(gQueue, gInBuffer_double, CL_TRUE, 0, count * sizeof( double ), gIn_double, 0, NULL, NULL)) )
+            if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer_double, CL_TRUE,
+                                              0, count * sizeof(double),
+                                              gIn_double, 0, NULL, NULL)))
             {
-                vlog_error( "Failure in clWriteArray\n" );
+                vlog_error("Failure in clWriteArray\n");
                 gFailCount++;
                 goto exit;
             }
         }
 
-        //Run again for timing
-        for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
+        // Run again for timing
+        for (vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest;
+             vectorSize++)
         {
             uint64_t bestTime = -1ULL;
-            for( j = 0; j < loopCount; j++ )
+            for (j = 0; j < loopCount; j++)
             {
                 uint64_t startTime = ReadTime();
-                if( (error = RunKernel(device, kernels[vectorSize][0], gInBuffer_single, gOutBuffer_half, numVecs(count, vectorSize, aligned) ,
-                                       runsOverBy(count, vectorSize, aligned)) ) )
+                if ((error = RunKernel(device, kernels[vectorSize][0],
+                                       gInBuffer_single, gOutBuffer_half,
+                                       numVecs(count, vectorSize, aligned),
+                                       runsOverBy(count, vectorSize, aligned))))
                 {
                     gFailCount++;
                     goto exit;
                 }
 
-                if( (error = clFinish(gQueue)) )
+                if ((error = clFinish(gQueue)))
                 {
-                    vlog_error( "Failure in clFinish\n" );
+                    vlog_error("Failure in clFinish\n");
                     gFailCount++;
                     goto exit;
                 }
                 uint64_t currentTime = ReadTime() - startTime;
-                if( currentTime < bestTime )
-                    bestTime = currentTime;
-                time[ vectorSize ] += currentTime;
+                if (currentTime < bestTime) bestTime = currentTime;
+                time[vectorSize] += currentTime;
             }
-            if( bestTime < min_time[ vectorSize ] )
-                min_time[ vectorSize ] = bestTime ;
+            if (bestTime < min_time[vectorSize])
+                min_time[vectorSize] = bestTime;
 
-            if( gTestDouble )
+            if (gTestDouble)
             {
                 bestTime = -1ULL;
-                for( j = 0; j < loopCount; j++ )
+                for (j = 0; j < loopCount; j++)
                 {
                     uint64_t startTime = ReadTime();
-                    if( (error = RunKernel(device, doubleKernels[vectorSize][0], gInBuffer_double, gOutBuffer_half, numVecs(count, vectorSize, aligned) ,
-                                           runsOverBy(count, vectorSize, aligned)) ) )
+                    if ((error =
+                             RunKernel(device, doubleKernels[vectorSize][0],
+                                       gInBuffer_double, gOutBuffer_half,
+                                       numVecs(count, vectorSize, aligned),
+                                       runsOverBy(count, vectorSize, aligned))))
                     {
                         gFailCount++;
                         goto exit;
                     }
 
-                    if( (error = clFinish(gQueue)) )
+                    if ((error = clFinish(gQueue)))
                     {
-                        vlog_error( "Failure in clFinish\n" );
+                        vlog_error("Failure in clFinish\n");
                         gFailCount++;
                         goto exit;
                     }
                     uint64_t currentTime = ReadTime() - startTime;
-                    if( currentTime < bestTime )
-                        bestTime = currentTime;
-                    doubleTime[ vectorSize ] += currentTime;
+                    if (currentTime < bestTime) bestTime = currentTime;
+                    doubleTime[vectorSize] += currentTime;
                 }
-                if( bestTime < min_double_time[ vectorSize ] )
-                    min_double_time[ vectorSize ] = bestTime;
+                if (bestTime < min_double_time[vectorSize])
+                    min_double_time[vectorSize] = bestTime;
             }
         }
     }
 
-    if( gReportTimes )
+    if (gReportTimes)
     {
-        for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
-            vlog_perf( SubtractTime( time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) (count * loopCount), 0,
-                      "average us/elem", "vStoreaHalf%s avg. (%s vector size: %d)", roundName, addressSpaceNames[0], (g_arrVecSizes[vectorSize]) );
-        for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
-            vlog_perf( SubtractTime( min_time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) count, 0,
-                      "best us/elem", "vStoreaHalf%s best (%s vector size: %d)", roundName, addressSpaceNames[0], (g_arrVecSizes[vectorSize])  );
-        if( gTestDouble )
+        for (vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest;
+             vectorSize++)
+            vlog_perf(SubtractTime(time[vectorSize], 0) * 1e6 * gDeviceFrequency
+                          * gComputeDevices / (double)(count * loopCount),
+                      0, "average us/elem",
+                      "vStoreaHalf%s avg. (%s vector size: %d)", roundName,
+                      addressSpaceNames[0], (g_arrVecSizes[vectorSize]));
+        for (vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest;
+             vectorSize++)
+            vlog_perf(SubtractTime(min_time[vectorSize], 0) * 1e6
+                          * gDeviceFrequency * gComputeDevices / (double)count,
+                      0, "best us/elem",
+                      "vStoreaHalf%s best (%s vector size: %d)", roundName,
+                      addressSpaceNames[0], (g_arrVecSizes[vectorSize]));
+        if (gTestDouble)
         {
-            for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
-                vlog_perf( SubtractTime( doubleTime[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) (count * loopCount), 0,
-                          "average us/elem (double)", "vStoreaHalf%s avg. d (%s vector size: %d)", roundName, addressSpaceNames[0], (g_arrVecSizes[vectorSize])  );
-            for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
-                vlog_perf( SubtractTime( min_double_time[ vectorSize ], 0 ) * 1e6 * gDeviceFrequency * gComputeDevices / (double) count, 0,
-                          "best us/elem (double)", "vStoreaHalf%s best d (%s vector size: %d)", roundName, addressSpaceNames[0], (g_arrVecSizes[vectorSize]) );
+            for (vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest;
+                 vectorSize++)
+                vlog_perf(SubtractTime(doubleTime[vectorSize], 0) * 1e6
+                              * gDeviceFrequency * gComputeDevices
+                              / (double)(count * loopCount),
+                          0, "average us/elem (double)",
+                          "vStoreaHalf%s avg. d (%s vector size: %d)",
+                          roundName, addressSpaceNames[0],
+                          (g_arrVecSizes[vectorSize]));
+            for (vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest;
+                 vectorSize++)
+                vlog_perf(
+                    SubtractTime(min_double_time[vectorSize], 0) * 1e6
+                        * gDeviceFrequency * gComputeDevices / (double)count,
+                    0, "best us/elem (double)",
+                    "vStoreaHalf%s best d (%s vector size: %d)", roundName,
+                    addressSpaceNames[0], (g_arrVecSizes[vectorSize]));
         }
     }
 
 exit:
-    //clean up
-    for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++)
+    // clean up
+    if (!gHostReset)
+    {
+        clReleaseKernel(resetKernel);
+        clReleaseProgram(resetProgram);
+    }
+
+    for (vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest;
+         vectorSize++)
     {
-        for ( addressSpace = 0; addressSpace < 3; addressSpace++) {
-            clReleaseKernel( kernels[ vectorSize ][addressSpace] );
-            clReleaseProgram( programs[ vectorSize ][addressSpace] );
-            if( gTestDouble )
+        for (addressSpace = 0; addressSpace < 3; addressSpace++)
+        {
+            clReleaseKernel(kernels[vectorSize][addressSpace]);
+            clReleaseProgram(programs[vectorSize][addressSpace]);
+            if (gTestDouble)
             {
-                clReleaseKernel( doubleKernels[ vectorSize ][addressSpace] );
-                clReleaseProgram( doublePrograms[ vectorSize ][addressSpace] );
+                clReleaseKernel(doubleKernels[vectorSize][addressSpace]);
+                clReleaseProgram(doublePrograms[vectorSize][addressSpace]);
             }
         }
     }
 
     return error;
 }
-
diff --git a/test_conformance/half/cl_utils.cpp b/test_conformance/half/cl_utils.cpp
index 68f7b9cd..bd89a750 100644
--- a/test_conformance/half/cl_utils.cpp
+++ b/test_conformance/half/cl_utils.cpp
@@ -35,37 +35,38 @@ const char *align_divisors[kVectorSizeCount+kStrangeVectorSizeCount] = { "1", "2
 const char *align_types[kVectorSizeCount+kStrangeVectorSizeCount] = { "half", "int", "int2", "int4", "int8", "int2" };
 
 
-void            *gIn_half = NULL;
-void            *gOut_half = NULL;
-void            *gOut_half_reference = NULL;
-void            *gOut_half_reference_double = NULL;
-void            *gIn_single = NULL;
-void            *gOut_single = NULL;
-void            *gOut_single_reference = NULL;
-void            *gIn_double = NULL;
-// void            *gOut_double = NULL;
-// void            *gOut_double_reference = NULL;
-cl_mem          gInBuffer_half = NULL;
-cl_mem          gOutBuffer_half = NULL;
-cl_mem          gInBuffer_single = NULL;
-cl_mem          gOutBuffer_single = NULL;
-cl_mem          gInBuffer_double = NULL;
-// cl_mem          gOutBuffer_double = NULL;
-
-cl_context       gContext = NULL;
+void *gIn_half = NULL;
+void *gOut_half = NULL;
+void *gOut_half_reference = NULL;
+void *gOut_half_reference_double = NULL;
+void *gIn_single = NULL;
+void *gOut_single = NULL;
+void *gOut_single_reference = NULL;
+void *gIn_double = NULL;
+// void *gOut_double = NULL;
+// void *gOut_double_reference = NULL;
+cl_mem gInBuffer_half = NULL;
+cl_mem gOutBuffer_half = NULL;
+cl_mem gInBuffer_single = NULL;
+cl_mem gOutBuffer_single = NULL;
+cl_mem gInBuffer_double = NULL;
+// cl_mem gOutBuffer_double = NULL;
+
+cl_context gContext = NULL;
 cl_command_queue gQueue = NULL;
-uint32_t        gDeviceFrequency = 0;
-uint32_t        gComputeDevices = 0;
-size_t          gMaxThreadGroupSize = 0;
-size_t          gWorkGroupSize = 0;
-bool            gWimpyMode = false;
-int             gWimpyReductionFactor = 512;
-int             gTestDouble = 0;
+uint32_t gDeviceFrequency = 0;
+uint32_t gComputeDevices = 0;
+size_t gMaxThreadGroupSize = 0;
+size_t gWorkGroupSize = 0;
+bool gWimpyMode = false;
+int gWimpyReductionFactor = 512;
+int gTestDouble = 0;
+bool gHostReset = false;
 
 #if defined( __APPLE__ )
-int             gReportTimes = 1;
+int gReportTimes = 1;
 #else
-int             gReportTimes = 0;
+int gReportTimes = 0;
 #endif
 
 #pragma mark -
@@ -98,17 +99,6 @@ test_status InitCL( cl_device_id device )
     int hasDouble = is_extension_available(device, "cl_khr_fp64");
     gTestDouble ^= hasDouble;
 
-    //detect whether profile of the device is embedded
-    char profile[64] = "";
-    if( (error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ) ) )
-    {
-        vlog_error( "Unable to get device CL DEVICE PROFILE string. (%d) \n", error );
-    }
-    else if( strstr(profile, "EMBEDDED_PROFILE" ) )
-    {
-        gIsEmbedded = 1;
-    }
-
     vlog( "%d compute devices at %f GHz\n", gComputeDevices, (double) gDeviceFrequency / 1000. );
     vlog( "Max thread group size is %lld.\n", (uint64_t) gMaxThreadGroupSize );
 
diff --git a/test_conformance/half/cl_utils.h b/test_conformance/half/cl_utils.h
index 50d8af3d..da6073cf 100644
--- a/test_conformance/half/cl_utils.h
+++ b/test_conformance/half/cl_utils.h
@@ -44,37 +44,38 @@
 #include <CL/opencl.h>
 #endif
 
-extern void            *gIn_half;
-extern void            *gOut_half;
-extern void            *gOut_half_reference;
-extern void            *gOut_half_reference_double;
-extern void            *gIn_single;
-extern void            *gOut_single;
-extern void            *gOut_single_reference;
-extern void            *gIn_double;
-// extern void            *gOut_double;
-// extern void            *gOut_double_reference;
-extern cl_mem          gInBuffer_half;
-extern cl_mem          gOutBuffer_half;
-extern cl_mem          gInBuffer_single;
-extern cl_mem          gOutBuffer_single;
-extern cl_mem          gInBuffer_double;
-// extern cl_mem          gOutBuffer_double;
-
-extern cl_context      gContext;
+extern void *gIn_half;
+extern void *gOut_half;
+extern void *gOut_half_reference;
+extern void *gOut_half_reference_double;
+extern void *gIn_single;
+extern void *gOut_single;
+extern void *gOut_single_reference;
+extern void *gIn_double;
+// extern void *gOut_double;
+// extern void *gOut_double_reference;
+extern cl_mem gInBuffer_half;
+extern cl_mem gOutBuffer_half;
+extern cl_mem gInBuffer_single;
+extern cl_mem gOutBuffer_single;
+extern cl_mem gInBuffer_double;
+// extern cl_mem gOutBuffer_double;
+
+extern cl_context gContext;
 extern cl_command_queue gQueue;
-extern uint32_t        gDeviceFrequency;
-extern uint32_t        gComputeDevices;
-extern size_t          gMaxThreadGroupSize;
-extern size_t          gWorkGroupSize;
-extern int             gTestDouble;
-extern int             gReportTimes;
+extern uint32_t gDeviceFrequency;
+extern uint32_t gComputeDevices;
+extern size_t gMaxThreadGroupSize;
+extern size_t gWorkGroupSize;
+extern int gTestDouble;
+extern int gReportTimes;
+extern bool gHostReset;
 
 // gWimpyMode indicates if we run the test in wimpy mode where we limit the
 // size of 32 bit ranges to a much smaller set.  This is meant to be used
 // as a smoke test
-extern bool            gWimpyMode;
-extern int             gWimpyReductionFactor;
+extern bool gWimpyMode;
+extern int gWimpyReductionFactor;
 
 uint64_t ReadTime( void );
 double SubtractTime( uint64_t endTime, uint64_t startTime );
diff --git a/test_conformance/half/main.cpp b/test_conformance/half/main.cpp
index 6bc7db95..ee44fb2d 100644
--- a/test_conformance/half/main.cpp
+++ b/test_conformance/half/main.cpp
@@ -194,6 +194,8 @@ static int ParseArgs( int argc, const char **argv )
                         gReportTimes ^= 1;
                         break;
 
+                    case 'r': gHostReset = true; break;
+
                     case 'w':  // Wimpy mode
                         gWimpyMode = true;
                         break;
@@ -235,13 +237,17 @@ static int ParseArgs( int argc, const char **argv )
 
 static void PrintUsage( void )
 {
-    vlog( "%s [-dthw]: <optional: test names>\n", appName );
-    vlog( "\t\t-d\tToggle double precision testing (default: on if double supported)\n" );
-    vlog( "\t\t-t\tToggle reporting performance data.\n" );
-    vlog( "\t\t-w\tRun in wimpy mode\n" );
-    vlog( "\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is 1-12, default factor(%u)\n", gWimpyReductionFactor);
-    vlog( "\t\t-h\tHelp\n" );
-    for( int i = 0; i < test_num; i++ )
+    vlog("%s [-dthw]: <optional: test names>\n", appName);
+    vlog("\t\t-d\tToggle double precision testing (default: on if double "
+         "supported)\n");
+    vlog("\t\t-t\tToggle reporting performance data.\n");
+    vlog("\t\t-r\tReset buffers on host instead of on device.\n");
+    vlog("\t\t-w\tRun in wimpy mode\n");
+    vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
+         "1-12, default factor(%u)\n",
+         gWimpyReductionFactor);
+    vlog("\t\t-h\tHelp\n");
+    for (int i = 0; i < test_num; i++)
     {
         vlog("\t\t%s\n", test_list[i].name );
     }
diff --git a/test_conformance/images/clCopyImage/CMakeLists.txt b/test_conformance/images/clCopyImage/CMakeLists.txt
index d8aace41..bf06dc68 100644
--- a/test_conformance/images/clCopyImage/CMakeLists.txt
+++ b/test_conformance/images/clCopyImage/CMakeLists.txt
@@ -15,5 +15,7 @@ set(${MODULE_NAME}_SOURCES
     ../common.cpp
 )
 
+set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
+
 include(../../CMakeCommon.txt)
 
diff --git a/test_conformance/images/clCopyImage/test_copy_generic.cpp b/test_conformance/images/clCopyImage/test_copy_generic.cpp
index 3bd1b6ef..3e0b60d9 100644
--- a/test_conformance/images/clCopyImage/test_copy_generic.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_generic.cpp
@@ -289,12 +289,6 @@ cl_mem create_image( cl_context context, cl_command_queue queue, BufferOwningPtr
     return img;
 }
 
-// WARNING -- not thread safe
-BufferOwningPtr<char> srcData;
-BufferOwningPtr<char> dstData;
-BufferOwningPtr<char> srcHost;
-BufferOwningPtr<char> dstHost;
-
 int test_copy_image_generic( cl_context context, cl_command_queue queue, image_descriptor *srcImageInfo, image_descriptor *dstImageInfo,
                             const size_t sourcePos[], const size_t destPos[], const size_t regionSize[], MTdata d )
 {
@@ -302,6 +296,11 @@ int test_copy_image_generic( cl_context context, cl_command_queue queue, image_d
 
     clMemWrapper srcImage, dstImage;
 
+    BufferOwningPtr<char> srcData;
+    BufferOwningPtr<char> dstData;
+    BufferOwningPtr<char> srcHost;
+    BufferOwningPtr<char> dstHost;
+
     if( gDebugTrace )
         log_info( " ++ Entering inner test loop...\n" );
 
diff --git a/test_conformance/images/clReadWriteImage/CMakeLists.txt b/test_conformance/images/clReadWriteImage/CMakeLists.txt
index 9308bbfe..bc1600ff 100644
--- a/test_conformance/images/clReadWriteImage/CMakeLists.txt
+++ b/test_conformance/images/clReadWriteImage/CMakeLists.txt
@@ -11,5 +11,7 @@ set(${MODULE_NAME}_SOURCES
     ../common.cpp
 )
 
+set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
+
 include(../../CMakeCommon.txt)
 
diff --git a/test_conformance/images/kernel_read_write/CMakeLists.txt b/test_conformance/images/kernel_read_write/CMakeLists.txt
index ccd678c1..b5527c74 100644
--- a/test_conformance/images/kernel_read_write/CMakeLists.txt
+++ b/test_conformance/images/kernel_read_write/CMakeLists.txt
@@ -21,7 +21,7 @@ set(${MODULE_NAME}_SOURCES
 
 # Make unused variables not fatal in this module; see
 # https://github.com/KhronosGroup/OpenCL-CTS/issues/1484
-set_gnulike_module_compile_flags("-Wno-error=unused-variable")
+set_gnulike_module_compile_flags("-Wno-error=unused-variable -Wno-unused-but-set-variable")
 
 include(../../CMakeCommon.txt)
 
diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
index 2ce33a17..6d94c2ff 100644
--- a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp
@@ -69,13 +69,13 @@ int image2d_from_buffer_positive(cl_device_id device, cl_context context,
 {
     if (!is_extension_available(device, "cl_khr_image2d_from_buffer"))
     {
-        printf("Extension cl_khr_image2d_from_buffer not available");
+        log_info("Extension cl_khr_image2d_from_buffer not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
     if (!is_extension_available(device, "cl_ext_image_requirements_info"))
     {
-        printf("Extension cl_ext_image_requirements_info not available");
+        log_info("Extension cl_ext_image_requirements_info not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
@@ -89,17 +89,17 @@ int image2d_from_buffer_positive(cl_device_id device, cl_context context,
                                          CL_MEM_READ_WRITE,
                                          CL_MEM_KERNEL_READ_AND_WRITE };
 
-    for (auto flag : flagTypes)
+    for (auto flagType : flagTypes)
     {
         for (auto imageType : imageTypes)
         {
             /* Get the list of supported image formats */
             std::vector<cl_image_format> formatList;
             if (TEST_PASS
-                    != get_format_list(context, imageType, formatList, flag)
+                    != get_format_list(context, imageType, formatList, flagType)
                 || formatList.size() == 0)
             {
-                test_fail("Failure to get supported formats list");
+                test_fail("Failure to get supported formats list\n");
             }
 
             cl_uint row_pitch_alignment_2d = 0;
@@ -121,15 +121,15 @@ int image2d_from_buffer_positive(cl_device_id device, cl_context context,
                 cl_image_desc image_desc = { 0 };
                 image_desc_init(&image_desc, imageType);
 
-                flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE)
+                cl_mem_flags flag = (flagType == CL_MEM_KERNEL_READ_AND_WRITE)
                     ? CL_MEM_READ_WRITE
-                    : flag;
+                    : flagType;
 
                 size_t row_pitch_alignment = 0;
                 size_t base_address_alignment = 0;
 
                 int get_error = get_image_requirement_alignment(
-                    device, context, 0, &format, &image_desc,
+                    device, context, flag, &format, &image_desc,
                     &row_pitch_alignment, nullptr, &base_address_alignment);
                 if (TEST_PASS != get_error)
                 {
@@ -143,12 +143,12 @@ int image2d_from_buffer_positive(cl_device_id device, cl_context context,
                 if (base_address_alignment
                     > base_address_alignment_2d * element_size)
                 {
-                    test_fail("Unexpected base_address_alignment");
+                    test_fail("Unexpected base_address_alignment\n");
                 }
 
                 if (row_pitch_alignment > row_pitch_alignment_2d * element_size)
                 {
-                    test_fail("Unexpected row_pitch_alignment");
+                    test_fail("Unexpected row_pitch_alignment\n");
                 }
             }
         }
@@ -167,13 +167,13 @@ int memInfo_image_from_buffer_positive(cl_device_id device, cl_context context,
 {
     if (!is_extension_available(device, "cl_ext_image_requirements_info"))
     {
-        printf("Extension cl_ext_image_requirements_info not available");
+        log_info("Extension cl_ext_image_requirements_info not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
     if (!is_extension_available(device, "cl_ext_image_from_buffer"))
     {
-        printf("Extension cl_ext_image_from_buffer not available");
+        log_info("Extension cl_ext_image_from_buffer not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
@@ -187,17 +187,17 @@ int memInfo_image_from_buffer_positive(cl_device_id device, cl_context context,
                                          CL_MEM_READ_WRITE,
                                          CL_MEM_KERNEL_READ_AND_WRITE };
 
-    for (auto flag : flagTypes)
+    for (auto flagType : flagTypes)
     {
         for (auto imageType : imageTypes)
         {
             /* Get the list of supported image formats */
             std::vector<cl_image_format> formatList;
             if (TEST_PASS
-                    != get_format_list(context, imageType, formatList, flag)
+                    != get_format_list(context, imageType, formatList, flagType)
                 || formatList.size() == 0)
             {
-                test_fail("Failure to get supported formats list");
+                test_fail("Failure to get supported formats list\n");
             }
 
             for (auto format : formatList)
@@ -205,15 +205,15 @@ int memInfo_image_from_buffer_positive(cl_device_id device, cl_context context,
                 cl_image_desc image_desc = { 0 };
                 image_desc_init(&image_desc, imageType);
 
-                flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE)
+                cl_mem_flags flag = (flagType == CL_MEM_KERNEL_READ_AND_WRITE)
                     ? CL_MEM_READ_WRITE
-                    : flag;
+                    : flagType;
 
                 size_t row_pitch_alignment = 0;
                 size_t slice_pitch_alignment = 0;
 
                 int get_error = get_image_requirement_alignment(
-                    device, context, 0, &format, &image_desc,
+                    device, context, flag, &format, &image_desc,
                     &row_pitch_alignment, &slice_pitch_alignment, nullptr);
                 if (TEST_PASS != get_error)
                 {
@@ -249,7 +249,8 @@ int memInfo_image_from_buffer_positive(cl_device_id device, cl_context context,
 
                 if (returned_buffer != buffer)
                 {
-                    test_fail("Unexpected CL_MEM_ASSOCIATED_MEMOBJECT buffer");
+                    test_fail(
+                        "Unexpected CL_MEM_ASSOCIATED_MEMOBJECT buffer\n");
                 }
 
                 err = clReleaseMemObject(buffer);
@@ -275,13 +276,13 @@ int imageInfo_image_from_buffer_positive(cl_device_id device,
 {
     if (!is_extension_available(device, "cl_ext_image_requirements_info"))
     {
-        printf("Extension cl_ext_image_requirements_info not available");
+        log_info("Extension cl_ext_image_requirements_info not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
     if (!is_extension_available(device, "cl_ext_image_from_buffer"))
     {
-        printf("Extension cl_ext_image_from_buffer not available");
+        log_info("Extension cl_ext_image_from_buffer not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
@@ -295,17 +296,17 @@ int imageInfo_image_from_buffer_positive(cl_device_id device,
                                          CL_MEM_READ_WRITE,
                                          CL_MEM_KERNEL_READ_AND_WRITE };
 
-    for (auto flag : flagTypes)
+    for (auto flagType : flagTypes)
     {
         for (auto imageType : imageTypes)
         {
             /* Get the list of supported image formats */
             std::vector<cl_image_format> formatList;
             if (TEST_PASS
-                    != get_format_list(context, imageType, formatList, flag)
+                    != get_format_list(context, imageType, formatList, flagType)
                 || formatList.size() == 0)
             {
-                test_fail("Failure to get supported formats list");
+                test_fail("Failure to get supported formats list\n");
             }
 
             for (auto format : formatList)
@@ -313,15 +314,15 @@ int imageInfo_image_from_buffer_positive(cl_device_id device,
                 cl_image_desc image_desc = { 0 };
                 image_desc_init(&image_desc, imageType);
 
-                flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE)
+                cl_mem_flags flag = (flagType == CL_MEM_KERNEL_READ_AND_WRITE)
                     ? CL_MEM_READ_WRITE
-                    : flag;
+                    : flagType;
 
                 size_t row_pitch_alignment = 0;
                 size_t slice_pitch_alignment = 0;
 
                 int get_error = get_image_requirement_alignment(
-                    device, context, 0, &format, &image_desc,
+                    device, context, flag, &format, &image_desc,
                     &row_pitch_alignment, &slice_pitch_alignment, nullptr);
                 if (TEST_PASS != get_error)
                 {
@@ -376,7 +377,7 @@ int imageInfo_image_from_buffer_positive(cl_device_id device,
                     {
                         test_fail(
                             "Unexpected row pitch "
-                            "CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT");
+                            "CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT\n");
                     }
                 }
 
@@ -391,9 +392,9 @@ int imageInfo_image_from_buffer_positive(cl_device_id device,
 
                     if (returned_slice_pitch != slice_pitch)
                     {
-                        test_fail(
-                            "Unexpected row pitch "
-                            "CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT");
+                        test_fail("Unexpected row pitch "
+                                  "CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_"
+                                  "EXT\n");
                     }
                 }
 
@@ -425,13 +426,13 @@ int image_from_buffer_alignment_negative(cl_device_id device,
 {
     if (!is_extension_available(device, "cl_ext_image_requirements_info"))
     {
-        printf("Extension cl_ext_image_requirements_info not available");
+        log_info("Extension cl_ext_image_requirements_info not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
     if (!is_extension_available(device, "cl_ext_image_from_buffer"))
     {
-        printf("Extension cl_ext_image_from_buffer not available");
+        log_info("Extension cl_ext_image_from_buffer not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
@@ -445,17 +446,17 @@ int image_from_buffer_alignment_negative(cl_device_id device,
                                          CL_MEM_READ_WRITE,
                                          CL_MEM_KERNEL_READ_AND_WRITE };
 
-    for (auto flag : flagTypes)
+    for (auto flagType : flagTypes)
     {
         for (auto imageType : imageTypes)
         {
             /* Get the list of supported image formats */
             std::vector<cl_image_format> formatList;
             if (TEST_PASS
-                    != get_format_list(context, imageType, formatList, flag)
+                    != get_format_list(context, imageType, formatList, flagType)
                 || formatList.size() == 0)
             {
-                test_fail("Failure to get supported formats list");
+                test_fail("Failure to get supported formats list\n");
             }
 
             for (auto format : formatList)
@@ -463,16 +464,16 @@ int image_from_buffer_alignment_negative(cl_device_id device,
                 cl_image_desc image_desc = { 0 };
                 image_desc_init(&image_desc, imageType);
 
-                flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE)
+                cl_mem_flags flag = (flagType == CL_MEM_KERNEL_READ_AND_WRITE)
                     ? CL_MEM_READ_WRITE
-                    : flag;
+                    : flagType;
 
                 size_t row_pitch_alignment = 0;
                 size_t slice_pitch_alignment = 0;
                 size_t base_address_alignment = 0;
 
                 int get_error = get_image_requirement_alignment(
-                    device, context, 0, &format, &image_desc,
+                    device, context, flag, &format, &image_desc,
                     &row_pitch_alignment, &slice_pitch_alignment,
                     &base_address_alignment);
                 if (TEST_PASS != get_error)
@@ -575,13 +576,13 @@ int image_from_small_buffer_negative(cl_device_id device, cl_context context,
 {
     if (!is_extension_available(device, "cl_ext_image_requirements_info"))
     {
-        printf("Extension cl_ext_image_requirements_info not available");
+        log_info("Extension cl_ext_image_requirements_info not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
     if (!is_extension_available(device, "cl_ext_image_from_buffer"))
     {
-        printf("Extension cl_ext_image_from_buffer not available");
+        log_info("Extension cl_ext_image_from_buffer not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
@@ -595,17 +596,17 @@ int image_from_small_buffer_negative(cl_device_id device, cl_context context,
                                          CL_MEM_READ_WRITE,
                                          CL_MEM_KERNEL_READ_AND_WRITE };
 
-    for (auto flag : flagTypes)
+    for (auto flagType : flagTypes)
     {
         for (auto imageType : imageTypes)
         {
             /* Get the list of supported image formats */
             std::vector<cl_image_format> formatList;
             if (TEST_PASS
-                    != get_format_list(context, imageType, formatList, flag)
+                    != get_format_list(context, imageType, formatList, flagType)
                 || formatList.size() == 0)
             {
-                test_fail("Failure to get supported formats list");
+                test_fail("Failure to get supported formats list\n");
             }
 
             for (auto format : formatList)
@@ -613,9 +614,9 @@ int image_from_small_buffer_negative(cl_device_id device, cl_context context,
                 cl_image_desc image_desc = { 0 };
                 image_desc_init(&image_desc, imageType);
 
-                flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE)
+                cl_mem_flags flag = (flagType == CL_MEM_KERNEL_READ_AND_WRITE)
                     ? CL_MEM_READ_WRITE
-                    : flag;
+                    : flagType;
 
                 /* Invalid buffer size */
                 cl_int err;
@@ -665,7 +666,7 @@ static int image_from_buffer_fill_check(cl_command_queue queue, cl_mem image,
 
                     if (read_buffer[index] != pattern)
                     {
-                        test_fail("Image pattern check failed");
+                        test_fail("Image pattern check failed\n");
                     }
                 }
             }
@@ -683,13 +684,13 @@ int image_from_buffer_fill_positive(cl_device_id device, cl_context context,
 {
     if (!is_extension_available(device, "cl_ext_image_requirements_info"))
     {
-        printf("Extension cl_ext_image_requirements_info not available");
+        log_info("Extension cl_ext_image_requirements_info not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
     if (!is_extension_available(device, "cl_ext_image_from_buffer"))
     {
-        printf("Extension cl_ext_image_from_buffer not available");
+        log_info("Extension cl_ext_image_from_buffer not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
@@ -703,33 +704,39 @@ int image_from_buffer_fill_positive(cl_device_id device, cl_context context,
                                          CL_MEM_READ_WRITE,
                                          CL_MEM_KERNEL_READ_AND_WRITE };
 
-    for (auto flag : flagTypes)
+    for (auto flagType : flagTypes)
     {
         for (auto imageType : imageTypes)
         {
             /* Get the list of supported image formats */
             std::vector<cl_image_format> formatList;
             if (TEST_PASS
-                    != get_format_list(context, imageType, formatList, flag)
+                    != get_format_list(context, imageType, formatList, flagType)
                 || formatList.size() == 0)
             {
-                test_fail("Failure to get supported formats list");
+                test_fail("Failure to get supported formats list\n");
             }
 
             for (auto format : formatList)
             {
+                if (!IsChannelOrderSupported(format.image_channel_order)
+                    || !IsChannelTypeSupported(format.image_channel_data_type))
+                {
+                    continue;
+                }
+
                 cl_image_desc image_desc = { 0 };
                 image_desc_init(&image_desc, imageType);
 
-                flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE)
+                cl_mem_flags flag = (flagType == CL_MEM_KERNEL_READ_AND_WRITE)
                     ? CL_MEM_READ_WRITE
-                    : flag;
+                    : flagType;
 
                 size_t row_pitch_alignment = 0;
                 size_t slice_pitch_alignment = 0;
 
                 int get_error = get_image_requirement_alignment(
-                    device, context, 0, &format, &image_desc,
+                    device, context, flag, &format, &image_desc,
                     &row_pitch_alignment, &slice_pitch_alignment, nullptr);
                 if (TEST_PASS != get_error)
                 {
@@ -877,7 +884,7 @@ static int image_from_buffer_read_check(cl_command_queue queue, cl_mem buffer,
             {
                 if (host_ptr[j] != pattern)
                 {
-                    test_fail("Image pattern check failed");
+                    test_fail("Image pattern check failed\n");
                 }
             }
             host_ptr = host_ptr + row_pitch;
@@ -897,13 +904,13 @@ int image_from_buffer_read_positive(cl_device_id device, cl_context context,
 {
     if (!is_extension_available(device, "cl_ext_image_requirements_info"))
     {
-        printf("Extension cl_ext_image_requirements_info not available");
+        log_info("Extension cl_ext_image_requirements_info not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
     if (!is_extension_available(device, "cl_ext_image_from_buffer"))
     {
-        printf("Extension cl_ext_image_from_buffer not available");
+        log_info("Extension cl_ext_image_from_buffer not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
@@ -939,8 +946,11 @@ int image_from_buffer_read_positive(cl_device_id device, cl_context context,
 
         const size_t row_pitch =
             aligned_size(TEST_IMAGE_SIZE * element_size, row_pitch_alignment);
-        const size_t slice_pitch =
-            aligned_size(row_pitch * TEST_IMAGE_SIZE, slice_pitch_alignment);
+        const size_t slice_pitch = aligned_size(
+            row_pitch
+                * (imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY ? 1
+                                                            : TEST_IMAGE_SIZE),
+            slice_pitch_alignment);
 
         const size_t buffer_size = slice_pitch * TEST_IMAGE_SIZE;
 
@@ -1010,4 +1020,4 @@ int image_from_buffer_read_positive(cl_device_id device, cl_context context,
     }
 
     return TEST_PASS;
-}
-\ No newline at end of file
+}
diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_requirements_info.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_requirements_info.cpp
index 9212fcbc..a4da4c5d 100644
--- a/test_conformance/images/kernel_read_write/test_cl_ext_image_requirements_info.cpp
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_requirements_info.cpp
@@ -31,7 +31,7 @@ int cl_image_requirements_size_ext_negative(cl_device_id device,
 {
     if (!is_extension_available(device, "cl_ext_image_requirements_info"))
     {
-        printf("Extension cl_ext_image_requirements_info not available");
+        log_info("Extension cl_ext_image_requirements_info not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
@@ -80,13 +80,13 @@ int cl_image_requirements_size_ext_consistency(cl_device_id device,
 {
     if (!is_extension_available(device, "cl_ext_image_requirements_info"))
     {
-        printf("Extension cl_ext_image_requirements_info not available");
+        log_info("Extension cl_ext_image_requirements_info not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
     if (!is_extension_available(device, "cl_ext_image_from_buffer"))
     {
-        printf("Extension cl_ext_image_from_buffer not available");
+        log_info("Extension cl_ext_image_from_buffer not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
@@ -116,7 +116,7 @@ int cl_image_requirements_size_ext_consistency(cl_device_id device,
                     != get_format_list(context, imageType, formatList, flag)
                 || formatList.size() == 0)
             {
-                test_fail("Failure to get supported formats list");
+                test_fail("Failure to get supported formats list\n");
             }
 
             for (auto format : formatList)
@@ -154,7 +154,7 @@ int cl_image_requirements_size_ext_consistency(cl_device_id device,
                 if (max_size != size)
                 {
                     test_fail("CL_IMAGE_REQUIREMENTS_SIZE_EXT different from "
-                              "CL_MEM_SIZE");
+                              "CL_MEM_SIZE\n");
                 }
 
                 err = clReleaseMemObject(image_buffer);
@@ -179,7 +179,7 @@ int clGetImageRequirementsInfoEXT_negative(cl_device_id device,
 {
     if (!is_extension_available(device, "cl_ext_image_requirements_info"))
     {
-        printf("Extension cl_ext_image_requirements_info not available");
+        log_info("Extension cl_ext_image_requirements_info not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
@@ -292,7 +292,7 @@ int cl_image_requirements_max_val_ext_negative(cl_device_id device,
 {
     if (!is_extension_available(device, "cl_ext_image_requirements_info"))
     {
-        printf("Extension cl_ext_image_requirements_info not available");
+        log_info("Extension cl_ext_image_requirements_info not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
@@ -387,7 +387,7 @@ int cl_image_requirements_max_val_ext_positive(cl_device_id device,
 {
     if (!is_extension_available(device, "cl_ext_image_requirements_info"))
     {
-        printf("Extension cl_ext_image_requirements_info not available");
+        log_info("Extension cl_ext_image_requirements_info not available\n");
         return TEST_SKIPPED_ITSELF;
     }
 
@@ -412,7 +412,7 @@ int cl_image_requirements_max_val_ext_positive(cl_device_id device,
 
     if (!(max_width <= width_1d && max_width > 0))
     {
-        test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT value");
+        test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT value\n");
     }
 
     /* CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT */
@@ -433,7 +433,7 @@ int cl_image_requirements_max_val_ext_positive(cl_device_id device,
 
     if (!(max_height <= height_2d && max_height > 0))
     {
-        test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT value");
+        test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT value\n");
     }
 
     /* CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT */
@@ -454,7 +454,7 @@ int cl_image_requirements_max_val_ext_positive(cl_device_id device,
 
     if (!(max_depth <= depth_3d && max_depth > 0))
     {
-        test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT value");
+        test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT value\n");
     }
 
     /* CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT */
@@ -475,8 +475,9 @@ int cl_image_requirements_max_val_ext_positive(cl_device_id device,
 
     if (!(max_array_size <= array_size && max_array_size > 0))
     {
-        test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT value");
+        test_fail(
+            "Unexpected CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT value\n");
     }
 
     return TEST_PASS;
-}
-\ No newline at end of file
+}
diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp
index 05aed02c..de7ed0fd 100644
--- a/test_conformance/images/kernel_read_write/test_iterations.cpp
+++ b/test_conformance/images/kernel_read_write/test_iterations.cpp
@@ -281,59 +281,6 @@ template <class T> int determine_validation_error( void *imagePtr, image_descrip
     return 0;
 }
 
-static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d )
-{
-    size_t i = 0;
-    if( gDisableOffsets )
-    {
-        for( size_t y = 0; y < imageInfo->height; y++ )
-        {
-            for( size_t x = 0; x < imageInfo->width; x++, i++ )
-            {
-                xOffsets[ i ] = (float) (xfract + (double) x);
-                yOffsets[ i ] = (float) (yfract + (double) y);
-            }
-        }
-    }
-    else
-    {
-        for( size_t y = 0; y < imageInfo->height; y++ )
-        {
-            for( size_t x = 0; x < imageInfo->width; x++, i++ )
-            {
-                xOffsets[ i ] = (float) (xfract + (double) ((int) x + random_in_range( -10, 10, d )));
-                yOffsets[ i ] = (float) (yfract + (double) ((int) y + random_in_range( -10, 10, d )));
-            }
-        }
-    }
-
-    if( imageSampler->addressing_mode == CL_ADDRESS_NONE )
-    {
-        i = 0;
-        for( size_t y = 0; y < imageInfo->height; y++ )
-        {
-            for( size_t x = 0; x < imageInfo->width; x++, i++ )
-            {
-                xOffsets[ i ] = (float) CLAMP( (double) xOffsets[ i ], 0.0, (double) imageInfo->width - 1.0);
-                yOffsets[ i ] = (float) CLAMP( (double) yOffsets[ i ], 0.0, (double)imageInfo->height - 1.0);
-            }
-        }
-    }
-
-    if( normalized_coords )
-    {
-        i = 0;
-        for( size_t y = 0; y < imageInfo->height; y++ )
-        {
-            for( size_t x = 0; x < imageInfo->width; x++, i++ )
-            {
-                xOffsets[ i ] = (float) ((double) xOffsets[ i ] / (double) imageInfo->width);
-                yOffsets[ i ] = (float) ((double) yOffsets[ i ] / (double) imageInfo->height);
-            }
-        }
-    }
-}
-
 static void InitFloatCoords( image_descriptor *imageInfo, image_sampler_data *imageSampler, float *xOffsets, float *yOffsets, float xfract, float yfract, int normalized_coords, MTdata d, size_t lod)
 {
     size_t i = 0;
@@ -1589,7 +1536,6 @@ int test_read_image_2D( cl_context context, cl_command_queue queue, cl_kernel ke
             if (retCode)
                 return retCode;
         }
-        end:
         if ( gTestMipmaps )
         {
             nextLevelOffset += width_lod * height_lod * get_pixel_size( imageInfo->format );
diff --git a/test_conformance/images/kernel_read_write/test_read_1D.cpp b/test_conformance/images/kernel_read_write/test_read_1D.cpp
index 2a722088..0cbf0989 100644
--- a/test_conformance/images/kernel_read_write/test_read_1D.cpp
+++ b/test_conformance/images/kernel_read_write/test_read_1D.cpp
@@ -434,7 +434,6 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke
         float lod_float = (float)lod;
         size_t resultValuesSize = width_lod * get_explicit_type_size( outputType ) * 4;
         BufferOwningPtr<char> resultValues(malloc(resultValuesSize));
-        char *imagePtr = (char*)imageValues + nextLevelOffset;
         if (gTestMipmaps) {
             //Set the lod kernel arg
             if(gDebugTrace)
diff --git a/test_conformance/images/samplerlessReads/main.cpp b/test_conformance/images/samplerlessReads/main.cpp
index cd377793..a8eb3e36 100644
--- a/test_conformance/images/samplerlessReads/main.cpp
+++ b/test_conformance/images/samplerlessReads/main.cpp
@@ -43,8 +43,12 @@ extern int test_image_set( cl_device_id device, cl_context context, cl_command_q
 
 int test_1D(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
 {
-    return test_image_set( device, context, queue, CL_MEM_OBJECT_IMAGE1D ) +
-           test_image_set( device, context, queue, CL_MEM_OBJECT_IMAGE1D_BUFFER );
+    return test_image_set(device, context, queue, CL_MEM_OBJECT_IMAGE1D);
+}
+int test_1Dbuffer(cl_device_id device, cl_context context,
+                  cl_command_queue queue, int num_elements)
+{
+    return test_image_set(device, context, queue, CL_MEM_OBJECT_IMAGE1D_BUFFER);
 }
 int test_2D(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
 {
@@ -64,11 +68,8 @@ int test_2Darray(cl_device_id device, cl_context context, cl_command_queue queue
 }
 
 test_definition test_list[] = {
-    ADD_TEST( 1D ),
-    ADD_TEST( 2D ),
-    ADD_TEST( 3D ),
-    ADD_TEST( 1Darray ),
-    ADD_TEST( 2Darray ),
+    ADD_TEST(1D), ADD_TEST(1Dbuffer), ADD_TEST(2D),
+    ADD_TEST(3D), ADD_TEST(1Darray),  ADD_TEST(2Darray),
 };
 
 const int test_num = ARRAY_SIZE( test_list );
diff --git a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
index c3a991a7..414a7d53 100644
--- a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
+++ b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp
@@ -78,8 +78,9 @@ int test_read_image_1D_buffer( cl_context context, cl_command_queue queue, cl_ke
     error = clGetMemObjectInfo(image[0], CL_MEM_ASSOCIATED_MEMOBJECT, sizeof(ret), &ret, NULL);
     if ( error != CL_SUCCESS )
     {
-      log_error( "ERROR: Unable to query CL_MEM_ASSOCIATED_MEMOBJECT\n", IGetErrorString( error ) );
-      return error;
+        log_error("ERROR: Unable to query CL_MEM_ASSOCIATED_MEMOBJECT (%s)\n",
+                  IGetErrorString(error));
+        return error;
     }
 
     if (ret != imageBuffer) {
diff --git a/test_conformance/integer_ops/CMakeLists.txt b/test_conformance/integer_ops/CMakeLists.txt
index 5344eabc..7bc991f8 100644
--- a/test_conformance/integer_ops/CMakeLists.txt
+++ b/test_conformance/integer_ops/CMakeLists.txt
@@ -12,6 +12,9 @@ set(${MODULE_NAME}_SOURCES
     verification_and_generation_functions.cpp
     test_popcount.cpp
     test_integer_dot_product.cpp
+    test_extended_bit_ops_extract.cpp
+    test_extended_bit_ops_insert.cpp
+    test_extended_bit_ops_reverse.cpp
 )
 
 include(../CMakeCommon.txt)
diff --git a/test_conformance/integer_ops/main.cpp b/test_conformance/integer_ops/main.cpp
index e57cffd9..59840de7 100644
--- a/test_conformance/integer_ops/main.cpp
+++ b/test_conformance/integer_ops/main.cpp
@@ -1,6 +1,6 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
+// Copyright (c) 2017-2022 The Khronos Group Inc.
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -59,6 +59,10 @@ test_definition test_list[] = {
     ADD_TEST(integer_mul24),
     ADD_TEST(integer_mad24),
 
+    ADD_TEST(extended_bit_ops_extract),
+    ADD_TEST(extended_bit_ops_insert),
+    ADD_TEST(extended_bit_ops_reverse),
+
     ADD_TEST(long_math),
     ADD_TEST(long_logic),
     ADD_TEST(long_shift),
diff --git a/test_conformance/integer_ops/procs.h b/test_conformance/integer_ops/procs.h
index 82311fb9..0907938f 100644
--- a/test_conformance/integer_ops/procs.h
+++ b/test_conformance/integer_ops/procs.h
@@ -1,6 +1,6 @@
 //
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
+// Copyright (c) 2017-2022 The Khronos Group Inc.
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -15,7 +15,6 @@
 //
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
-#include "harness/threadTesting.h"
 #include "harness/typeWrappers.h"
 #include "harness/testHarness.h"
 #include "harness/mt19937.h"
@@ -61,6 +60,18 @@ extern int test_integer_sub_sat(cl_device_id deviceID, cl_context context, cl_co
 extern int test_integer_mul24(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int test_integer_mad24(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 
+extern int test_extended_bit_ops_extract(cl_device_id device_id,
+                                         cl_context context,
+                                         cl_command_queue commands,
+                                         int num_elements);
+extern int test_extended_bit_ops_insert(cl_device_id device_id,
+                                        cl_context context,
+                                        cl_command_queue commands,
+                                        int num_elements);
+extern int test_extended_bit_ops_reverse(cl_device_id device_id,
+                                         cl_context context,
+                                         cl_command_queue commands,
+                                         int num_elements);
 
 extern int test_long_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
 extern int test_long_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
diff --git a/test_conformance/integer_ops/test_absdiff.cpp b/test_conformance/integer_ops/test_absdiff.cpp
index 7459bd2d..710b9c4e 100644
--- a/test_conformance/integer_ops/test_absdiff.cpp
+++ b/test_conformance/integer_ops/test_absdiff.cpp
@@ -22,6 +22,17 @@
 
 #include "procs.h"
 
+template <class Integer>
+static typename std::make_unsigned<Integer>::type abs_diff(Integer a, Integer b)
+{
+    using Unsigned = typename std::make_unsigned<Integer>::type;
+    Unsigned ua = a;
+    Unsigned ub = b;
+    Unsigned diff = ua - ub;
+    if (a < b) diff = -diff;
+    return diff;
+}
+
 static int verify_absdiff_char( const void *p, const void *q, const void *r, size_t n, const char *sizeName, size_t vecSize )
 {
     const cl_char *inA = (const cl_char *)p;
@@ -30,9 +41,7 @@ static int verify_absdiff_char( const void *p, const void *q, const void *r, siz
     size_t i;
     for( i = 0; i < n; i++ )
     {
-        cl_uchar r = inA[i] - inB[i];
-        if( inB[i] > inA[i] )
-            r = inB[i] - inA[i];
+        cl_uchar r = abs_diff(inA[i], inB[i]);
         if( r != outptr[i] )
         { log_info( "%ld) Failure for absdiff( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
     }
@@ -47,9 +56,7 @@ static int verify_absdiff_uchar( const void *p, const void *q, const void *r, si
     size_t i;
     for( i = 0; i < n; i++ )
     {
-        cl_uchar r = inA[i] - inB[i];
-        if( inB[i] > inA[i] )
-            r = inB[i] - inA[i];
+        cl_uchar r = abs_diff(inA[i], inB[i]);
         if( r != outptr[i] )
         { log_info( "%ld) Failure for absdiff( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
     }
@@ -64,9 +71,7 @@ static int verify_absdiff_short( const void *p, const void *q, const void *r, si
     size_t i;
     for( i = 0; i < n; i++ )
     {
-        cl_ushort r = inA[i] - inB[i];
-        if( inB[i] > inA[i] )
-            r = inB[i] - inA[i];
+        cl_ushort r = abs_diff(inA[i], inB[i]);
         if( r != outptr[i] )
         { log_info( "%ld) Failure for absdiff( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
     }
@@ -81,9 +86,7 @@ static int verify_absdiff_ushort( const void *p, const void *q, const void *r, s
     size_t i;
     for( i = 0; i < n; i++ )
     {
-        cl_ushort r = inA[i] - inB[i];
-        if( inB[i] > inA[i] )
-            r = inB[i] - inA[i];
+        cl_ushort r = abs_diff(inA[i], inB[i]);
         if( r != outptr[i] )
         { log_info( "%ld) Failure for absdiff( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
     }
@@ -98,9 +101,7 @@ static int verify_absdiff_int( const void *p, const void *q, const void *r, size
     size_t i;
     for( i = 0; i < n; i++ )
     {
-        cl_uint r = inA[i] - inB[i];
-        if( inB[i] > inA[i] )
-            r = inB[i] - inA[i];
+        cl_uint r = abs_diff(inA[i], inB[i]);
         if( r != outptr[i] )
         {
             log_info( "%ld) Failure for absdiff( (int%s) 0x%8.8x, (int%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] );
@@ -118,9 +119,7 @@ static int verify_absdiff_uint( const void *p, const void *q, const void *r, siz
     size_t i;
     for( i = 0; i < n; i++ )
     {
-        cl_uint r = inA[i] - inB[i];
-        if( inB[i] > inA[i] )
-            r = inB[i] - inA[i];
+        cl_uint r = abs_diff(inA[i], inB[i]);
         if( r != outptr[i] )
         { log_info( "%ld) Failure for absdiff( (uint%s) 0x%8.8x, (uint%s) 0x%8.8x) = *0x%8.8x vs 0x%8.8x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
     }
@@ -135,9 +134,7 @@ static int verify_absdiff_long( const void *p, const void *q, const void *r, siz
     size_t i;
     for( i = 0; i < n; i++ )
     {
-        cl_ulong r = inA[i] - inB[i];
-        if( inB[i] > inA[i] )
-            r = inB[i] - inA[i];
+        cl_ulong r = abs_diff(inA[i], inB[i]);
         if( r != outptr[i] )
         { log_info( "%ld) Failure for absdiff( (long%s) 0x%16.16llx, (long%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
     }
@@ -152,9 +149,7 @@ static int verify_absdiff_ulong( const void *p, const void *q, const void *r, si
     size_t i;
     for( i = 0; i < n; i++ )
     {
-        cl_ulong r = inA[i] - inB[i];
-        if( inB[i] > inA[i] )
-            r = inB[i] - inA[i];
+        cl_ulong r = abs_diff(inA[i], inB[i]);
         if( r != outptr[i] )
         { log_info( "%ld) Failure for absdiff( (ulong%s) 0x%16.16llx, (ulong%s) 0x%16.16llx) = *0x%16.16llx vs 0x%16.16llx\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; }
     }
diff --git a/test_conformance/integer_ops/test_extended_bit_ops_extract.cpp b/test_conformance/integer_ops/test_extended_bit_ops_extract.cpp
new file mode 100644
index 00000000..9b4e0950
--- /dev/null
+++ b/test_conformance/integer_ops/test_extended_bit_ops_extract.cpp
@@ -0,0 +1,287 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <algorithm>
+#include <numeric>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#include "procs.h"
+#include "harness/integer_ops_test_info.h"
+#include "harness/testHarness.h"
+
+template <typename T>
+static typename std::make_unsigned<T>::type
+arithmetic_shift_right(T tx, cl_uint count)
+{
+    typedef typename std::make_unsigned<T>::type unsigned_t;
+    unsigned_t x = static_cast<unsigned_t>(tx);
+
+    // To implement an arithmetic shift right:
+    // - If the sign bit is not set, shift as usual.
+    // - Otherwise, flip all of the bits, shift, then flip back.
+    unsigned_t s = -(x >> (sizeof(x) * 8 - 1));
+    unsigned_t result = (s ^ x) >> count ^ s;
+
+    return result;
+}
+
+template <typename T>
+static typename std::make_unsigned<T>::type
+cpu_bit_extract_signed(T tbase, cl_uint offset, cl_uint count)
+{
+    typedef typename std::make_signed<T>::type unsigned_t;
+
+    assert(offset <= sizeof(T) * 8);
+    assert(count <= sizeof(T) * 8);
+    assert(offset + count <= sizeof(T) * 8);
+
+    unsigned_t base = static_cast<unsigned_t>(tbase);
+    unsigned_t result;
+
+    if (count == 0)
+    {
+        result = 0;
+    }
+    else
+    {
+        result = base << (sizeof(T) * 8 - offset - count);
+        result = arithmetic_shift_right(result, sizeof(T) * 8 - count);
+    }
+
+    return result;
+}
+
+template <typename T>
+static typename std::make_unsigned<T>::type
+cpu_bit_extract_unsigned(T tbase, cl_uint offset, cl_uint count)
+{
+    typedef typename std::make_unsigned<T>::type unsigned_t;
+
+    assert(offset <= sizeof(T) * 8);
+    assert(count <= sizeof(T) * 8);
+    assert(offset + count <= sizeof(T) * 8);
+
+    unsigned_t base = static_cast<unsigned_t>(tbase);
+    unsigned_t result;
+
+    if (count == 0)
+    {
+        result = 0;
+    }
+    else
+    {
+        result = base << (sizeof(T) * 8 - offset - count);
+        result = result >> (sizeof(T) * 8 - count);
+    }
+
+    return result;
+}
+
+template <typename T, size_t N>
+static void
+calculate_reference(std::vector<typename std::make_unsigned<T>::type>& sref,
+                    std::vector<typename std::make_unsigned<T>::type>& uref,
+                    const std::vector<T>& base)
+{
+    sref.resize(base.size());
+    uref.resize(base.size());
+    for (size_t i = 0; i < base.size(); i++)
+    {
+        cl_uint offset = (i / N) / (sizeof(T) * 8 + 1);
+        cl_uint count = (i / N) % (sizeof(T) * 8 + 1);
+        if (offset + count > sizeof(T) * 8)
+        {
+            count = (sizeof(T) * 8) - offset;
+        }
+        sref[i] = cpu_bit_extract_signed(base[i], offset, count);
+        uref[i] = cpu_bit_extract_unsigned(base[i], offset, count);
+    }
+}
+
+static constexpr const char* kernel_source = R"CLC(
+__kernel void test_bitfield_extract(__global SIGNED_TYPE* sdst, __global UNSIGNED_TYPE* udst, __global TYPE* base)
+{
+    int index = get_global_id(0);
+    uint offset = index / (sizeof(BASETYPE) * 8 + 1);
+    uint count = index % (sizeof(BASETYPE) * 8 + 1);
+    if (offset + count > sizeof(BASETYPE) * 8) {
+        count = (sizeof(BASETYPE) * 8) - offset;
+    }
+    sdst[index] = bitfield_extract_signed(base[index], offset, count);
+    udst[index] = bitfield_extract_unsigned(base[index], offset, count);
+}
+)CLC";
+
+static constexpr const char* kernel_source_vec3 = R"CLC(
+__kernel void test_bitfield_extract(__global SIGNED_BASETYPE* sdst, __global UNSIGNED_BASETYPE* udst, __global BASETYPE* base)
+{
+    int index = get_global_id(0);
+    uint offset = index / (sizeof(BASETYPE) * 8 + 1);
+    uint count = index % (sizeof(BASETYPE) * 8 + 1);
+    if (offset + count > sizeof(BASETYPE) * 8) {
+        count = (sizeof(BASETYPE) * 8) - offset;
+    }
+    TYPE b = vload3(index, base);
+    SIGNED_TYPE s = bitfield_extract_signed(b, offset, count);
+    UNSIGNED_TYPE u = bitfield_extract_unsigned(b, offset, count);
+    vstore3(s, index, sdst);
+    vstore3(u, index, udst);
+}
+)CLC";
+
+template <typename T, size_t N>
+static int test_vectype(cl_device_id device, cl_context context,
+                        cl_command_queue queue)
+{
+    // Because converting from an unsigned type to a signed type is
+    // implementation-defined if the most significant bit is set until C++ 20,
+    // compute all reference results using unsigned types.
+    typedef typename std::make_unsigned<T>::type unsigned_t;
+
+    cl_int error = CL_SUCCESS;
+
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+
+    std::string buildOptions;
+    buildOptions += " -DTYPE=";
+    buildOptions +=
+        TestInfo<T>::deviceTypeName + ((N > 1) ? std::to_string(N) : "");
+    buildOptions += " -DSIGNED_TYPE=";
+    buildOptions +=
+        TestInfo<T>::deviceTypeNameSigned + ((N > 1) ? std::to_string(N) : "");
+    buildOptions += " -DUNSIGNED_TYPE=";
+    buildOptions += TestInfo<T>::deviceTypeNameUnsigned
+        + ((N > 1) ? std::to_string(N) : "");
+    buildOptions += " -DBASETYPE=";
+    buildOptions += TestInfo<T>::deviceTypeName;
+    buildOptions += " -DSIGNED_BASETYPE=";
+    buildOptions += TestInfo<T>::deviceTypeNameSigned;
+    buildOptions += " -DUNSIGNED_BASETYPE=";
+    buildOptions += TestInfo<T>::deviceTypeNameUnsigned;
+
+    const size_t ELEMENTS_TO_TEST = (sizeof(T) * 8 + 1) * (sizeof(T) * 8 + 1);
+
+    std::vector<T> base(ELEMENTS_TO_TEST * N);
+    fill_vector_with_random_data(base);
+
+    std::vector<unsigned_t> sreference;
+    std::vector<unsigned_t> ureference;
+    calculate_reference<T, N>(sreference, ureference, base);
+
+    const char* source = (N == 3) ? kernel_source_vec3 : kernel_source;
+    error = create_single_kernel_helper(context, &program, &kernel, 1, &source,
+                                        "test_bitfield_extract",
+                                        buildOptions.c_str());
+    test_error(error, "Unable to create test_bitfield_insert kernel");
+
+    clMemWrapper sdst =
+        clCreateBuffer(context, 0, sreference.size() * sizeof(T), NULL, &error);
+    test_error(error, "Unable to create signed output buffer");
+
+    clMemWrapper udst =
+        clCreateBuffer(context, 0, ureference.size() * sizeof(T), NULL, &error);
+    test_error(error, "Unable to create unsigned output buffer");
+
+    clMemWrapper src_base =
+        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, base.size() * sizeof(T),
+                       base.data(), &error);
+    test_error(error, "Unable to create base buffer");
+
+    error = clSetKernelArg(kernel, 0, sizeof(sdst), &sdst);
+    test_error(error, "Unable to set signed output buffer kernel arg");
+
+    error = clSetKernelArg(kernel, 1, sizeof(udst), &udst);
+    test_error(error, "Unable to set unsigned output buffer kernel arg");
+
+    error = clSetKernelArg(kernel, 2, sizeof(src_base), &src_base);
+    test_error(error, "Unable to set base buffer kernel arg");
+
+    size_t global_work_size[] = { sreference.size() / N };
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
+                                   NULL, 0, NULL, NULL);
+    test_error(error, "Unable to enqueue test kernel");
+
+    error = clFinish(queue);
+    test_error(error, "clFinish failed after test kernel");
+
+    std::vector<unsigned_t> sresults(sreference.size(), 99);
+    error = clEnqueueReadBuffer(queue, sdst, CL_TRUE, 0,
+                                sresults.size() * sizeof(T), sresults.data(), 0,
+                                NULL, NULL);
+    test_error(error, "Unable to read signed data after test kernel");
+
+    if (sresults != sreference)
+    {
+        log_error("Signed result buffer did not match reference buffer!\n");
+        return TEST_FAIL;
+    }
+
+    std::vector<unsigned_t> uresults(ureference.size(), 99);
+    error = clEnqueueReadBuffer(queue, udst, CL_TRUE, 0,
+                                uresults.size() * sizeof(T), uresults.data(), 0,
+                                NULL, NULL);
+    test_error(error, "Unable to read unsigned data after test kernel");
+
+    if (uresults != ureference)
+    {
+        log_error("Unsigned result buffer did not match reference buffer!\n");
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
+
+template <typename T>
+static int test_type(cl_device_id device, cl_context context,
+                     cl_command_queue queue)
+{
+    log_info("    testing type %s\n", TestInfo<T>::deviceTypeName);
+
+    return test_vectype<T, 1>(device, context, queue)
+        | test_vectype<T, 2>(device, context, queue)
+        | test_vectype<T, 3>(device, context, queue)
+        | test_vectype<T, 4>(device, context, queue)
+        | test_vectype<T, 8>(device, context, queue)
+        | test_vectype<T, 16>(device, context, queue);
+}
+
+int test_extended_bit_ops_extract(cl_device_id device, cl_context context,
+                                  cl_command_queue queue, int num_elements)
+{
+    if (is_extension_available(device, "cl_khr_extended_bit_ops"))
+    {
+        int result = TEST_PASS;
+
+        result |= test_type<cl_char>(device, context, queue);
+        result |= test_type<cl_uchar>(device, context, queue);
+        result |= test_type<cl_short>(device, context, queue);
+        result |= test_type<cl_ushort>(device, context, queue);
+        result |= test_type<cl_int>(device, context, queue);
+        result |= test_type<cl_uint>(device, context, queue);
+        if (gHasLong)
+        {
+            result |= test_type<cl_long>(device, context, queue);
+            result |= test_type<cl_ulong>(device, context, queue);
+        }
+        return result;
+    }
+
+    log_info("cl_khr_extended_bit_ops is not supported\n");
+    return TEST_SKIPPED_ITSELF;
+}
diff --git a/test_conformance/integer_ops/test_extended_bit_ops_insert.cpp b/test_conformance/integer_ops/test_extended_bit_ops_insert.cpp
new file mode 100644
index 00000000..e6d8522c
--- /dev/null
+++ b/test_conformance/integer_ops/test_extended_bit_ops_insert.cpp
@@ -0,0 +1,214 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <algorithm>
+#include <numeric>
+#include <string>
+#include <type_traits>
+#include <vector>
+
+#include "procs.h"
+#include "harness/integer_ops_test_info.h"
+#include "harness/testHarness.h"
+
+template <typename T>
+static typename std::make_unsigned<T>::type
+cpu_bit_insert(T tbase, T tinsert, cl_uint offset, cl_uint count)
+{
+    assert(offset <= sizeof(T) * 8);
+    assert(count <= sizeof(T) * 8);
+    assert(offset + count <= sizeof(T) * 8);
+
+    cl_ulong base = static_cast<cl_ulong>(tbase);
+    cl_ulong insert = static_cast<cl_ulong>(tinsert);
+
+    cl_ulong mask = (count < 64) ? ((1ULL << count) - 1) << offset : ~0ULL;
+    cl_ulong result = ((insert << offset) & mask) | (base & ~mask);
+
+    return static_cast<typename std::make_unsigned<T>::type>(result);
+}
+
+template <typename T, size_t N>
+static void
+calculate_reference(std::vector<typename std::make_unsigned<T>::type>& ref,
+                    const std::vector<T>& base, const std::vector<T>& insert)
+{
+    ref.resize(base.size());
+    for (size_t i = 0; i < base.size(); i++)
+    {
+        cl_uint offset = (i / N) / (sizeof(T) * 8 + 1);
+        cl_uint count = (i / N) % (sizeof(T) * 8 + 1);
+        if (offset + count > sizeof(T) * 8)
+        {
+            count = (sizeof(T) * 8) - offset;
+        }
+        ref[i] = cpu_bit_insert(base[i], insert[i], offset, count);
+    }
+}
+
+static constexpr const char* kernel_source = R"CLC(
+__kernel void test_bitfield_insert(__global TYPE* dst, __global TYPE* base, __global TYPE* insert)
+{
+    int index = get_global_id(0);
+    uint offset = index / (sizeof(BASETYPE) * 8 + 1);
+    uint count = index % (sizeof(BASETYPE) * 8 + 1);
+    if (offset + count > sizeof(BASETYPE) * 8) {
+        count = (sizeof(BASETYPE) * 8) - offset;
+    }
+    dst[index] = bitfield_insert(base[index], insert[index], offset, count);
+}
+)CLC";
+
+static constexpr const char* kernel_source_vec3 = R"CLC(
+__kernel void test_bitfield_insert(__global BASETYPE* dst, __global BASETYPE* base, __global BASETYPE* insert)
+{
+    int index = get_global_id(0);
+    uint offset = index / (sizeof(BASETYPE) * 8 + 1);
+    uint count = index % (sizeof(BASETYPE) * 8 + 1);
+    if (offset + count > sizeof(BASETYPE) * 8) {
+        count = (sizeof(BASETYPE) * 8) - offset;
+    }
+    TYPE b = vload3(index, base);
+    TYPE i = vload3(index, insert);
+    TYPE d = bitfield_insert(b, i, offset, count);
+    vstore3(d, index, dst);
+}
+)CLC";
+
+template <typename T, size_t N>
+static int test_vectype(cl_device_id device, cl_context context,
+                        cl_command_queue queue)
+{
+    // Because converting from an unsigned type to a signed type is
+    // implementation-defined if the most significant bit is set until C++ 20,
+    // compute all reference results using unsigned types.
+    typedef typename std::make_unsigned<T>::type unsigned_t;
+
+    cl_int error = CL_SUCCESS;
+
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+
+    std::string buildOptions{ "-DTYPE=" };
+    buildOptions += TestInfo<T>::deviceTypeName;
+    if (N > 1)
+    {
+        buildOptions += std::to_string(N);
+    }
+    buildOptions += " -DBASETYPE=";
+    buildOptions += TestInfo<T>::deviceTypeName;
+
+    const size_t ELEMENTS_TO_TEST = (sizeof(T) * 8 + 1) * (sizeof(T) * 8 + 1);
+
+    std::vector<T> base(ELEMENTS_TO_TEST * N);
+    std::fill(base.begin(), base.end(), static_cast<T>(0xA5A5A5A5A5A5A5A5ULL));
+
+    std::vector<T> insert(ELEMENTS_TO_TEST * N);
+    fill_vector_with_random_data(insert);
+
+    std::vector<unsigned_t> reference;
+    calculate_reference<T, N>(reference, base, insert);
+
+    const char* source = (N == 3) ? kernel_source_vec3 : kernel_source;
+    error = create_single_kernel_helper(context, &program, &kernel, 1, &source,
+                                        "test_bitfield_insert",
+                                        buildOptions.c_str());
+    test_error(error, "Unable to create test_bitfield_insert kernel");
+
+    clMemWrapper dst =
+        clCreateBuffer(context, 0, reference.size() * sizeof(T), NULL, &error);
+    test_error(error, "Unable to create output buffer");
+
+    clMemWrapper src_base =
+        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, base.size() * sizeof(T),
+                       base.data(), &error);
+    test_error(error, "Unable to create base buffer");
+
+    clMemWrapper src_insert =
+        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, insert.size() * sizeof(T),
+                       insert.data(), &error);
+    test_error(error, "Unable to create insert buffer");
+
+    error = clSetKernelArg(kernel, 0, sizeof(dst), &dst);
+    test_error(error, "Unable to set output buffer kernel arg");
+
+    error = clSetKernelArg(kernel, 1, sizeof(src_base), &src_base);
+    test_error(error, "Unable to set base buffer kernel arg");
+
+    error = clSetKernelArg(kernel, 2, sizeof(src_insert), &src_insert);
+    test_error(error, "Unable to set insert buffer kernel arg");
+
+    size_t global_work_size[] = { reference.size() / N };
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
+                                   NULL, 0, NULL, NULL);
+    test_error(error, "Unable to enqueue test kernel");
+
+    error = clFinish(queue);
+    test_error(error, "clFinish failed after test kernel");
+
+    std::vector<unsigned_t> results(reference.size(), 99);
+    error =
+        clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, results.size() * sizeof(T),
+                            results.data(), 0, NULL, NULL);
+    test_error(error, "Unable to read data after test kernel");
+
+    if (results != reference)
+    {
+        log_error("Result buffer did not match reference buffer!\n");
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
+
+template <typename T>
+static int test_type(cl_device_id device, cl_context context,
+                     cl_command_queue queue)
+{
+    log_info("    testing type %s\n", TestInfo<T>::deviceTypeName);
+
+    return test_vectype<T, 1>(device, context, queue)
+        | test_vectype<T, 2>(device, context, queue)
+        | test_vectype<T, 3>(device, context, queue)
+        | test_vectype<T, 4>(device, context, queue)
+        | test_vectype<T, 8>(device, context, queue)
+        | test_vectype<T, 16>(device, context, queue);
+}
+
+int test_extended_bit_ops_insert(cl_device_id device, cl_context context,
+                                 cl_command_queue queue, int num_elements)
+{
+    if (is_extension_available(device, "cl_khr_extended_bit_ops"))
+    {
+        int result = TEST_PASS;
+
+        result |= test_type<cl_char>(device, context, queue);
+        result |= test_type<cl_uchar>(device, context, queue);
+        result |= test_type<cl_short>(device, context, queue);
+        result |= test_type<cl_ushort>(device, context, queue);
+        result |= test_type<cl_int>(device, context, queue);
+        result |= test_type<cl_uint>(device, context, queue);
+        if (gHasLong)
+        {
+            result |= test_type<cl_long>(device, context, queue);
+            result |= test_type<cl_ulong>(device, context, queue);
+        }
+        return result;
+    }
+
+    log_info("cl_khr_extended_bit_ops is not supported\n");
+    return TEST_SKIPPED_ITSELF;
+}
diff --git a/test_conformance/integer_ops/test_extended_bit_ops_reverse.cpp b/test_conformance/integer_ops/test_extended_bit_ops_reverse.cpp
new file mode 100644
index 00000000..136f9d1d
--- /dev/null
+++ b/test_conformance/integer_ops/test_extended_bit_ops_reverse.cpp
@@ -0,0 +1,177 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <algorithm>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "procs.h"
+#include "harness/integer_ops_test_info.h"
+#include "harness/testHarness.h"
+
+template <typename T> static T cpu_bit_reverse(T base)
+{
+    T result = 0;
+
+    const size_t count = sizeof(T) * 8;
+    for (size_t i = 0; i < count; i++)
+    {
+        if (base & ((T)1 << i))
+        {
+            result |= ((T)1 << (count - i - 1));
+        }
+    }
+    return result;
+}
+
+template <typename T>
+static void calculate_reference(std::vector<T>& ref, const std::vector<T>& base)
+{
+    ref.resize(base.size());
+    for (size_t i = 0; i < base.size(); i++)
+    {
+        ref[i] = cpu_bit_reverse(base[i]);
+    }
+}
+
+static constexpr const char* kernel_source = R"CLC(
+__kernel void test_bit_reverse(__global TYPE* dst, __global TYPE* base)
+{
+    int index = get_global_id(0);
+    dst[index] = bit_reverse(base[index]);
+}
+)CLC";
+
+static constexpr const char* kernel_source_vec3 = R"CLC(
+__kernel void test_bit_reverse(__global BASETYPE* dst, __global BASETYPE* base)
+{
+    int index = get_global_id(0);
+    TYPE s = vload3(index, base);
+    TYPE d = bit_reverse(s);
+    vstore3(d, index, dst);
+}
+)CLC";
+
+template <typename T, size_t N>
+static int test_vectype(cl_device_id device, cl_context context,
+                        cl_command_queue queue)
+{
+    cl_int error = CL_SUCCESS;
+
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+
+    std::string buildOptions{ "-DTYPE=" };
+    buildOptions += TestInfo<T>::deviceTypeName;
+    if (N > 1)
+    {
+        buildOptions += std::to_string(N);
+    }
+    buildOptions += " -DBASETYPE=";
+    buildOptions += TestInfo<T>::deviceTypeName;
+
+    const size_t ELEMENTS_TO_TEST = 65536;
+    std::vector<T> base(ELEMENTS_TO_TEST * N);
+    fill_vector_with_random_data(base);
+
+    std::vector<T> reference;
+    calculate_reference(reference, base);
+
+    const char* source = (N == 3) ? kernel_source_vec3 : kernel_source;
+    error =
+        create_single_kernel_helper(context, &program, &kernel, 1, &source,
+                                    "test_bit_reverse", buildOptions.c_str());
+    test_error(error, "Unable to create test_bit_reverse kernel");
+
+    clMemWrapper src;
+    clMemWrapper dst;
+
+    dst =
+        clCreateBuffer(context, 0, reference.size() * sizeof(T), NULL, &error);
+    test_error(error, "Unable to create output buffer");
+
+    src = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, base.size() * sizeof(T),
+                         base.data(), &error);
+    test_error(error, "Unable to create base buffer");
+
+    error = clSetKernelArg(kernel, 0, sizeof(dst), &dst);
+    test_error(error, "Unable to set output buffer kernel arg");
+
+    error = clSetKernelArg(kernel, 1, sizeof(src), &src);
+    test_error(error, "Unable to set base buffer kernel arg");
+
+    size_t global_work_size[] = { reference.size() / N };
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size,
+                                   NULL, 0, NULL, NULL);
+    test_error(error, "Unable to enqueue test kernel");
+
+    error = clFinish(queue);
+    test_error(error, "clFinish failed after test kernel");
+
+    std::vector<T> results(reference.size(), 99);
+    error =
+        clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, results.size() * sizeof(T),
+                            results.data(), 0, NULL, NULL);
+    test_error(error, "Unable to read data after test kernel");
+
+    if (results != reference)
+    {
+        log_error("Result buffer did not match reference buffer!\n");
+        return TEST_FAIL;
+    }
+
+    return TEST_PASS;
+}
+
+template <typename T>
+static int test_type(cl_device_id device, cl_context context,
+                     cl_command_queue queue)
+{
+    log_info("    testing type %s\n", TestInfo<T>::deviceTypeName);
+
+    return test_vectype<T, 1>(device, context, queue)
+        | test_vectype<T, 2>(device, context, queue)
+        | test_vectype<T, 3>(device, context, queue)
+        | test_vectype<T, 4>(device, context, queue)
+        | test_vectype<T, 8>(device, context, queue)
+        | test_vectype<T, 16>(device, context, queue);
+}
+
+int test_extended_bit_ops_reverse(cl_device_id device, cl_context context,
+                                  cl_command_queue queue, int num_elements)
+{
+    if (is_extension_available(device, "cl_khr_extended_bit_ops"))
+    {
+        int result = TEST_PASS;
+
+        result |= test_type<cl_char>(device, context, queue);
+        result |= test_type<cl_uchar>(device, context, queue);
+        result |= test_type<cl_short>(device, context, queue);
+        result |= test_type<cl_ushort>(device, context, queue);
+        result |= test_type<cl_int>(device, context, queue);
+        result |= test_type<cl_uint>(device, context, queue);
+        if (gHasLong)
+        {
+            result |= test_type<cl_long>(device, context, queue);
+            result |= test_type<cl_ulong>(device, context, queue);
+        }
+        return result;
+    }
+
+    log_info("cl_khr_extended_bit_ops is not supported\n");
+    return TEST_SKIPPED_ITSELF;
+}
diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp
index f18d0b97..cd47c76b 100644
--- a/test_conformance/math_brute_force/binary_double.cpp
+++ b/test_conformance/math_brute_force/binary_double.cpp
@@ -25,100 +25,16 @@ namespace {
 
 const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022);
 
-int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global double",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in, __global double* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
-        "       d0 = ",
-        name,
-        "( d0, d1 );\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       double3 d1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = ",
-        name,
-        "( d0, d1 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
-                       info->kernels[vectorSize].data(),
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetBinaryKernel(kernel_name, builtin, ParameterType::Double,
+                               ParameterType::Double, ParameterType::Double,
+                               vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 // Thread specific data for a worker thread
@@ -306,24 +222,27 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
     Force64BitFPUPrecision();
 
-    // start the map of the output arrays
     cl_event e[VECTOR_SIZE_COUNT];
     cl_ulong *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    if (gHostFill)
     {
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
+        // start the map of the output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
+            out[j] = (cl_ulong *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_size, 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                           error);
+                return error;
+            }
         }
-    }
 
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+        // Get that moving
+        if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+    }
 
     // Init input array
     cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
@@ -332,8 +251,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     int totalSpecialValueCount = specialValuesCount * specialValuesCount;
     int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
 
+    // Test edge cases
     if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
+    {
         cl_double *fp = (cl_double *)p;
         cl_double *fp2 = (cl_double *)p2;
         uint32_t x, y;
@@ -354,7 +274,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         }
     }
 
-    // Init any remaining values.
+    // Init any remaining values
     for (; idx < buffer_elements; idx++)
     {
         p[idx] = genrand_int64(d);
@@ -365,43 +285,60 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                       buffer_size, p, 0, NULL, NULL)))
     {
         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
+        return error;
     }
 
     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
                                       buffer_size, p2, 0, NULL, NULL)))
     {
         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
+        return error;
     }
 
     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
     {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
+        if (gHostFill)
         {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
         }
 
         // Fill the result buffer with garbage, so that old results don't carry
         // over
         uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
+        if (gHostFill)
         {
-            vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
-                       error);
-            goto exit;
+            memset_pattern4(out[j], &pattern, buffer_size);
+            if ((error = clEnqueueUnmapMemObject(
+                     tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+                           error);
+                return error;
+            }
+        }
+        else
+        {
+            if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                             &pattern, sizeof(pattern), 0,
+                                             buffer_size, 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                           error);
+                return error;
+            }
         }
 
-        // run the kernel
+        // Run the kernel
         size_t vectorCount =
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
@@ -431,7 +368,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                             &vectorCount, NULL, 0, NULL, NULL)))
         {
             vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
+            return error;
         }
     }
 
@@ -459,7 +396,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         {
             vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
                        error);
-            goto exit;
+            return error;
         }
     }
 
@@ -605,8 +542,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                "%.13la}: *%.13la vs. %.13la\n",
                                name, sizeNames[k], err, s[j], s2[j], r[j],
                                test);
-                    error = -1;
-                    goto exit;
+                    return -1;
                 }
             }
         }
@@ -642,8 +578,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         fflush(stdout);
     }
 
-exit:
-    return error;
+    return CL_SUCCESS;
 }
 
 } // anonymous namespace
@@ -684,13 +619,6 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
     test_info.skipNanInf = 0;
     test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
 
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        test_info.k[i].resize(test_info.threadCount, nullptr);
-    }
-
     test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
@@ -706,7 +634,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
             vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
         test_info.tinfo[i].inBuf2 =
             clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
@@ -716,7 +644,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
             vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
 
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
@@ -729,7 +657,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
                 vlog_error("Error: Unable to create sub-buffer of "
                            "gOutBuffer[%d] for region {%zd, %zd}\n",
                            (int)j, region.origin, region.size);
-                goto exit;
+                return error;
             }
         }
         test_info.tinfo[i].tQueue =
@@ -737,27 +665,26 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
         if (NULL == test_info.tinfo[i].tQueue || error)
         {
             vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
+            return error;
         }
 
         test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
     }
 
     // Init the kernels
-    {
-        BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
-                                    test_info.programs, f->nameInCode,
-                                    relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
+    BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+                                test_info.programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     // Run the kernels
     if (!gSkipCorrectnessTesting)
     {
         error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+        if (error) return error;
 
         // Accumulate the arithmetic errors
         for (cl_uint i = 0; i < test_info.threadCount; i++)
@@ -770,8 +697,6 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
             }
         }
 
-        if (error) goto exit;
-
         if (gWimpyMode)
             vlog("Wimp pass");
         else
@@ -782,15 +707,5 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        for (auto &kernel : test_info.k[i])
-        {
-            clReleaseKernel(kernel);
-        }
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp
index fe1491d7..3bab4057 100644
--- a/test_conformance/math_brute_force/binary_float.cpp
+++ b/test_conformance/math_brute_force/binary_float.cpp
@@ -25,98 +25,16 @@ namespace {
 
 const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126);
 
-int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in, __global float* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
-                       info->kernels[vectorSize].data(),
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetBinaryKernel(kernel_name, builtin, ParameterType::Float,
+                               ParameterType::Float, ParameterType::Float,
+                               vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 // Thread specific data for a worker thread
@@ -309,24 +227,27 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         }
     }
 
-    // start the map of the output arrays
     cl_event e[VECTOR_SIZE_COUNT];
     cl_uint *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    if (gHostFill)
     {
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
+        // start the map of the output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
+            out[j] = (cl_uint *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_size, 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                           error);
+                return error;
+            }
         }
-    }
 
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+        // Get that moving
+        if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+    }
 
     // Init input array
     cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
@@ -335,8 +256,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     int totalSpecialValueCount = specialValuesCount * specialValuesCount;
     int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
 
+    // Test edge cases
     if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
+    {
         float *fp = (float *)p;
         float *fp2 = (float *)p2;
         uint32_t x, y;
@@ -358,7 +280,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         }
     }
 
-    // Init any remaining values.
+    // Init any remaining values
     for (; idx < buffer_elements; idx++)
     {
         p[idx] = genrand_int32(d);
@@ -369,43 +291,60 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                       buffer_size, p, 0, NULL, NULL)))
     {
         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
+        return error;
     }
 
     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
                                       buffer_size, p2, 0, NULL, NULL)))
     {
         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
+        return error;
     }
 
     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
     {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
+        if (gHostFill)
         {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
         }
 
         // Fill the result buffer with garbage, so that old results don't carry
         // over
         uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
+        if (gHostFill)
         {
-            vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
-                       error);
-            goto exit;
+            memset_pattern4(out[j], &pattern, buffer_size);
+            if ((error = clEnqueueUnmapMemObject(
+                     tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+                           error);
+                return error;
+            }
+        }
+        else
+        {
+            if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                             &pattern, sizeof(pattern), 0,
+                                             buffer_size, 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                           error);
+                return error;
+            }
         }
 
-        // run the kernel
+        // Run the kernel
         size_t vectorCount =
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
@@ -435,7 +374,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                             &vectorCount, NULL, 0, NULL, NULL)))
         {
             vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
+            return error;
         }
     }
 
@@ -447,7 +386,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         if ((error = clFinish(tinfo->tQueue)))
         {
             vlog_error("Error: clFinish failed! err: %d\n", error);
-            goto exit;
+            return error;
         }
         return CL_SUCCESS;
     }
@@ -502,7 +441,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         {
             vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
                        error);
-            goto exit;
+            return error;
         }
     }
 
@@ -759,8 +698,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                             name, sizeNames[k], err, s[j], ((cl_uint *)s)[j],
                             s2[j], ((cl_uint *)s2)[j], r[j], test,
                             ((cl_uint *)&test)[0], j);
-                        error = -1;
-                        goto exit;
+                        return -1;
                     }
                 }
             }
@@ -799,8 +737,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         fflush(stdout);
     }
 
-exit:
-    return error;
+    return CL_SUCCESS;
 }
 
 } // anonymous namespace
@@ -841,13 +778,6 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
     test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
 
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        test_info.k[i].resize(test_info.threadCount, nullptr);
-    }
-
     test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
@@ -863,7 +793,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
             vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
         test_info.tinfo[i].inBuf2 =
             clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
@@ -873,7 +803,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
             vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
 
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
@@ -886,7 +816,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
                 vlog_error("Error: Unable to create sub-buffer of "
                            "gOutBuffer[%d] for region {%zd, %zd}\n",
                            (int)j, region.origin, region.size);
-                goto exit;
+                return error;
             }
         }
         test_info.tinfo[i].tQueue =
@@ -894,27 +824,26 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
         if (NULL == test_info.tinfo[i].tQueue || error)
         {
             vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
+            return error;
         }
 
         test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
     }
 
     // Init the kernels
-    {
-        BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
-                                    test_info.programs, f->nameInCode,
-                                    relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
+    BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+                                test_info.programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     // Run the kernels
     if (!gSkipCorrectnessTesting)
     {
         error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+        if (error) return error;
 
         // Accumulate the arithmetic errors
         for (cl_uint i = 0; i < test_info.threadCount; i++)
@@ -927,8 +856,6 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
             }
         }
 
-        if (error) goto exit;
-
         if (gWimpyMode)
             vlog("Wimp pass");
         else
@@ -939,15 +866,5 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        for (auto &kernel : test_info.k[i])
-        {
-            clReleaseKernel(kernel);
-        }
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp
index f8786e68..a6c28557 100644
--- a/test_conformance/math_brute_force/binary_i_double.cpp
+++ b/test_conformance/math_brute_force/binary_i_double.cpp
@@ -24,100 +24,16 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global int",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in, __global int* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       int3 i0 = vload3( 0, in2 + 3 * i );\n"
-        "       d0 = ",
-        name,
-        "( d0, i0 );\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       int3 i0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = ",
-        name,
-        "( d0, i0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
-                       info->kernels[vectorSize].data(),
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetBinaryKernel(kernel_name, builtin, ParameterType::Double,
+                               ParameterType::Double, ParameterType::Int,
+                               vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 // Thread specific data for a worker thread
@@ -309,24 +225,27 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
     Force64BitFPUPrecision();
 
-    // start the map of the output arrays
     cl_event e[VECTOR_SIZE_COUNT];
     cl_ulong *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    if (gHostFill)
     {
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
+        // Start the map of the output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
+            out[j] = (cl_ulong *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_size, 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                           error);
+                return error;
+            }
         }
-    }
 
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+        // Get that moving
+        if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+    }
 
     // Init input array
     cl_ulong *p = (cl_ulong *)gIn + thread_id * buffer_elements;
@@ -335,8 +254,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     int totalSpecialValueCount = specialValuesCount * specialValuesIntCount;
     int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
 
+    // Test edge cases
     if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
+    {
         cl_double *fp = (cl_double *)p;
         cl_int *ip2 = (cl_int *)p2;
         uint32_t x, y;
@@ -368,43 +288,60 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                       buffer_size, p, 0, NULL, NULL)))
     {
         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
+        return error;
     }
 
     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
                                       buffer_size / 2, p2, 0, NULL, NULL)))
     {
         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
+        return error;
     }
 
     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
     {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
+        if (gHostFill)
         {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
         }
 
         // Fill the result buffer with garbage, so that old results don't carry
         // over
         uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
+        if (gHostFill)
         {
-            vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
-                       error);
-            goto exit;
+            memset_pattern4(out[j], &pattern, buffer_size);
+            if ((error = clEnqueueUnmapMemObject(
+                     tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+                           error);
+                return error;
+            }
+        }
+        else
+        {
+            if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                             &pattern, sizeof(pattern), 0,
+                                             buffer_size, 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                           error);
+                return error;
+            }
         }
 
-        // run the kernel
+        // Run the kernel
         size_t vectorCount =
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
@@ -434,7 +371,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                             &vectorCount, NULL, 0, NULL, NULL)))
         {
             vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
+            return error;
         }
     }
 
@@ -462,7 +399,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         {
             vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
                        error);
-            goto exit;
+            return error;
         }
     }
 
@@ -528,8 +465,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                "*%.13la vs. %.13la\n",
                                name, sizeNames[k], err, s[j], s2[j], r[j],
                                test);
-                    error = -1;
-                    goto exit;
+                    return -1;
                 }
             }
         }
@@ -548,7 +484,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
     if ((error = clFlush(tinfo->tQueue))) vlog("clFlush 3 failed\n");
 
-
     if (0 == (base & 0x0fffffff))
     {
         if (gVerboseBruteForce)
@@ -565,8 +500,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         fflush(stdout);
     }
 
-exit:
-    return error;
+    return CL_SUCCESS;
 }
 
 } // anonymous namespace
@@ -603,13 +537,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
     test_info.ftz = f->ftz || gForceFTZ;
     test_info.relaxedMode = relaxedMode;
 
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        test_info.k[i].resize(test_info.threadCount, nullptr);
-    }
-
     test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
@@ -625,7 +552,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
             vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
         cl_buffer_region region2 = { i * test_info.subBufferSize
                                          * sizeof(cl_int),
@@ -638,7 +565,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
             vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
 
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
@@ -651,7 +578,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
                 vlog_error("Error: Unable to create sub-buffer of "
                            "gOutBuffer[%d] for region {%zd, %zd}\n",
                            (int)j, region.origin, region.size);
-                goto exit;
+                return error;
             }
         }
         test_info.tinfo[i].tQueue =
@@ -659,27 +586,26 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
         if (NULL == test_info.tinfo[i].tQueue || error)
         {
             vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
+            return error;
         }
 
         test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
     }
 
     // Init the kernels
-    {
-        BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
-                                    test_info.programs, f->nameInCode,
-                                    relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
+    BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+                                test_info.programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     // Run the kernels
     if (!gSkipCorrectnessTesting)
     {
         error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+        if (error) return error;
 
         // Accumulate the arithmetic errors
         for (cl_uint i = 0; i < test_info.threadCount; i++)
@@ -692,8 +618,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
             }
         }
 
-        if (error) goto exit;
-
         if (gWimpyMode)
             vlog("Wimp pass");
         else
@@ -704,15 +628,5 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        for (auto &kernel : test_info.k[i])
-        {
-            clReleaseKernel(kernel);
-        }
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp
index d855f447..dfe25efc 100644
--- a/test_conformance/math_brute_force/binary_i_float.cpp
+++ b/test_conformance/math_brute_force/binary_i_float.cpp
@@ -24,98 +24,16 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global int",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in, __global int* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 i0 = vload3( 0, in2 + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0, i0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       int3 i0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               i0 = (int3)( in2[3*i], 0xdead, 0xdead ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               i0 = (int3)( in2[3*i], in2[3*i+1], 0xdead ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, i0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
-                       info->kernels[vectorSize].data(),
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetBinaryKernel(kernel_name, builtin, ParameterType::Float,
+                               ParameterType::Float, ParameterType::Int,
+                               vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 // Thread specific data for a worker thread
@@ -298,24 +216,27 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_float *s = 0;
     cl_int *s2 = 0;
 
-    // start the map of the output arrays
     cl_event e[VECTOR_SIZE_COUNT];
     cl_uint *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    if (gHostFill)
     {
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
+        // start the map of the output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
+            out[j] = (cl_uint *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_size, 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                           error);
+                return error;
+            }
         }
-    }
 
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+        // Get that moving
+        if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+    }
 
     // Init input array
     cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
@@ -358,43 +279,60 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                       buffer_size, p, 0, NULL, NULL)))
     {
         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
+        return error;
     }
 
     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
                                       buffer_size, p2, 0, NULL, NULL)))
     {
         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
+        return error;
     }
 
     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
     {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
+        if (gHostFill)
         {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
         }
 
         // Fill the result buffer with garbage, so that old results don't carry
         // over
         uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
+        if (gHostFill)
         {
-            vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
-                       error);
-            goto exit;
+            memset_pattern4(out[j], &pattern, buffer_size);
+            if ((error = clEnqueueUnmapMemObject(
+                     tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+                           error);
+                return error;
+            }
+        }
+        else
+        {
+            if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                             &pattern, sizeof(pattern), 0,
+                                             buffer_size, 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                           error);
+                return error;
+            }
         }
 
-        // run the kernel
+        // Run the kernel
         size_t vectorCount =
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
@@ -424,7 +362,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                             &vectorCount, NULL, 0, NULL, NULL)))
         {
             vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
+            return error;
         }
     }
 
@@ -452,7 +390,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         {
             vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
                        error);
-            goto exit;
+            return error;
         }
     }
 
@@ -520,8 +458,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                         name, sizeNames[k], err, s[j], ((uint32_t *)s)[j],
                         s2[j], r[j], ((uint32_t *)r)[j], test,
                         ((cl_uint *)&test)[0], j);
-                    error = -1;
-                    goto exit;
+                    return -1;
                 }
             }
         }
@@ -557,8 +494,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         fflush(stdout);
     }
 
-exit:
-    return error;
+    return CL_SUCCESS;
 }
 
 } // anonymous namespace
@@ -596,13 +532,6 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
         f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
     test_info.relaxedMode = relaxedMode;
 
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        test_info.k[i].resize(test_info.threadCount, nullptr);
-    }
-
     test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
@@ -618,7 +547,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
             vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
         cl_buffer_region region2 = { i * test_info.subBufferSize
                                          * sizeof(cl_int),
@@ -631,7 +560,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
             vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
 
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
@@ -644,7 +573,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
                 vlog_error("Error: Unable to create sub-buffer of "
                            "gOutBuffer[%d] for region {%zd, %zd}\n",
                            (int)j, region.origin, region.size);
-                goto exit;
+                return error;
             }
         }
         test_info.tinfo[i].tQueue =
@@ -652,27 +581,26 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
         if (NULL == test_info.tinfo[i].tQueue || error)
         {
             vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
+            return error;
         }
 
         test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
     }
 
     // Init the kernels
-    {
-        BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
-                                    test_info.programs, f->nameInCode,
-                                    relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
+    BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+                                test_info.programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     // Run the kernels
     if (!gSkipCorrectnessTesting)
     {
         error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+        if (error) return error;
 
         // Accumulate the arithmetic errors
         for (cl_uint i = 0; i < test_info.threadCount; i++)
@@ -685,8 +613,6 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
             }
         }
 
-        if (error) goto exit;
-
         if (gWimpyMode)
             vlog("Wimp pass");
         else
@@ -697,15 +623,5 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        for (auto &kernel : test_info.k[i])
-        {
-            clReleaseKernel(kernel);
-        }
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp
index bbe5c438..7600ab16 100644
--- a/test_conformance/math_brute_force/binary_operator_double.cpp
+++ b/test_conformance/math_brute_force/binary_operator_double.cpp
@@ -23,101 +23,16 @@
 
 namespace {
 
-int BuildKernel(const char *operator_symbol, int vectorSize,
-                cl_uint kernel_count, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global double",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = in1[i] ",
-                        operator_symbol,
-                        " in2[i];\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in, __global double* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
-        "       d0 = d0 ",
-        operator_symbol,
-        " d1;\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       double3 d1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = d0 ",
-        operator_symbol,
-        " d1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
-                       info->kernels[vectorSize].data(),
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetBinaryKernel(kernel_name, builtin, ParameterType::Double,
+                               ParameterType::Double, ParameterType::Double,
+                               vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 // Thread specific data for a worker thread
@@ -301,19 +216,22 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
     Force64BitFPUPrecision();
 
-    // start the map of the output arrays
     cl_event e[VECTOR_SIZE_COUNT];
     cl_ulong *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    if (gHostFill)
     {
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
+        // start the map of the output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
+            out[j] = (cl_ulong *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_size, 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                           error);
+                return error;
+            }
         }
     }
 
@@ -327,8 +245,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     int totalSpecialValueCount = specialValuesCount * specialValuesCount;
     int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
 
+    // Test edge cases
     if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
+    {
         cl_double *fp = (cl_double *)p;
         cl_double *fp2 = (cl_double *)p2;
         uint32_t x, y;
@@ -349,7 +268,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         }
     }
 
-    // Init any remaining values.
+    // Init any remaining values
     for (; idx < buffer_elements; idx++)
     {
         p[idx] = genrand_int64(d);
@@ -360,43 +279,60 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                       buffer_size, p, 0, NULL, NULL)))
     {
         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
+        return error;
     }
 
     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
                                       buffer_size, p2, 0, NULL, NULL)))
     {
         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
+        return error;
     }
 
     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
     {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
+        if (gHostFill)
         {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
         }
 
         // Fill the result buffer with garbage, so that old results don't carry
         // over
         uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
+        if (gHostFill)
         {
-            vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
-                       error);
-            goto exit;
+            memset_pattern4(out[j], &pattern, buffer_size);
+            if ((error = clEnqueueUnmapMemObject(
+                     tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+                           error);
+                return error;
+            }
+        }
+        else
+        {
+            if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                             &pattern, sizeof(pattern), 0,
+                                             buffer_size, 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                           error);
+                return error;
+            }
         }
 
-        // run the kernel
+        // Run the kernel
         size_t vectorCount =
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
@@ -426,7 +362,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                             &vectorCount, NULL, 0, NULL, NULL)))
         {
             vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
+            return error;
         }
     }
 
@@ -454,7 +390,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         {
             vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
                        error);
-            goto exit;
+            return error;
         }
     }
 
@@ -576,8 +512,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                     vlog_error(
                         "\nERROR: %s%s: %f ulp error at {%a, %a}: *%a vs. %a\n",
                         name, sizeNames[k], err, s[j], s2[j], r[j], test);
-                    error = -1;
-                    goto exit;
+                    return -1;
                 }
             }
         }
@@ -613,8 +548,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         fflush(stdout);
     }
 
-exit:
-    return error;
+    return CL_SUCCESS;
 }
 
 } // anonymous namespace
@@ -651,13 +585,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
     test_info.ulps = f->double_ulps;
     test_info.ftz = f->ftz || gForceFTZ;
 
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        test_info.k[i].resize(test_info.threadCount, nullptr);
-    }
-
     test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
@@ -673,7 +600,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
             vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
         test_info.tinfo[i].inBuf2 =
             clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
@@ -683,7 +610,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
             vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
 
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
@@ -696,7 +623,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
                 vlog_error("Error: Unable to create sub-buffer of "
                            "gOutBuffer[%d] for region {%zd, %zd}\n",
                            (int)j, region.origin, region.size);
-                goto exit;
+                return error;
             }
         }
         test_info.tinfo[i].tQueue =
@@ -704,27 +631,26 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
         if (NULL == test_info.tinfo[i].tQueue || error)
         {
             vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
+            return error;
         }
 
         test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
     }
 
     // Init the kernels
-    {
-        BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
-                                    test_info.programs, f->nameInCode,
-                                    relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
+    BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+                                test_info.programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     // Run the kernels
     if (!gSkipCorrectnessTesting)
     {
         error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+        if (error) return error;
 
         // Accumulate the arithmetic errors
         for (cl_uint i = 0; i < test_info.threadCount; i++)
@@ -737,8 +663,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
             }
         }
 
-        if (error) goto exit;
-
         if (gWimpyMode)
             vlog("Wimp pass");
         else
@@ -749,15 +673,5 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d,
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        for (auto &kernel : test_info.k[i])
-        {
-            clReleaseKernel(kernel);
-        }
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp
index 1a28d8d8..741c396c 100644
--- a/test_conformance/math_brute_force/binary_operator_float.cpp
+++ b/test_conformance/math_brute_force/binary_operator_float.cpp
@@ -23,99 +23,16 @@
 
 namespace {
 
-int BuildKernel(const char *operator_symbol, int vectorSize,
-                cl_uint kernel_count, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = in1[i] ",
-                        operator_symbol,
-                        " in2[i];\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in, __global float* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       f0 = f0 ",
-        operator_symbol,
-        " f1;\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = f0 ",
-        operator_symbol,
-        " f1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
-                       info->kernels[vectorSize].data(),
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetBinaryKernel(kernel_name, builtin, ParameterType::Float,
+                               ParameterType::Float, ParameterType::Float,
+                               vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 // Thread specific data for a worker thread
@@ -296,24 +213,27 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         func = job->f->rfunc;
     }
 
-    // start the map of the output arrays
     cl_event e[VECTOR_SIZE_COUNT];
     cl_uint *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    if (gHostFill)
     {
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
+        // start the map of the output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
+            out[j] = (cl_uint *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_size, 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                           error);
+                return error;
+            }
         }
-    }
 
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+        // Get that moving
+        if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+    }
 
     // Init input array
     cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
@@ -352,7 +272,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         }
     }
 
-    // Init any remaining values.
+    // Init any remaining values
     for (; idx < buffer_elements; idx++)
     {
         p[idx] = genrand_int32(d);
@@ -372,43 +292,60 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                       buffer_size, p, 0, NULL, NULL)))
     {
         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
+        return error;
     }
 
     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
                                       buffer_size, p2, 0, NULL, NULL)))
     {
         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
+        return error;
     }
 
     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
     {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
+        if (gHostFill)
         {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
         }
 
         // Fill the result buffer with garbage, so that old results don't carry
         // over
         uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
+        if (gHostFill)
         {
-            vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
-                       error);
-            goto exit;
+            memset_pattern4(out[j], &pattern, buffer_size);
+            if ((error = clEnqueueUnmapMemObject(
+                     tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+                           error);
+                return error;
+            }
+        }
+        else
+        {
+            if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                             &pattern, sizeof(pattern), 0,
+                                             buffer_size, 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                           error);
+                return error;
+            }
         }
 
-        // run the kernel
+        // Run the kernel
         size_t vectorCount =
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
@@ -438,7 +375,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                             &vectorCount, NULL, 0, NULL, NULL)))
         {
             vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
+            return error;
         }
     }
 
@@ -495,7 +432,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         {
             vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
                        error);
-            goto exit;
+            return error;
         }
     }
 
@@ -701,8 +638,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                "vs. %a (0x%8.8x) at index: %zu\n",
                                name, sizeNames[k], err, s[j], s2[j], r[j], test,
                                ((cl_uint *)&test)[0], j);
-                    error = -1;
-                    goto exit;
+                    return -1;
                 }
             }
         }
@@ -738,8 +674,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         fflush(stdout);
     }
 
-exit:
-    return error;
+    return CL_SUCCESS;
 }
 
 } // anonymous namespace
@@ -778,13 +713,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
         f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
     test_info.relaxedMode = relaxedMode;
 
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        test_info.k[i].resize(test_info.threadCount, nullptr);
-    }
-
     test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
@@ -800,7 +728,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
             vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
         test_info.tinfo[i].inBuf2 =
             clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
@@ -810,7 +738,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
             vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
 
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
@@ -823,7 +751,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
                 vlog_error("Error: Unable to create sub-buffer of "
                            "gOutBuffer[%d] for region {%zd, %zd}\n",
                            (int)j, region.origin, region.size);
-                goto exit;
+                return error;
             }
         }
         test_info.tinfo[i].tQueue =
@@ -831,27 +759,26 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
         if (NULL == test_info.tinfo[i].tQueue || error)
         {
             vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
+            return error;
         }
 
         test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
     }
 
     // Init the kernels
-    {
-        BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
-                                    test_info.programs, f->nameInCode,
-                                    relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
+    BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+                                test_info.programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     // Run the kernels
     if (!gSkipCorrectnessTesting)
     {
         error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+        if (error) return error;
 
         // Accumulate the arithmetic errors
         for (cl_uint i = 0; i < test_info.threadCount; i++)
@@ -864,8 +791,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
             }
         }
 
-        if (error) goto exit;
-
         if (gWimpyMode)
             vlog("Wimp pass");
         else
@@ -876,15 +801,5 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d,
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        for (auto &kernel : test_info.k[i])
-        {
-            clReleaseKernel(kernel);
-        }
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp
index bbfd707b..0dc5b9f9 100644
--- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp
@@ -25,115 +25,18 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global int",
-                        sizeNames[vectorSize],
-                        "* out2, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global double",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], out2 + i );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global int* out2, __global double* in, "
-        "__global double* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       double3 d1 = vload3( 0, in2 + 3 * i );\n"
-        "       int3 i0 = 0xdeaddead;\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, &i0 );\n"
-        "       vstore3( d0, 0, out + 3*i );\n"
-        "       vstore3( i0, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       double3 d1;\n"
-        "       int3 i0 = 0xdeaddead;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               d1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       d0 = ",
-        name,
-        "( d0, d1, &i0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = d0.y; \n"
-        "               out2[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = d0.x; \n"
-        "               out2[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetBinaryKernel(kernel_name, builtin, ParameterType::Double,
+                               ParameterType::Int, ParameterType::Double,
+                               ParameterType::Double, vector_size_index);
     };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
+    return BuildKernels(info, job_id, generator);
 }
 
-struct BuildKernelInfo2
-{
-    cl_kernel *kernels;
-    Programs &programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-};
-
-cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
-{
-    BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
-                       &(info->programs[vectorSize]), info->relaxedMode);
-}
 
 struct ComputeReferenceInfoD
 {
@@ -174,7 +77,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
 {
     int error;
     Programs programs;
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    KernelMatrix kernels;
     float maxError = 0.0f;
     int64_t maxError2 = 0;
     int ftz = f->ftz || gForceFTZ;
@@ -191,14 +95,12 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
     int testingRemquo = !strcmp(f->name, "remquo");
 
     // Init the kernels
-    {
-        BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
-                                     relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
+    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     for (uint64_t i = 0; i < (1ULL << 32); i += step)
     {
@@ -225,28 +127,53 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
             return error;
         }
 
-        // write garbage into output arrays
+        // Write garbage into output arrays
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
             uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            if (gHostFill)
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
+                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
 
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
+                memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut2[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer 1 failed! err: %d\n",
+                               error);
+                    return error;
+                }
+
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer2[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer 2 failed! err: %d\n",
+                               error);
+                    return error;
+                }
             }
         }
 
@@ -256,37 +183,38 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_double) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
+            if ((error =
+                     clSetKernelArg(kernels[j][thread_id], 1,
+                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
+                                        sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
+                                        sizeof(gInBuffer2), &gInBuffer2)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
 
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
+                return error;
             }
         }
 
@@ -325,14 +253,14 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray failed %d\n", error);
-                goto exit;
+                return error;
             }
             if ((error =
                      clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
                                          BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
+                return error;
             }
         }
 
@@ -542,8 +470,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
                                ((cl_ulong *)gOut_Ref)[j],
                                ((cl_uint *)gOut_Ref2)[j], test, q2[j],
                                ((cl_ulong *)q)[j], ((cl_uint *)q2)[j]);
-                    error = -1;
-                    goto exit;
+                    return -1;
                 }
             }
         }
@@ -577,12 +504,5 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
index 07473376..7742076f 100644
--- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp
+++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp
@@ -25,112 +25,16 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global int",
-                        sizeNames[vectorSize],
-                        "* out2, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i], out2 + i );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global int* out2, __global float* in, "
-        "__global float* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       int3 i0 = 0xdeaddead;\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, &i0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "       vstore3( i0, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       int3 i0 = 0xdeaddead;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, f1, &i0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               out2[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               out2[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-struct BuildKernelInfo2
-{
-    cl_kernel *kernels;
-    Programs &programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-};
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetBinaryKernel(kernel_name, builtin, ParameterType::Float,
+                               ParameterType::Int, ParameterType::Float,
+                               ParameterType::Float, vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 struct ComputeReferenceInfoF
@@ -173,7 +77,8 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
 
     Programs programs;
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    KernelMatrix kernels;
     float maxError = 0.0f;
     int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
     int64_t maxError2 = 0;
@@ -192,14 +97,12 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     int testingRemquo = !strcmp(f->name, "remquo");
 
     // Init the kernels
-    {
-        BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
-                                     relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
+    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     for (uint64_t i = 0; i < (1ULL << 32); i += step)
     {
@@ -226,28 +129,53 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
             return error;
         }
 
-        // write garbage into output arrays
+        // Write garbage into output arrays
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
             uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            if (gHostFill)
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
+                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
 
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
+                memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut2[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer 1 failed! err: %d\n",
+                               error);
+                    return error;
+                }
+
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer2[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer 2 failed! err: %d\n",
+                               error);
+                    return error;
+                }
             }
         }
 
@@ -257,37 +185,38 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_float) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
+            if ((error =
+                     clSetKernelArg(kernels[j][thread_id], 1,
+                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
+                                        sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
+                                        sizeof(gInBuffer2), &gInBuffer2)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
 
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
+                return error;
             }
         }
 
@@ -326,14 +255,14 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray failed %d\n", error);
-                goto exit;
+                return error;
             }
             if ((error =
                      clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
                                          BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
+                return error;
             }
         }
 
@@ -525,8 +454,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
                                ((cl_uint *)gOut_Ref)[j],
                                ((cl_uint *)gOut_Ref2)[j], test, q2[j],
                                ((cl_uint *)&test)[0], ((cl_uint *)q2)[j]);
-                    error = -1;
-                    goto exit;
+                    return -1;
                 }
             }
         }
@@ -560,12 +488,5 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/common.cpp b/test_conformance/math_brute_force/common.cpp
index f5e9f993..47f493e7 100644
--- a/test_conformance/math_brute_force/common.cpp
+++ b/test_conformance/math_brute_force/common.cpp
@@ -29,6 +29,10 @@ const char *GetTypeName(ParameterType type)
     {
         case ParameterType::Float: return "float";
         case ParameterType::Double: return "double";
+        case ParameterType::Int: return "int";
+        case ParameterType::UInt: return "uint";
+        case ParameterType::Long: return "long";
+        case ParameterType::ULong: return "ulong";
     }
     return nullptr;
 }
@@ -39,6 +43,12 @@ const char *GetUndefValue(ParameterType type)
     {
         case ParameterType::Float:
         case ParameterType::Double: return "NAN";
+
+        case ParameterType::Int:
+        case ParameterType::UInt: return "0x12345678";
+
+        case ParameterType::Long:
+        case ParameterType::ULong: return "0x0ddf00dbadc0ffee";
     }
     return nullptr;
 }
@@ -57,18 +67,50 @@ void EmitDefineUndef(std::ostringstream &kernel, const char *name,
     kernel << "#define " << name << " " << GetUndefValue(type) << '\n';
 }
 
-void EmitEnableExtension(std::ostringstream &kernel, ParameterType type)
+void EmitEnableExtension(std::ostringstream &kernel,
+                         const std::initializer_list<ParameterType> &types)
 {
-    switch (type)
+    bool needsFp64 = false;
+
+    for (const auto &type : types)
     {
-        case ParameterType::Double:
-            kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
-            break;
+        switch (type)
+        {
+            case ParameterType::Double: needsFp64 = true; break;
 
-        case ParameterType::Float:
-            // No extension required.
-            break;
+            case ParameterType::Float:
+            case ParameterType::Int:
+            case ParameterType::UInt:
+            case ParameterType::Long:
+            case ParameterType::ULong:
+                // No extension required.
+                break;
+        }
+    }
+
+    if (needsFp64) kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+}
+
+std::string GetBuildOptions(bool relaxed_mode)
+{
+    std::ostringstream options;
+
+    if (gForceFTZ)
+    {
+        options << " -cl-denorms-are-zero";
+    }
+
+    if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
+    {
+        options << " -cl-fp32-correctly-rounded-divide-sqrt";
+    }
+
+    if (relaxed_mode)
+    {
+        options << " -cl-fast-relaxed-math";
     }
+
+    return options.str();
 }
 
 } // anonymous namespace
@@ -78,6 +120,354 @@ std::string GetKernelName(int vector_size_index)
     return std::string("math_kernel") + sizeNames[vector_size_index];
 }
 
+std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
+                           ParameterType retType, ParameterType type1,
+                           int vector_size_index)
+{
+    // To keep the kernel code readable, use macros for types and undef values.
+    std::ostringstream kernel;
+    EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
+    EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
+    EmitDefineUndef(kernel, "UNDEF1", type1);
+    EmitEnableExtension(kernel, { retType, type1 });
+
+    // clang-format off
+    const char *kernel_nonvec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
+                          __global TYPE1* in1)
+{
+    size_t i = get_global_id(0);
+    out[i] = )", builtin, R"((in1[i]);
+}
+)" };
+
+    const char *kernel_vec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
+                          __global TYPE1_SCALAR* in1)
+{
+    size_t i = get_global_id(0);
+
+    if (i + 1 < get_global_size(0))
+    {
+        TYPE1 a = vload3(0, in1 + 3 * i);
+        RETTYPE res = )", builtin, R"((a);
+        vstore3(res, 0, out + 3 * i);
+    }
+    else
+    {
+        // Figure out how many elements are left over after
+        // BUFFER_SIZE % (3 * sizeof(type)).
+        // Assume power of two buffer size.
+        size_t parity = i & 1;
+        TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
+        switch (parity)
+        {
+            case 0:
+                a.y = in1[3 * i + 1];
+                // fall through
+            case 1:
+                a.x = in1[3 * i];
+                break;
+        }
+
+        RETTYPE res = )", builtin, R"((a);
+
+        switch (parity)
+        {
+            case 0:
+                out[3 * i + 1] = res.y;
+                // fall through
+            case 1:
+                out[3 * i] = res.x;
+                break;
+        }
+    }
+}
+)" };
+    // clang-format on
+
+    if (sizeValues[vector_size_index] != 3)
+        for (const auto &chunk : kernel_nonvec3) kernel << chunk;
+    else
+        for (const auto &chunk : kernel_vec3) kernel << chunk;
+
+    return kernel.str();
+}
+
+std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
+                           ParameterType retType1, ParameterType retType2,
+                           ParameterType type1, int vector_size_index)
+{
+    // To keep the kernel code readable, use macros for types and undef values.
+    std::ostringstream kernel;
+    EmitDefineType(kernel, "RETTYPE1", retType1, vector_size_index);
+    EmitDefineType(kernel, "RETTYPE2", retType2, vector_size_index);
+    EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
+    EmitDefineUndef(kernel, "UNDEF1", type1);
+    EmitDefineUndef(kernel, "UNDEFR2", retType2);
+    EmitEnableExtension(kernel, { retType1, retType2, type1 });
+
+    // clang-format off
+    const char *kernel_nonvec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1* out1,
+                          __global RETTYPE2* out2,
+                          __global TYPE1* in1)
+{
+    size_t i = get_global_id(0);
+    out1[i] = )", builtin, R"((in1[i], out2 + i);
+}
+)" };
+
+    const char *kernel_vec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1_SCALAR* out1,
+                          __global RETTYPE2_SCALAR* out2,
+                          __global TYPE1_SCALAR* in1)
+{
+    size_t i = get_global_id(0);
+
+    if (i + 1 < get_global_size(0))
+    {
+        TYPE1 a = vload3(0, in1 + 3 * i);
+        RETTYPE2 res2 = UNDEFR2;
+        RETTYPE1 res1 = )", builtin, R"((a, &res2);
+        vstore3(res1, 0, out1 + 3 * i);
+        vstore3(res2, 0, out2 + 3 * i);
+    }
+    else
+    {
+        // Figure out how many elements are left over after
+        // BUFFER_SIZE % (3 * sizeof(type)).
+        // Assume power of two buffer size.
+        size_t parity = i & 1;
+        TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
+        switch (parity)
+        {
+            case 0:
+                a.y = in1[3 * i + 1];
+                // fall through
+            case 1:
+                a.x = in1[3 * i];
+                break;
+        }
+
+        RETTYPE2 res2 = UNDEFR2;
+        RETTYPE1 res1 = )", builtin, R"((a, &res2);
+
+        switch (parity)
+        {
+            case 0:
+                out1[3 * i + 1] = res1.y;
+                out2[3 * i + 1] = res2.y;
+                // fall through
+            case 1:
+                out1[3 * i] = res1.x;
+                out2[3 * i] = res2.x;
+                break;
+        }
+    }
+}
+)" };
+    // clang-format on
+
+    if (sizeValues[vector_size_index] != 3)
+        for (const auto &chunk : kernel_nonvec3) kernel << chunk;
+    else
+        for (const auto &chunk : kernel_vec3) kernel << chunk;
+
+    return kernel.str();
+}
+
+std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
+                            ParameterType retType, ParameterType type1,
+                            ParameterType type2, int vector_size_index)
+{
+    // To keep the kernel code readable, use macros for types and undef values.
+    std::ostringstream kernel;
+    EmitDefineType(kernel, "RETTYPE", retType, vector_size_index);
+    EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
+    EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
+    EmitDefineUndef(kernel, "UNDEF1", type1);
+    EmitDefineUndef(kernel, "UNDEF2", type2);
+    EmitEnableExtension(kernel, { retType, type1, type2 });
+
+    const bool is_vec3 = sizeValues[vector_size_index] == 3;
+
+    std::string invocation;
+    if (strlen(builtin) == 1)
+    {
+        // Assume a single-character builtin is an operator (e.g., +, *, ...).
+        invocation = is_vec3 ? "a" : "in1[i] ";
+        invocation += builtin;
+        invocation += is_vec3 ? "b" : " in2[i]";
+    }
+    else
+    {
+        // Otherwise call the builtin as a function with two arguments.
+        invocation = builtin;
+        invocation += is_vec3 ? "(a, b)" : "(in1[i], in2[i])";
+    }
+
+    // clang-format off
+    const char *kernel_nonvec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out,
+                          __global TYPE1* in1,
+                          __global TYPE2* in2)
+{
+    size_t i = get_global_id(0);
+    out[i] = )", invocation.c_str(), R"(;
+}
+)" };
+
+    const char *kernel_vec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
+                          __global TYPE1_SCALAR* in1,
+                          __global TYPE2_SCALAR* in2)
+{
+    size_t i = get_global_id(0);
+
+    if (i + 1 < get_global_size(0))
+    {
+        TYPE1 a = vload3(0, in1 + 3 * i);
+        TYPE2 b = vload3(0, in2 + 3 * i);
+        RETTYPE res = )", invocation.c_str(), R"(;
+        vstore3(res, 0, out + 3 * i);
+    }
+    else
+    {
+        // Figure out how many elements are left over after
+        // BUFFER_SIZE % (3 * sizeof(type)).
+        // Assume power of two buffer size.
+        size_t parity = i & 1;
+        TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
+        TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
+        switch (parity)
+        {
+            case 0:
+                a.y = in1[3 * i + 1];
+                b.y = in2[3 * i + 1];
+                // fall through
+            case 1:
+                a.x = in1[3 * i];
+                b.x = in2[3 * i];
+                break;
+        }
+
+        RETTYPE res = )", invocation.c_str(), R"(;
+
+        switch (parity)
+        {
+            case 0:
+                out[3 * i + 1] = res.y;
+                // fall through
+            case 1:
+                out[3 * i] = res.x;
+                break;
+        }
+    }
+}
+)" };
+    // clang-format on
+
+    if (!is_vec3)
+        for (const auto &chunk : kernel_nonvec3) kernel << chunk;
+    else
+        for (const auto &chunk : kernel_vec3) kernel << chunk;
+
+    return kernel.str();
+}
+
+std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
+                            ParameterType retType1, ParameterType retType2,
+                            ParameterType type1, ParameterType type2,
+                            int vector_size_index)
+{
+    // To keep the kernel code readable, use macros for types and undef values.
+    std::ostringstream kernel;
+    EmitDefineType(kernel, "RETTYPE1", retType1, vector_size_index);
+    EmitDefineType(kernel, "RETTYPE2", retType2, vector_size_index);
+    EmitDefineType(kernel, "TYPE1", type1, vector_size_index);
+    EmitDefineType(kernel, "TYPE2", type2, vector_size_index);
+    EmitDefineUndef(kernel, "UNDEF1", type1);
+    EmitDefineUndef(kernel, "UNDEF2", type2);
+    EmitDefineUndef(kernel, "UNDEFR2", retType2);
+    EmitEnableExtension(kernel, { retType1, retType2, type1, type2 });
+
+    // clang-format off
+    const char *kernel_nonvec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1* out1,
+                          __global RETTYPE2* out2,
+                          __global TYPE1* in1,
+                          __global TYPE2* in2)
+{
+    size_t i = get_global_id(0);
+    out1[i] = )", builtin, R"((in1[i], in2[i], out2 + i);
+}
+)" };
+
+    const char *kernel_vec3[] = { R"(
+__kernel void )", kernel_name.c_str(), R"((__global RETTYPE1_SCALAR* out1,
+                          __global RETTYPE2_SCALAR* out2,
+                          __global TYPE1_SCALAR* in1,
+                          __global TYPE2_SCALAR* in2)
+{
+    size_t i = get_global_id(0);
+
+    if (i + 1 < get_global_size(0))
+    {
+        TYPE1 a = vload3(0, in1 + 3 * i);
+        TYPE2 b = vload3(0, in2 + 3 * i);
+        RETTYPE2 res2 = UNDEFR2;
+        RETTYPE1 res1 = )", builtin, R"((a, b, &res2);
+        vstore3(res1, 0, out1 + 3 * i);
+        vstore3(res2, 0, out2 + 3 * i);
+    }
+    else
+    {
+        // Figure out how many elements are left over after
+        // BUFFER_SIZE % (3 * sizeof(type)).
+        // Assume power of two buffer size.
+        size_t parity = i & 1;
+        TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1);
+        TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2);
+        switch (parity)
+        {
+            case 0:
+                a.y = in1[3 * i + 1];
+                b.y = in2[3 * i + 1];
+                // fall through
+            case 1:
+                a.x = in1[3 * i];
+                b.x = in2[3 * i];
+                break;
+        }
+
+        RETTYPE2 res2 = UNDEFR2;
+        RETTYPE1 res1 = )", builtin, R"((a, b, &res2);
+
+        switch (parity)
+        {
+            case 0:
+                out1[3 * i + 1] = res1.y;
+                out2[3 * i + 1] = res2.y;
+                // fall through
+            case 1:
+                out1[3 * i] = res1.x;
+                out2[3 * i] = res2.x;
+                break;
+        }
+    }
+}
+)" };
+    // clang-format on
+
+    if (sizeValues[vector_size_index] != 3)
+        for (const auto &chunk : kernel_nonvec3) kernel << chunk;
+    else
+        for (const auto &chunk : kernel_vec3) kernel << chunk;
+
+    return kernel.str();
+}
+
 std::string GetTernaryKernel(const std::string &kernel_name,
                              const char *builtin, ParameterType retType,
                              ParameterType type1, ParameterType type2,
@@ -92,7 +482,7 @@ std::string GetTernaryKernel(const std::string &kernel_name,
     EmitDefineUndef(kernel, "UNDEF1", type1);
     EmitDefineUndef(kernel, "UNDEF2", type2);
     EmitDefineUndef(kernel, "UNDEF3", type3);
-    EmitEnableExtension(kernel, type1);
+    EmitEnableExtension(kernel, { retType, type1, type2, type3 });
 
     // clang-format off
     const char *kernel_nonvec3[] = { R"(
@@ -168,3 +558,42 @@ __kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out,
 
     return kernel.str();
 }
+
+cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id,
+                    SourceGenerator generator)
+{
+    // Generate the kernel code.
+    cl_uint vector_size_index = gMinVectorSizeIndex + job_id;
+    auto kernel_name = GetKernelName(vector_size_index);
+    auto source = generator(kernel_name, info.nameInCode, vector_size_index);
+    std::array<const char *, 1> sources{ source.c_str() };
+
+    // Create the program.
+    clProgramWrapper &program = info.programs[vector_size_index];
+    auto options = GetBuildOptions(info.relaxedMode);
+    int error =
+        create_single_kernel_helper(gContext, &program, nullptr, sources.size(),
+                                    sources.data(), nullptr, options.c_str());
+    if (error != CL_SUCCESS)
+    {
+        vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
+        return error;
+    }
+
+    // Create a kernel for each thread. cl_kernels aren't thread safe, so make
+    // one for every thread
+    auto &kernels = info.kernels[vector_size_index];
+    assert(kernels.empty() && "Dirty BuildKernelInfo");
+    kernels.resize(info.threadCount);
+    for (auto &kernel : kernels)
+    {
+        kernel = clCreateKernel(program, kernel_name.c_str(), &error);
+        if (!kernel || error != CL_SUCCESS)
+        {
+            vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
+            return error;
+        }
+    }
+
+    return CL_SUCCESS;
+}
diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h
index 143814ca..481b3b2a 100644
--- a/test_conformance/math_brute_force/common.h
+++ b/test_conformance/math_brute_force/common.h
@@ -24,7 +24,8 @@
 #include <vector>
 
 // Array of thread-specific kernels for each vector size.
-using KernelMatrix = std::array<std::vector<cl_kernel>, VECTOR_SIZE_COUNT>;
+using KernelMatrix =
+    std::array<std::vector<clKernelWrapper>, VECTOR_SIZE_COUNT>;
 
 // Array of programs for each vector size.
 using Programs = std::array<clProgramWrapper, VECTOR_SIZE_COUNT>;
@@ -37,12 +38,29 @@ enum class ParameterType
 {
     Float,
     Double,
+    Int,
+    UInt,
+    Long,
+    ULong,
 };
 
 // Return kernel name suffixed with vector size.
 std::string GetKernelName(int vector_size_index);
 
 // Generate kernel code for the given builtin function/operator.
+std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
+                           ParameterType retType, ParameterType type1,
+                           int vector_size_index);
+std::string GetUnaryKernel(const std::string &kernel_name, const char *builtin,
+                           ParameterType retType1, ParameterType retType2,
+                           ParameterType type1, int vector_size_index);
+std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
+                            ParameterType retType, ParameterType type1,
+                            ParameterType type2, int vector_size_index);
+std::string GetBinaryKernel(const std::string &kernel_name, const char *builtin,
+                            ParameterType retType1, ParameterType retType2,
+                            ParameterType type1, ParameterType type2,
+                            int vector_size_index);
 std::string GetTernaryKernel(const std::string &kernel_name,
                              const char *builtin, ParameterType retType,
                              ParameterType type1, ParameterType type2,
@@ -65,4 +83,12 @@ struct BuildKernelInfo
     bool relaxedMode;
 };
 
+using SourceGenerator = std::string (*)(const std::string &kernel_name,
+                                        const char *builtin,
+                                        cl_uint vector_size_index);
+
+/// Build kernels for all threads in "info" for the given job_id.
+cl_int BuildKernels(BuildKernelInfo &info, cl_uint job_id,
+                    SourceGenerator generator);
+
 #endif /* COMMON_H */
diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp
index 0cbcf86e..3d6ce152 100644
--- a/test_conformance/math_brute_force/i_unary_double.cpp
+++ b/test_conformance/math_brute_force/i_unary_double.cpp
@@ -24,100 +24,15 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global int",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global int* out, __global double* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 i0 = ",
-        name,
-        "( f0 );\n"
-        "       vstore3( i0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       int3 i0 = ",
-        name,
-        "( f0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-struct BuildKernelInfo2
-{
-    cl_kernel *kernels;
-    Programs &programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-};
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetUnaryKernel(kernel_name, builtin, ParameterType::Int,
+                              ParameterType::Double, vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 } // anonymous namespace
@@ -126,7 +41,8 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
 {
     int error;
     Programs programs;
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    KernelMatrix kernels;
     int ftz = f->ftz || gForceFTZ;
     uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE);
     int scale =
@@ -143,8 +59,8 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
 
     // Init the kernels
     {
-        BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
-                                     relaxedMode };
+        BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
+                                    relaxedMode };
         if ((error = ThreadPool_Do(BuildKernelFn,
                                    gMaxVectorSizeIndex - gMinVectorSizeIndex,
                                    &build_info)))
@@ -173,18 +89,33 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
             return error;
         }
 
-        // write garbage into output arrays
+        // Write garbage into output arrays
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
             uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            if (gHostFill)
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
+                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    goto exit;
+                }
+            }
+            else
+            {
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                               error);
+                    return error;
+                }
             }
         }
 
@@ -194,22 +125,22 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeValues[j] * sizeof(cl_double);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
                 goto exit;
             }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
+                                        sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
                 goto exit;
             }
 
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
                 goto exit;
@@ -297,11 +228,5 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
 
 exit:
     RestoreFPState(&oldMode);
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-    }
-
     return error;
 }
diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp
index 90bb1e16..94ebc66a 100644
--- a/test_conformance/math_brute_force/i_unary_float.cpp
+++ b/test_conformance/math_brute_force/i_unary_float.cpp
@@ -24,98 +24,15 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global int",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in)\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global int* out, __global float* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 i0 = ",
-        name,
-        "( f0 );\n"
-        "       vstore3( i0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       int3 i0 = ",
-        name,
-        "( f0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-struct BuildKernelInfo2
-{
-    cl_kernel *kernels;
-    Programs &programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-};
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetUnaryKernel(kernel_name, builtin, ParameterType::Int,
+                              ParameterType::Float, vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 } // anonymous namespace
@@ -124,7 +41,8 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
 {
     int error;
     Programs programs;
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    KernelMatrix kernels;
     int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
     uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
     int scale = (int)((1ULL << 32) / (16 * BUFFER_SIZE / sizeof(float)) + 1);
@@ -140,8 +58,8 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
 
     // Init the kernels
     {
-        BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
-                                     relaxedMode };
+        BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
+                                    relaxedMode };
         if ((error = ThreadPool_Do(BuildKernelFn,
                                    gMaxVectorSizeIndex - gMinVectorSizeIndex,
                                    &build_info)))
@@ -170,18 +88,33 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
             return error;
         }
 
-        // write garbage into output arrays
+        // Write garbage into output arrays
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
             uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            if (gHostFill)
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
+                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    goto exit;
+                }
+            }
+            else
+            {
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                               error);
+                    return error;
+                }
             }
         }
 
@@ -191,22 +124,22 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeValues[j] * sizeof(cl_float);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
                 goto exit;
             }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
+                                        sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
                 goto exit;
             }
 
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
                 goto exit;
@@ -293,11 +226,5 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
 
 exit:
     RestoreFPState(&oldMode);
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-    }
-
     return error;
 }
diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp
index 412f210b..51d5b64b 100644
--- a/test_conformance/math_brute_force/macro_binary_double.cpp
+++ b/test_conformance/math_brute_force/macro_binary_double.cpp
@@ -24,100 +24,16 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global long",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in1, __global double",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global long* out, __global double* in, __global double* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 f0 = vload3( 0, in + 3 * i );\n"
-        "       double3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       long3 l0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       vstore3( l0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 f0;\n"
-        "       double3 f1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (double3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       long3 l0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = l0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = l0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
-                       info->kernels[vectorSize].data(),
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetBinaryKernel(kernel_name, builtin, ParameterType::Long,
+                               ParameterType::Double, ParameterType::Double,
+                               vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 // Thread specific data for a worker thread
@@ -292,24 +208,27 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
     Force64BitFPUPrecision();
 
-    // start the map of the output arrays
     cl_event e[VECTOR_SIZE_COUNT];
     cl_long *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    if (gHostFill)
     {
-        out[j] = (cl_long *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
+        // start the map of the output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
+            out[j] = (cl_long *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_size, 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                           error);
+                return error;
+            }
         }
-    }
 
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+        // Get that moving
+        if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+    }
 
     // Init input array
     double *p = (double *)gIn + thread_id * buffer_elements;
@@ -318,8 +237,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     int totalSpecialValueCount = specialValuesCount * specialValuesCount;
     int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
 
+    // Test edge cases
     if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
+    {
         uint32_t x, y;
 
         x = (job_id * buffer_elements) % specialValuesCount;
@@ -338,7 +258,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         }
     }
 
-    // Init any remaining values.
+    // Init any remaining values
     for (; idx < buffer_elements; idx++)
     {
         ((cl_ulong *)p)[idx] = genrand_int64(d);
@@ -349,43 +269,60 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                       buffer_size, p, 0, NULL, NULL)))
     {
         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
+        return error;
     }
 
     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
                                       buffer_size, p2, 0, NULL, NULL)))
     {
         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
+        return error;
     }
 
     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
     {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
+        if (gHostFill)
         {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
         }
 
         // Fill the result buffer with garbage, so that old results don't carry
         // over
         uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
+        if (gHostFill)
         {
-            vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
-                       error);
-            goto exit;
+            memset_pattern4(out[j], &pattern, buffer_size);
+            if ((error = clEnqueueUnmapMemObject(
+                     tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+                           error);
+                return error;
+            }
+        }
+        else
+        {
+            if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                             &pattern, sizeof(pattern), 0,
+                                             buffer_size, 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                           error);
+                return error;
+            }
         }
 
-        // run the kernel
+        // Run the kernel
         size_t vectorCount =
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
@@ -415,7 +352,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                             &vectorCount, NULL, 0, NULL, NULL)))
         {
             vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
+            return error;
         }
     }
 
@@ -442,7 +379,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         {
             vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
                        error);
-            goto exit;
+            return error;
         }
     }
 
@@ -493,8 +430,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                        "vs. %" PRId64 "  (index: %zu)\n",
                        name, err, ((double *)s)[j], ((double *)s2)[j], t[j],
                        q[j], j);
-            error = -1;
-            goto exit;
+            return -1;
         }
 
 
@@ -542,8 +478,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                            "  (index: %zu)\n",
                            name, sizeNames[k], err, ((double *)s)[j],
                            ((double *)s2)[j], -t[j], q[j], j);
-                error = -1;
-                goto exit;
+                return -1;
             }
         }
     }
@@ -578,8 +513,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         fflush(stdout);
     }
 
-exit:
-    return error;
+    return CL_SUCCESS;
 }
 
 } // anonymous namespace
@@ -612,13 +546,6 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
     test_info.ftz = f->ftz || gForceFTZ;
     test_info.relaxedMode = relaxedMode;
 
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        test_info.k[i].resize(test_info.threadCount, nullptr);
-    }
-
     test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
@@ -634,7 +561,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
             vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
         test_info.tinfo[i].inBuf2 =
             clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
@@ -644,7 +571,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
             vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
 
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
@@ -657,7 +584,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
                 vlog_error("Error: Unable to create sub-buffer of "
                            "gOutBuffer[%d] for region {%zd, %zd}\n",
                            (int)j, region.origin, region.size);
-                goto exit;
+                return error;
             }
         }
         test_info.tinfo[i].tQueue =
@@ -665,29 +592,26 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
         if (NULL == test_info.tinfo[i].tQueue || error)
         {
             vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
+            return error;
         }
 
         test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
     }
 
     // Init the kernels
-    {
-        BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
-                                    test_info.programs, f->nameInCode,
-                                    relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
+    BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+                                test_info.programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     // Run the kernels
     if (!gSkipCorrectnessTesting)
     {
         error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
-        if (error) goto exit;
+        if (error) return error;
 
         if (gWimpyMode)
             vlog("Wimp pass");
@@ -697,15 +621,5 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        for (auto &kernel : test_info.k[i])
-        {
-            clReleaseKernel(kernel);
-        }
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp
index cb915fc7..b00a29ff 100644
--- a/test_conformance/math_brute_force/macro_binary_float.cpp
+++ b/test_conformance/math_brute_force/macro_binary_float.cpp
@@ -23,98 +23,16 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global int",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in1, __global float",
-                        sizeNames[vectorSize],
-                        "* in2 )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in1[i], in2[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global int* out, __global float* in, __global float* in2)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f1 = vload3( 0, in2 + 3 * i );\n"
-        "       int3 i0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       vstore3( i0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       float3 f1;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               f1 = (float3)( in2[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       int3 i0 = ",
-        name,
-        "( f0, f1 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
-                       info->kernels[vectorSize].data(),
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetBinaryKernel(kernel_name, builtin, ParameterType::Int,
+                               ParameterType::Float, ParameterType::Float,
+                               vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 // Thread specific data for a worker thread
@@ -279,24 +197,27 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     cl_float *s = 0;
     cl_float *s2 = 0;
 
-    // start the map of the output arrays
     cl_event e[VECTOR_SIZE_COUNT];
     cl_int *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    if (gHostFill)
     {
-        out[j] = (cl_int *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
+        // start the map of the output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
+            out[j] = (cl_int *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_size, 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                           error);
+                return error;
+            }
         }
-    }
 
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+        // Get that moving
+        if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+    }
 
     // Init input array
     cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
@@ -306,8 +227,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     int totalSpecialValueCount = specialValuesCount * specialValuesCount;
     int lastSpecialJobIndex = (totalSpecialValueCount - 1) / buffer_elements;
 
+    // Test edge cases
     if (job_id <= (cl_uint)lastSpecialJobIndex)
-    { // test edge cases
+    {
         float *fp = (float *)p;
         float *fp2 = (float *)p2;
         uint32_t x, y;
@@ -329,7 +251,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         }
     }
 
-    // Init any remaining values.
+    // Init any remaining values
     for (; idx < buffer_elements; idx++)
     {
         p[idx] = genrand_int32(d);
@@ -340,43 +262,60 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                       buffer_size, p, 0, NULL, NULL)))
     {
         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
+        return error;
     }
 
     if ((error = clEnqueueWriteBuffer(tinfo->tQueue, tinfo->inBuf2, CL_FALSE, 0,
                                       buffer_size, p2, 0, NULL, NULL)))
     {
         vlog_error("Error: clEnqueueWriteBuffer failed! err: %d\n", error);
-        goto exit;
+        return error;
     }
 
     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
     {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
+        if (gHostFill)
         {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            goto exit;
-        }
-        if ((error = clReleaseEvent(e[j])))
-        {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            goto exit;
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
         }
 
         // Fill the result buffer with garbage, so that old results don't carry
         // over
         uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
+        if (gHostFill)
         {
-            vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
-                       error);
-            goto exit;
+            memset_pattern4(out[j], &pattern, buffer_size);
+            if ((error = clEnqueueUnmapMemObject(
+                     tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+                           error);
+                return error;
+            }
+        }
+        else
+        {
+            if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                             &pattern, sizeof(pattern), 0,
+                                             buffer_size, 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                           error);
+                return error;
+            }
         }
 
-        // run the kernel
+        // Run the kernel
         size_t vectorCount =
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
@@ -406,7 +345,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                                             &vectorCount, NULL, 0, NULL, NULL)))
         {
             vlog_error("FAILED -- could not execute kernel\n");
-            goto exit;
+            return error;
         }
     }
 
@@ -433,7 +372,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         {
             vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
                        error);
-            goto exit;
+            return error;
         }
     }
 
@@ -481,8 +420,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                        "0x%8.8x (index: %zu)\n",
                        name, err, ((float *)s)[j], ((float *)s2)[j], t[j], q[j],
                        j);
-            error = -1;
-            goto exit;
+            return -1;
         }
 
         for (auto k = std::max(1U, gMinVectorSizeIndex);
@@ -527,8 +465,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
                            "vs. 0x%8.8x (index: %zu)\n",
                            name, sizeNames[k], err, ((float *)s)[j],
                            ((float *)s2)[j], -t[j], q[j], j);
-                error = -1;
-                goto exit;
+                return -1;
             }
         }
     }
@@ -563,8 +500,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         fflush(stdout);
     }
 
-exit:
-    return error;
+    return CL_SUCCESS;
 }
 
 } // anonymous namespace
@@ -598,13 +534,6 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
         f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
     test_info.relaxedMode = relaxedMode;
 
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        test_info.k[i].resize(test_info.threadCount, nullptr);
-    }
-
     test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
@@ -620,7 +549,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
             vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
         test_info.tinfo[i].inBuf2 =
             clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
@@ -630,7 +559,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
             vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
 
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
@@ -643,7 +572,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
                 vlog_error("Error: Unable to create sub-buffer of "
                            "gOutBuffer[%d] for region {%zd, %zd}\n",
                            (int)j, region.origin, region.size);
-                goto exit;
+                return error;
             }
         }
         test_info.tinfo[i].tQueue =
@@ -651,29 +580,26 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
         if (NULL == test_info.tinfo[i].tQueue || error)
         {
             vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
+            return error;
         }
 
         test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
     }
 
     // Init the kernels
-    {
-        BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
-                                    test_info.programs, f->nameInCode,
-                                    relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
+    BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+                                test_info.programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     // Run the kernels
     if (!gSkipCorrectnessTesting)
     {
         error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
-        if (error) goto exit;
+        if (error) return error;
 
         if (gWimpyMode)
             vlog("Wimp pass");
@@ -683,15 +609,5 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        for (auto &kernel : test_info.k[i])
-        {
-            clReleaseKernel(kernel);
-        }
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp
index c2e7cdcc..2d75bc5c 100644
--- a/test_conformance/math_brute_force/macro_unary_double.cpp
+++ b/test_conformance/math_brute_force/macro_unary_double.cpp
@@ -24,94 +24,15 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global long",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global long* out, __global double* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 d0 = vload3( 0, in + 3 * i );\n"
-        "       long3 l0 = ",
-        name,
-        "( d0 );\n"
-        "       vstore3( l0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 d0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               d0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       long3 l0 = ",
-        name,
-        "( d0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = l0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = l0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
-                       info->kernels[vectorSize].data(),
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetUnaryKernel(kernel_name, builtin, ParameterType::Long,
+                              ParameterType::Double, vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 // Thread specific data for a worker thread
@@ -165,24 +86,27 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
     Force64BitFPUPrecision();
 
-    // start the map of the output arrays
     cl_event e[VECTOR_SIZE_COUNT];
     cl_long *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    if (gHostFill)
     {
-        out[j] = (cl_long *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
+        // start the map of the output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
+            out[j] = (cl_long *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_size, 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                           error);
+                return error;
+            }
         }
-    }
 
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+        // Get that moving
+        if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+    }
 
     // Write the new values to the input array
     cl_double *p = (cl_double *)gIn + thread_id * buffer_elements;
@@ -198,31 +122,48 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
     {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            return error;
-        }
-        if ((error = clReleaseEvent(e[j])))
+        if (gHostFill)
         {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            return error;
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
         }
 
         // Fill the result buffer with garbage, so that old results don't carry
         // over
         uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
+        if (gHostFill)
         {
-            vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
-                       error);
-            return error;
+            memset_pattern4(out[j], &pattern, buffer_size);
+            if ((error = clEnqueueUnmapMemObject(
+                     tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+                           error);
+                return error;
+            }
+        }
+        else
+        {
+            if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                             &pattern, sizeof(pattern), 0,
+                                             buffer_size, 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                           error);
+                return error;
+            }
         }
 
-        // run the kernel
+        // Run the kernel
         size_t vectorCount =
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
@@ -396,13 +337,6 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
     test_info.ftz = f->ftz || gForceFTZ;
     test_info.relaxedMode = relaxedMode;
 
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        test_info.k[i].resize(test_info.threadCount, nullptr);
-    }
-
     test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
@@ -418,7 +352,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
             vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
 
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
@@ -431,7 +365,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
                 vlog_error("Error: Unable to create sub-buffer of "
                            "gOutBuffer[%d] for region {%zd, %zd}\n",
                            (int)j, region.origin, region.size);
-                goto exit;
+                return error;
             }
         }
         test_info.tinfo[i].tQueue =
@@ -439,27 +373,24 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
         if (NULL == test_info.tinfo[i].tQueue || error)
         {
             vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
+            return error;
         }
     }
 
     // Init the kernels
-    {
-        BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
-                                    test_info.programs, f->nameInCode,
-                                    relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
+    BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+                                test_info.programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     // Run the kernels
     if (!gSkipCorrectnessTesting)
     {
         error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
-        if (error) goto exit;
+        if (error) return error;
 
         if (gWimpyMode)
             vlog("Wimp pass");
@@ -469,15 +400,5 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        for (auto &kernel : test_info.k[i])
-        {
-            clReleaseKernel(kernel);
-        }
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp
index 6a1b9b9a..53679788 100644
--- a/test_conformance/math_brute_force/macro_unary_float.cpp
+++ b/test_conformance/math_brute_force/macro_unary_float.cpp
@@ -23,93 +23,15 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global int",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global int* out, __global float* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 i0 = ",
-        name,
-        "( f0 );\n"
-        "       vstore3( i0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       int3 i0;\n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], 0xdead, 0xdead ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], 0xdead ); \n"
-        "               break;\n"
-        "       }\n"
-        "       i0 = ",
-        name,
-        "( f0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = i0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = i0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
-                       info->kernels[vectorSize].data(),
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetUnaryKernel(kernel_name, builtin, ParameterType::Int,
+                              ParameterType::Float, vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 // Thread specific data for a worker thread
@@ -167,24 +89,27 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
 #define ref_func(s) (signbit_test ? func.i_f_f(s) : func.i_f(s))
 
-    // start the map of the output arrays
     cl_event e[VECTOR_SIZE_COUNT];
     cl_int *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    if (gHostFill)
     {
-        out[j] = (cl_int *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
+        // start the map of the output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
+            out[j] = (cl_int *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_size, 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                           error);
+                return error;
+            }
         }
-    }
 
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+        // Get that moving
+        if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+    }
 
     // Init input array
     cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
@@ -199,31 +124,48 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
     {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            return error;
-        }
-        if ((error = clReleaseEvent(e[j])))
+        if (gHostFill)
         {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            return error;
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
         }
 
         // Fill the result buffer with garbage, so that old results don't carry
         // over
         uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
+        if (gHostFill)
         {
-            vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
-                       error);
-            return error;
+            memset_pattern4(out[j], &pattern, buffer_size);
+            if ((error = clEnqueueUnmapMemObject(
+                     tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+                           error);
+                return error;
+            }
+        }
+        else
+        {
+            if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                             &pattern, sizeof(pattern), 0,
+                                             buffer_size, 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                           error);
+                return error;
+            }
         }
 
-        // run the kernel
+        // Run the kernel
         size_t vectorCount =
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
@@ -407,13 +349,6 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
         f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
     test_info.relaxedMode = relaxedMode;
 
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        test_info.k[i].resize(test_info.threadCount, nullptr);
-    }
-
     test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
@@ -429,7 +364,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
             vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
 
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
@@ -442,7 +377,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
                 vlog_error("Error: Unable to create sub-buffer of "
                            "gOutBuffer[%d] for region {%zd, %zd}\n",
                            (int)j, region.origin, region.size);
-                goto exit;
+                return error;
             }
         }
         test_info.tinfo[i].tQueue =
@@ -450,27 +385,24 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
         if (NULL == test_info.tinfo[i].tQueue || error)
         {
             vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
+            return error;
         }
     }
 
     // Init the kernels
-    {
-        BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
-                                    test_info.programs, f->nameInCode,
-                                    relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
+    BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+                                test_info.programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     // Run the kernels
     if (!gSkipCorrectnessTesting)
     {
         error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
-        if (error) goto exit;
+        if (error) return error;
 
         if (gWimpyMode)
             vlog("Wimp pass");
@@ -480,15 +412,5 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        for (auto &kernel : test_info.k[i])
-        {
-            clReleaseKernel(kernel);
-        }
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp
index 8d8fec52..623d59ce 100644
--- a/test_conformance/math_brute_force/mad_double.cpp
+++ b/test_conformance/math_brute_force/mad_double.cpp
@@ -23,32 +23,16 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
-{
-    auto kernel_name = GetKernelName(vectorSize);
-    auto source = GetTernaryKernel(kernel_name, name, ParameterType::Double,
-                                   ParameterType::Double, ParameterType::Double,
-                                   ParameterType::Double, vectorSize);
-    std::array<const char *, 1> sources{ source.c_str() };
-    return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
-                      relaxedMode);
-}
-
-struct BuildKernelInfo2
-{
-    cl_kernel *kernels;
-    Programs &programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-};
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetTernaryKernel(kernel_name, builtin, ParameterType::Double,
+                                ParameterType::Double, ParameterType::Double,
+                                ParameterType::Double, vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 } // anonymous namespace
@@ -57,7 +41,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
 {
     int error;
     Programs programs;
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    KernelMatrix kernels;
     float maxError = 0.0f;
     double maxErrorVal = 0.0f;
     double maxErrorVal2 = 0.0f;
@@ -67,14 +52,12 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
     logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
 
     // Init the kernels
-    {
-        BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
-                                     relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
+    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     for (uint64_t i = 0; i < (1ULL << 32); i += step)
     {
@@ -110,18 +93,33 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
             return error;
         }
 
-        // write garbage into output arrays
+        // Write garbage into output arrays
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
             uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            if (gHostFill)
+            {
+                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                               error);
+                    return error;
+                }
             }
         }
 
@@ -131,37 +129,37 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_double) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
+                                        sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
+                                        sizeof(gInBuffer2), &gInBuffer2)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
-                                        &gInBuffer3)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
+                                        sizeof(gInBuffer3), &gInBuffer3)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
 
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
+                return error;
             }
         }
 
@@ -184,7 +182,7 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray failed %d\n", error);
-                goto exit;
+                return error;
             }
         }
 
@@ -212,12 +210,5 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp
index 04ac5aa6..3127cca0 100644
--- a/test_conformance/math_brute_force/mad_float.cpp
+++ b/test_conformance/math_brute_force/mad_float.cpp
@@ -23,32 +23,16 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
-{
-    auto kernel_name = GetKernelName(vectorSize);
-    auto source = GetTernaryKernel(kernel_name, name, ParameterType::Float,
-                                   ParameterType::Float, ParameterType::Float,
-                                   ParameterType::Float, vectorSize);
-    std::array<const char *, 1> sources{ source.c_str() };
-    return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
-                      relaxedMode);
-}
-
-struct BuildKernelInfo2
-{
-    cl_kernel *kernels;
-    Programs &programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-};
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetTernaryKernel(kernel_name, builtin, ParameterType::Float,
+                                ParameterType::Float, ParameterType::Float,
+                                ParameterType::Float, vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 } // anonymous namespace
@@ -60,7 +44,8 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
 
     Programs programs;
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    KernelMatrix kernels;
     float maxError = 0.0f;
     float maxErrorVal = 0.0f;
     float maxErrorVal2 = 0.0f;
@@ -68,14 +53,12 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
     uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE);
 
     // Init the kernels
-    {
-        BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
-                                     relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
+    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     for (uint64_t i = 0; i < (1ULL << 32); i += step)
     {
@@ -111,18 +94,33 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
             return error;
         }
 
-        // write garbage into output arrays
+        // Write garbage into output arrays
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
             uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            if (gHostFill)
+            {
+                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                               error);
+                    return error;
+                }
             }
         }
 
@@ -132,37 +130,37 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_float) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
+                                        sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
+                                        sizeof(gInBuffer2), &gInBuffer2)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
-                                        &gInBuffer3)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
+                                        sizeof(gInBuffer3), &gInBuffer3)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
 
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
+                return error;
             }
         }
 
@@ -185,7 +183,7 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray failed %d\n", error);
-                goto exit;
+                return error;
             }
         }
 
@@ -213,12 +211,5 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp
index 64491bd4..74dd5c47 100644
--- a/test_conformance/math_brute_force/main.cpp
+++ b/test_conformance/math_brute_force/main.cpp
@@ -65,6 +65,7 @@ static int gStopOnError = 0;
 static bool gSkipRestOfTests;
 int gForceFTZ = 0;
 int gWimpyMode = 0;
+int gHostFill = 0;
 static int gHasDouble = 0;
 static int gTestFloat = 1;
 // This flag should be 'ON' by default and it can be changed through the command
@@ -421,6 +422,8 @@ static int ParseArgs(int argc, const char **argv)
                         parseWimpyReductionFactor(arg, gWimpyReductionFactor);
                         break;
 
+                    case 'b': gHostFill ^= 1; break;
+
                     case 'z': gForceFTZ ^= 1; break;
 
                     case '1':
@@ -550,6 +553,7 @@ static void PrintUsage(void)
     vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
          "1-10, default factor(%u)\n",
          gWimpyReductionFactor);
+    vlog("\t\t-b\tFill buffers on host instead of device. (Default: off)\n");
     vlog("\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by "
          "device capabilities by default.)\n");
     vlog("\t\t-v\tToggle Verbosity (Default: off)\n ");
@@ -1022,82 +1026,6 @@ int IsTininessDetectedBeforeRounding(void)
     return 0;
 }
 
-
-int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
-               cl_program *p, bool relaxedMode)
-{
-    int error = 0;
-    char options[200] = "";
-
-    if (gForceFTZ)
-    {
-        strcat(options, " -cl-denorms-are-zero");
-    }
-
-    if (relaxedMode)
-    {
-        strcat(options, " -cl-fast-relaxed-math");
-    }
-
-    error =
-        create_single_kernel_helper(gContext, p, k, count, c, name, options);
-    if (error != CL_SUCCESS)
-    {
-        vlog_error("\t\tFAILED -- Failed to create kernel. (%d)\n", error);
-        return error;
-    }
-
-    return error;
-}
-
-int MakeKernels(const char **c, cl_uint count, const char *name,
-                cl_uint kernel_count, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
-{
-    char options[200] = "";
-
-    if (gForceFTZ)
-    {
-        strcat(options, " -cl-denorms-are-zero ");
-    }
-
-    if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
-    {
-        strcat(options, " -cl-fp32-correctly-rounded-divide-sqrt ");
-    }
-
-    if (relaxedMode)
-    {
-        strcat(options, " -cl-fast-relaxed-math");
-    }
-
-    int error =
-        create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
-    if (error != CL_SUCCESS)
-    {
-        vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
-        return error;
-    }
-
-    for (cl_uint i = 0; i < kernel_count; i++)
-    {
-        k[i] = clCreateKernel(*p, name, &error);
-        if (NULL == k[i] || error)
-        {
-            char buffer[2048] = "";
-
-            vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
-            clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG,
-                                  sizeof(buffer), buffer, NULL);
-            vlog_error("Log: %s\n", buffer);
-            return error;
-        }
-    }
-
-    return error;
-}
-
-
 static int IsInRTZMode(void)
 {
     int error;
diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp
index b5f1ab09..2ae65424 100644
--- a/test_conformance/math_brute_force/ternary_double.cpp
+++ b/test_conformance/math_brute_force/ternary_double.cpp
@@ -27,32 +27,16 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
-{
-    auto kernel_name = GetKernelName(vectorSize);
-    auto source = GetTernaryKernel(kernel_name, name, ParameterType::Double,
-                                   ParameterType::Double, ParameterType::Double,
-                                   ParameterType::Double, vectorSize);
-    std::array<const char *, 1> sources{ source.c_str() };
-    return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
-                      relaxedMode);
-}
-
-struct BuildKernelInfo2
-{
-    cl_kernel *kernels;
-    Programs &programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-};
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetTernaryKernel(kernel_name, builtin, ParameterType::Double,
+                                ParameterType::Double, ParameterType::Double,
+                                ParameterType::Double, vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 // A table of more difficult cases to get right
@@ -134,7 +118,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
 {
     int error;
     Programs programs;
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    KernelMatrix kernels;
     float maxError = 0.0f;
     int ftz = f->ftz || gForceFTZ;
     double maxErrorVal = 0.0f;
@@ -147,14 +132,12 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
     Force64BitFPUPrecision();
 
     // Init the kernels
-    {
-        BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
-                                     relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
+    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     for (uint64_t i = 0; i < (1ULL << 32); i += step)
     {
@@ -215,18 +198,33 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
             return error;
         }
 
-        // write garbage into output arrays
+        // Write garbage into output arrays
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
             uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            if (gHostFill)
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
+                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
+            {
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                               error);
+                    return error;
+                }
             }
         }
 
@@ -236,37 +234,37 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
             size_t vectorSize = sizeof(cl_double) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
+                                        sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
+                                        sizeof(gInBuffer2), &gInBuffer2)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
-                                        &gInBuffer3)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
+                                        sizeof(gInBuffer3), &gInBuffer3)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
 
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
+                return error;
             }
         }
 
@@ -289,7 +287,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray failed %d\n", error);
-                goto exit;
+                return error;
             }
         }
 
@@ -617,8 +615,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
                                    "%.13la, %.13la}: *%.13la vs. %.13la\n",
                                    f->name, sizeNames[k], err, s[j], s2[j],
                                    s3[j], ((double *)gOut_Ref)[j], test);
-                        error = -1;
-                        goto exit;
+                        return -1;
                     }
                 }
             }
@@ -653,12 +650,5 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp
index cf361841..d11f4ba3 100644
--- a/test_conformance/math_brute_force/ternary_float.cpp
+++ b/test_conformance/math_brute_force/ternary_float.cpp
@@ -27,32 +27,16 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
-{
-    auto kernel_name = GetKernelName(vectorSize);
-    auto source = GetTernaryKernel(kernel_name, name, ParameterType::Float,
-                                   ParameterType::Float, ParameterType::Float,
-                                   ParameterType::Float, vectorSize);
-    std::array<const char *, 1> sources{ source.c_str() };
-    return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p,
-                      relaxedMode);
-}
-
-struct BuildKernelInfo2
-{
-    cl_kernel *kernels;
-    Programs &programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-};
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetTernaryKernel(kernel_name, builtin, ParameterType::Float,
+                                ParameterType::Float, ParameterType::Float,
+                                ParameterType::Float, vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 // A table of more difficult cases to get right
@@ -146,7 +130,8 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
 
     Programs programs;
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    KernelMatrix kernels;
     float maxError = 0.0f;
     int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
     float maxErrorVal = 0.0f;
@@ -165,14 +150,12 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     int skipNanInf = (0 == strcmp("fma", f->nameInCode)) && !gInfNanSupport;
 
     // Init the kernels
-    {
-        BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
-                                     relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
+    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     for (uint64_t i = 0; i < (1ULL << 32); i += step)
     {
@@ -237,18 +220,33 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
             return error;
         }
 
-        // write garbage into output arrays
+        // Write garbage into output arrays
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
             uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            if (gHostFill)
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
+                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
+            {
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                               error);
+                    return error;
+                }
             }
         }
 
@@ -258,37 +256,37 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
             size_t vectorSize = sizeof(cl_float) * sizeValues[j];
             size_t localCount = (BUFFER_SIZE + vectorSize - 1)
                 / vectorSize; // BUFFER_SIZE / vectorSize  rounded up
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
+                                        sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
-                                        &gInBuffer2)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
+                                        sizeof(gInBuffer2), &gInBuffer2)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
-                                        &gInBuffer3)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 3,
+                                        sizeof(gInBuffer3), &gInBuffer3)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
 
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
+                return error;
             }
         }
 
@@ -326,7 +324,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray failed %d\n", error);
-                goto exit;
+                return error;
             }
         }
 
@@ -754,8 +752,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
                             f->name, sizeNames[k], err, s[j], s2[j], s3[j],
                             ((cl_uint *)s)[j], ((cl_uint *)s2)[j],
                             ((cl_uint *)s3)[j], ((float *)gOut_Ref)[j], test);
-                        error = -1;
-                        goto exit;
+                        return -1;
                     }
                 }
             }
@@ -789,12 +786,5 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp
index 177cfe5b..5da18f84 100644
--- a/test_conformance/math_brute_force/unary_double.cpp
+++ b/test_conformance/math_brute_force/unary_double.cpp
@@ -24,94 +24,15 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 f0 = vload3( 0, in + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
-                       info->kernels[vectorSize].data(),
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetUnaryKernel(kernel_name, builtin, ParameterType::Double,
+                              ParameterType::Double, vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 // Thread specific data for a worker thread
@@ -173,24 +94,27 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
     Force64BitFPUPrecision();
 
-    // start the map of the output arrays
     cl_event e[VECTOR_SIZE_COUNT];
     cl_ulong *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    if (gHostFill)
     {
-        out[j] = (cl_ulong *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
+        // start the map of the output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
+            out[j] = (cl_ulong *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_size, 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                           error);
+                return error;
+            }
         }
-    }
 
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+        // Get that moving
+        if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+    }
 
     // Write the new values to the input array
     cl_double *p = (cl_double *)gIn + thread_id * buffer_elements;
@@ -206,31 +130,48 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
     {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            return error;
-        }
-        if ((error = clReleaseEvent(e[j])))
+        if (gHostFill)
         {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            return error;
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
         }
 
         // Fill the result buffer with garbage, so that old results don't carry
         // over
         uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
+        if (gHostFill)
         {
-            vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
-                       error);
-            return error;
+            memset_pattern4(out[j], &pattern, buffer_size);
+            if ((error = clEnqueueUnmapMemObject(
+                     tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+                           error);
+                return error;
+            }
+        }
+        else
+        {
+            if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                             &pattern, sizeof(pattern), 0,
+                                             buffer_size, 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                           error);
+                return error;
+            }
         }
 
-        // run the kernel
+        // Run the kernel
         size_t vectorCount =
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
@@ -421,13 +362,6 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
     test_info.ftz = f->ftz || gForceFTZ;
     test_info.relaxedMode = relaxedMode;
 
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        test_info.k[i].resize(test_info.threadCount, nullptr);
-    }
-
     test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
@@ -443,7 +377,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
             vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
 
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
@@ -456,7 +390,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
                 vlog_error("Error: Unable to create sub-buffer of "
                            "gOutBuffer[%d] for region {%zd, %zd}\n",
                            (int)j, region.origin, region.size);
-                goto exit;
+                return error;
             }
         }
         test_info.tinfo[i].tQueue =
@@ -464,25 +398,24 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
         if (NULL == test_info.tinfo[i].tQueue || error)
         {
             vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
+            return error;
         }
     }
 
     // Init the kernels
-    {
-        BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
-                                    test_info.programs, f->nameInCode,
-                                    relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
+    BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+                                test_info.programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     // Run the kernels
     if (!gSkipCorrectnessTesting)
     {
         error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+        if (error) return error;
 
         // Accumulate the arithmetic errors
         for (cl_uint i = 0; i < test_info.threadCount; i++)
@@ -494,8 +427,6 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
             }
         }
 
-        if (error) goto exit;
-
         if (gWimpyMode)
             vlog("Wimp pass");
         else
@@ -506,15 +437,5 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        for (auto &kernel : test_info.k[i])
-        {
-            clReleaseKernel(kernel);
-        }
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp
index 4c1f1a1d..0c497bc4 100644
--- a/test_conformance/math_brute_force/unary_float.cpp
+++ b/test_conformance/math_brute_force/unary_float.cpp
@@ -23,92 +23,15 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                cl_kernel *k, cl_program *p, bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       f0 = ",
-        name,
-        "( f0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernels(kern, (cl_uint)kernSize, testName, kernel_count, k, p,
-                       relaxedMode);
-}
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
-                       info->kernels[vectorSize].data(),
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetUnaryKernel(kernel_name, builtin, ParameterType::Float,
+                              ParameterType::Float, vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 // Thread specific data for a worker thread
@@ -177,24 +100,27 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
     float half_sin_cos_tan_limit = job->half_sin_cos_tan_limit;
     int ftz = job->ftz;
 
-    // start the map of the output arrays
     cl_event e[VECTOR_SIZE_COUNT];
     cl_uint *out[VECTOR_SIZE_COUNT];
-    for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+    if (gHostFill)
     {
-        out[j] = (cl_uint *)clEnqueueMapBuffer(
-            tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
-            buffer_size, 0, NULL, e + j, &error);
-        if (error || NULL == out[j])
+        // start the map of the output arrays
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
-            vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
-                       error);
-            return error;
+            out[j] = (cl_uint *)clEnqueueMapBuffer(
+                tinfo->tQueue, tinfo->outBuf[j], CL_FALSE, CL_MAP_WRITE, 0,
+                buffer_size, 0, NULL, e + j, &error);
+            if (error || NULL == out[j])
+            {
+                vlog_error("Error: clEnqueueMapBuffer %d failed! err: %d\n", j,
+                           error);
+                return error;
+            }
         }
-    }
 
-    // Get that moving
-    if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+        // Get that moving
+        if ((error = clFlush(tinfo->tQueue))) vlog("clFlush failed\n");
+    }
 
     // Write the new values to the input array
     cl_uint *p = (cl_uint *)gIn + thread_id * buffer_elements;
@@ -233,31 +159,48 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
     for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
     {
-        // Wait for the map to finish
-        if ((error = clWaitForEvents(1, e + j)))
-        {
-            vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
-            return error;
-        }
-        if ((error = clReleaseEvent(e[j])))
+        if (gHostFill)
         {
-            vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
-            return error;
+            // Wait for the map to finish
+            if ((error = clWaitForEvents(1, e + j)))
+            {
+                vlog_error("Error: clWaitForEvents failed! err: %d\n", error);
+                return error;
+            }
+            if ((error = clReleaseEvent(e[j])))
+            {
+                vlog_error("Error: clReleaseEvent failed! err: %d\n", error);
+                return error;
+            }
         }
 
         // Fill the result buffer with garbage, so that old results don't carry
         // over
         uint32_t pattern = 0xffffdead;
-        memset_pattern4(out[j], &pattern, buffer_size);
-        if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
-                                             out[j], 0, NULL, NULL)))
+        if (gHostFill)
         {
-            vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
-                       error);
-            return error;
+            memset_pattern4(out[j], &pattern, buffer_size);
+            if ((error = clEnqueueUnmapMemObject(
+                     tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+                           error);
+                return error;
+            }
+        }
+        else
+        {
+            if ((error = clEnqueueFillBuffer(tinfo->tQueue, tinfo->outBuf[j],
+                                             &pattern, sizeof(pattern), 0,
+                                             buffer_size, 0, NULL, NULL)))
+            {
+                vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                           error);
+                return error;
+            }
         }
 
-        // run the kernel
+        // Run the kernel
         size_t vectorCount =
             (buffer_elements + sizeValues[j] - 1) / sizeValues[j];
         cl_kernel kernel = job->k[j][thread_id]; // each worker thread has its
@@ -573,13 +516,6 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     test_info.ftz =
         f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
     test_info.relaxedMode = relaxedMode;
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        test_info.k[i].resize(test_info.threadCount, nullptr);
-    }
-
     test_info.tinfo.resize(test_info.threadCount);
     for (cl_uint i = 0; i < test_info.threadCount; i++)
     {
@@ -595,7 +531,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
             vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
                        "region {%zd, %zd}\n",
                        region.origin, region.size);
-            goto exit;
+            return error;
         }
 
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
@@ -608,7 +544,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
                 vlog_error("Error: Unable to create sub-buffer of "
                            "gOutBuffer[%d] for region {%zd, %zd}\n",
                            (int)j, region.origin, region.size);
-                goto exit;
+                return error;
             }
         }
         test_info.tinfo[i].tQueue =
@@ -616,7 +552,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
         if (NULL == test_info.tinfo[i].tQueue || error)
         {
             vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
+            return error;
         }
     }
 
@@ -639,20 +575,19 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
     }
 
     // Init the kernels
-    {
-        BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
-                                    test_info.programs, f->nameInCode,
-                                    relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
+    BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+                                test_info.programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     // Run the kernels
     if (!gSkipCorrectnessTesting || skipTestingRelaxed)
     {
         error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+        if (error) return error;
 
         // Accumulate the arithmetic errors
         for (cl_uint i = 0; i < test_info.threadCount; i++)
@@ -664,8 +599,6 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
             }
         }
 
-        if (error) goto exit;
-
         if (gWimpyMode)
             vlog("Wimp pass");
         else
@@ -674,7 +607,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
         if (skipTestingRelaxed)
         {
             vlog(" (rlx skip correctness testing)\n");
-            goto exit;
+            return error;
         }
 
         vlog("\t%8.2f @ %a", maxError, maxErrorVal);
@@ -682,15 +615,5 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        for (auto &kernel : test_info.k[i])
-        {
-            clReleaseKernel(kernel);
-        }
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp
index 6d7c61d6..f464c791 100644
--- a/test_conformance/math_brute_force/unary_two_results_double.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_double.cpp
@@ -24,107 +24,16 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global double",
-                        sizeNames[vectorSize],
-                        "* out2, __global double",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i], out2 + i );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global double* out2, __global double* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 f0 = vload3( 0, in + 3 * i );\n"
-        "       double3 iout = NAN;\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "       vstore3( iout, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       double3 iout = NAN;\n"
-        "       double3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               out2[3*i+1] = iout.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               out2[3*i] = iout.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-struct BuildKernelInfo2
-{
-    cl_kernel *kernels;
-    Programs &programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-};
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetUnaryKernel(kernel_name, builtin, ParameterType::Double,
+                              ParameterType::Double, ParameterType::Double,
+                              vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 } // anonymous namespace
@@ -133,7 +42,8 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
 {
     int error;
     Programs programs;
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    KernelMatrix kernels;
     float maxError0 = 0.0f;
     float maxError1 = 0.0f;
     int ftz = f->ftz || gForceFTZ;
@@ -148,14 +58,12 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
     Force64BitFPUPrecision();
 
     // Init the kernels
-    {
-        BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
-                                     relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
+    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     for (uint64_t i = 0; i < (1ULL << 32); i += step)
     {
@@ -178,28 +86,53 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
             return error;
         }
 
-        // write garbage into output arrays
+        // Write garbage into output arrays
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
             uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            if (gHostFill)
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
+                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
 
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
+                memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut2[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer 1 failed! err: %d\n",
+                               error);
+                    return error;
+                }
+
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer2[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer 2 failed! err: %d\n",
+                               error);
+                    return error;
+                }
             }
         }
 
@@ -208,31 +141,32 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_double);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
+            if ((error =
+                     clSetKernelArg(kernels[j][thread_id], 1,
+                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
+                                        sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
 
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
+                return error;
             }
         }
 
@@ -258,14 +192,14 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray failed %d\n", error);
-                goto exit;
+                return error;
             }
             if ((error =
                      clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
                                          BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
+                return error;
             }
         }
 
@@ -404,8 +338,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
                             f->name, sizeNames[k], err, err2,
                             ((double *)gIn)[j], ((double *)gOut_Ref)[j],
                             ((double *)gOut_Ref2)[j], test, test2);
-                        error = -1;
-                        goto exit;
+                        return -1;
                     }
                 }
             }
@@ -440,12 +373,5 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp
index 42e858c4..74c5a160 100644
--- a/test_conformance/math_brute_force/unary_two_results_float.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_float.cpp
@@ -24,105 +24,16 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global float",
-                        sizeNames[vectorSize],
-                        "* out2, __global float",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i], out2 + i );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global float* out2, __global float* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       float3 iout = NAN;\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "       vstore3( iout, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       float3 iout = NAN;\n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               out2[3*i+1] = iout.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               out2[3*i] = iout.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-struct BuildKernelInfo2
-{
-    cl_kernel *kernels;
-    Programs &programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-};
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetUnaryKernel(kernel_name, builtin, ParameterType::Float,
+                              ParameterType::Float, ParameterType::Float,
+                              vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 } // anonymous namespace
@@ -131,7 +42,8 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
 {
     int error;
     Programs programs;
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    KernelMatrix kernels;
     float maxError0 = 0.0f;
     float maxError1 = 0.0f;
     int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -147,14 +59,12 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
 
     float float_ulps = getAllowedUlpError(f, relaxedMode);
     // Init the kernels
-    {
-        BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
-                                     relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
+    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     for (uint64_t i = 0; i < (1ULL << 32); i += step)
     {
@@ -192,28 +102,53 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
             return error;
         }
 
-        // write garbage into output arrays
+        // Write garbage into output arrays
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
             uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            if (gHostFill)
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
+                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
 
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
+                memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut2[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer 1 failed! err: %d\n",
+                               error);
+                    return error;
+                }
+
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer 2 failed! err: %d\n",
+                               error);
+                    return error;
+                }
             }
         }
 
@@ -222,31 +157,32 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_float);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
+            if ((error =
+                     clSetKernelArg(kernels[j][thread_id], 1,
+                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
+                                        sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
 
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
+                return error;
             }
         }
 
@@ -312,14 +248,14 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray failed %d\n", error);
-                goto exit;
+                return error;
             }
             if ((error =
                      clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
                                          BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
+                return error;
             }
         }
 
@@ -534,8 +470,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
                                    f->name, sizeNames[k], err, err2,
                                    ((float *)gIn)[j], ((float *)gOut_Ref)[j],
                                    ((float *)gOut_Ref2)[j], test, test2);
-                        error = -1;
-                        goto exit;
+                        return -1;
                     }
                 }
             }
@@ -572,12 +507,5 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp
index 8b751944..2c84826f 100644
--- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp
@@ -25,107 +25,16 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global int",
-                        sizeNames[vectorSize],
-                        "* out2, __global double",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i], out2 + i );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global int* out2, __global double* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       double3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 iout = INT_MIN;\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "       vstore3( iout, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       int3 iout = INT_MIN;\n"
-        "       double3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (double3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               out2[3*i+1] = iout.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               out2[3*i] = iout.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-struct BuildKernelInfo2
-{
-    cl_kernel *kernels;
-    Programs &programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-};
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetUnaryKernel(kernel_name, builtin, ParameterType::Double,
+                              ParameterType::Int, ParameterType::Double,
+                              vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 cl_ulong abs_cl_long(cl_long i)
@@ -140,7 +49,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
 {
     int error;
     Programs programs;
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    KernelMatrix kernels;
     float maxError = 0.0f;
     int64_t maxError2 = 0;
     int ftz = f->ftz || gForceFTZ;
@@ -156,14 +66,12 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
     Force64BitFPUPrecision();
 
     // Init the kernels
-    {
-        BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
-                                     relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
+    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     for (uint64_t i = 0; i < (1ULL << 32); i += step)
     {
@@ -186,28 +94,53 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
             return error;
         }
 
-        // write garbage into output arrays
+        // Write garbage into output arrays
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
             uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            if (gHostFill)
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
+                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
 
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
+                memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut2[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer 1 failed! err: %d\n",
+                               error);
+                    return error;
+                }
+
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer2[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer 2 failed! err: %d\n",
+                               error);
+                    return error;
+                }
             }
         }
 
@@ -216,31 +149,32 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_double);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
+            if ((error =
+                     clSetKernelArg(kernels[j][thread_id], 1,
+                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
+                                        sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
 
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
+                return error;
             }
         }
 
@@ -262,14 +196,14 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray failed %d\n", error);
-                goto exit;
+                return error;
             }
             if ((error =
                      clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
                                          BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
+                return error;
             }
         }
 
@@ -376,8 +310,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
                                    f->name, sizeNames[k], err, (int)iErr,
                                    ((double *)gIn)[j], ((double *)gOut_Ref)[j],
                                    ((int *)gOut_Ref2)[j], test, q2[j]);
-                        error = -1;
-                        goto exit;
+                        return -1;
                     }
                 }
             }
@@ -412,12 +345,5 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp
index 54843a29..aea49fa6 100644
--- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp
@@ -25,105 +25,16 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global int",
-                        sizeNames[vectorSize],
-                        "* out2, __global float",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i], out2 + i );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global int* out2, __global float* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       float3 f0 = vload3( 0, in + 3 * i );\n"
-        "       int3 iout = INT_MIN;\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "       vstore3( iout, 0, out2 + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       int3 iout = INT_MIN;\n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               f0 = (float3)( in[3*i], NAN, NAN ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( f0, &iout );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               out2[3*i+1] = iout.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               out2[3*i] = iout.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-struct BuildKernelInfo2
-{
-    cl_kernel *kernels;
-    Programs &programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-};
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetUnaryKernel(kernel_name, builtin, ParameterType::Float,
+                              ParameterType::Int, ParameterType::Float,
+                              vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 cl_ulong abs_cl_long(cl_long i)
@@ -138,7 +49,8 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
 {
     int error;
     Programs programs;
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    KernelMatrix kernels;
     float maxError = 0.0f;
     int64_t maxError2 = 0;
     int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
@@ -159,14 +71,12 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
     maxiError = float_ulps == INFINITY ? CL_ULONG_MAX : 0;
 
     // Init the kernels
-    {
-        BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
-                                     relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
+    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     for (uint64_t i = 0; i < (1ULL << 32); i += step)
     {
@@ -189,28 +99,53 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
             return error;
         }
 
-        // write garbage into output arrays
+        // Write garbage into output arrays
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
             uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            if (gHostFill)
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
-            }
+                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
 
-            memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
-            if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j], CL_FALSE,
-                                              0, BUFFER_SIZE, gOut2[j], 0, NULL,
-                                              NULL)))
+                memset_pattern4(gOut2[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer2[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut2[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2b(%d) ***\n",
-                           error, j);
-                goto exit;
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer 1 failed! err: %d\n",
+                               error);
+                    return error;
+                }
+
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer2[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer 2 failed! err: %d\n",
+                               error);
+                    return error;
+                }
             }
         }
 
@@ -219,31 +154,32 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_float);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gOutBuffer2[j]),
-                                        &gOutBuffer2[j])))
+            if ((error =
+                     clSetKernelArg(kernels[j][thread_id], 1,
+                                    sizeof(gOutBuffer2[j]), &gOutBuffer2[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer),
-                                        &gInBuffer)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 2,
+                                        sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
 
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
+                return error;
             }
         }
 
@@ -265,14 +201,14 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray failed %d\n", error);
-                goto exit;
+                return error;
             }
             if ((error =
                      clEnqueueReadBuffer(gQueue, gOutBuffer2[j], CL_TRUE, 0,
                                          BUFFER_SIZE, gOut2[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray2 failed %d\n", error);
-                goto exit;
+                return error;
             }
         }
 
@@ -374,8 +310,7 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
                                    f->name, sizeNames[k], err, (int)iErr,
                                    ((float *)gIn)[j], ((float *)gOut_Ref)[j],
                                    ((int *)gOut_Ref2)[j], test, q2[j]);
-                        error = -1;
-                        goto exit;
+                        return -1;
                     }
                 }
             }
@@ -410,12 +345,5 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp
index 9b60904a..8521b4b9 100644
--- a/test_conformance/math_brute_force/unary_u_double.cpp
+++ b/test_conformance/math_brute_force/unary_u_double.cpp
@@ -24,102 +24,15 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
-{
-    const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-                        "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global double",
-                        sizeNames[vectorSize],
-                        "* out, __global ulong",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global double* out, __global ulong* in                 )\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       ulong3 u0 = vload3( 0, in + 3 * i );\n"
-        "       double3 f0 = ",
-        name,
-        "( u0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       ulong3 u0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               u0 = (ulong3)( in[3*i], 0xdeaddeaddeaddeadUL, "
-        "0xdeaddeaddeaddeadUL ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               u0 = (ulong3)( in[3*i], in[3*i+1], "
-        "0xdeaddeaddeaddeadUL ); \n"
-        "               break;\n"
-        "       }\n"
-        "       double3 f0 = ",
-        name,
-        "( u0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-struct BuildKernelInfo2
-{
-    cl_kernel *kernels;
-    Programs &programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-};
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetUnaryKernel(kernel_name, builtin, ParameterType::Double,
+                              ParameterType::ULong, vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 cl_ulong random64(MTdata d)
@@ -133,7 +46,8 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
 {
     int error;
     Programs programs;
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
+    KernelMatrix kernels;
     float maxError = 0.0f;
     int ftz = f->ftz || gForceFTZ;
     double maxErrorVal = 0.0f;
@@ -144,14 +58,12 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
     Force64BitFPUPrecision();
 
     // Init the kernels
-    {
-        BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
-                                     relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
+    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     for (uint64_t i = 0; i < (1ULL << 32); i += step)
     {
@@ -167,18 +79,33 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
             return error;
         }
 
-        // write garbage into output arrays
+        // Write garbage into output arrays
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
             uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            if (gHostFill)
+            {
+                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                               error);
+                    return error;
+                }
             }
         }
 
@@ -187,25 +114,25 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_double);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
+                                        sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
 
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
+                return error;
             }
         }
 
@@ -226,7 +153,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray failed %d\n", error);
-                goto exit;
+                return error;
             }
         }
 
@@ -273,8 +200,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
                             "*%.13la vs. %.13la\n",
                             f->name, sizeNames[k], err, ((uint64_t *)gIn)[j],
                             ((double *)gOut_Ref)[j], test);
-                        error = -1;
-                        goto exit;
+                        return -1;
                     }
                 }
             }
@@ -308,12 +234,5 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp
index b67a9bda..7a410240 100644
--- a/test_conformance/math_brute_force/unary_u_float.cpp
+++ b/test_conformance/math_brute_force/unary_u_float.cpp
@@ -24,99 +24,15 @@
 
 namespace {
 
-int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p,
-                bool relaxedMode)
-{
-    const char *c[] = { "__kernel void math_kernel",
-                        sizeNames[vectorSize],
-                        "( __global float",
-                        sizeNames[vectorSize],
-                        "* out, __global uint",
-                        sizeNames[vectorSize],
-                        "* in )\n"
-                        "{\n"
-                        "   size_t i = get_global_id(0);\n"
-                        "   out[i] = ",
-                        name,
-                        "( in[i] );\n"
-                        "}\n" };
-
-    const char *c3[] = {
-        "__kernel void math_kernel",
-        sizeNames[vectorSize],
-        "( __global float* out, __global uint* in)\n"
-        "{\n"
-        "   size_t i = get_global_id(0);\n"
-        "   if( i + 1 < get_global_size(0) )\n"
-        "   {\n"
-        "       uint3 u0 = vload3( 0, in + 3 * i );\n"
-        "       float3 f0 = ",
-        name,
-        "( u0 );\n"
-        "       vstore3( f0, 0, out + 3*i );\n"
-        "   }\n"
-        "   else\n"
-        "   {\n"
-        "       size_t parity = i & 1;   // Figure out how many elements are "
-        "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
-        "buffer size \n"
-        "       uint3 u0;\n"
-        "       float3 f0;\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 1:\n"
-        "               u0 = (uint3)( in[3*i], 0xdead, 0xdead ); \n"
-        "               break;\n"
-        "           case 0:\n"
-        "               u0 = (uint3)( in[3*i], in[3*i+1], 0xdead ); \n"
-        "               break;\n"
-        "       }\n"
-        "       f0 = ",
-        name,
-        "( u0 );\n"
-        "       switch( parity )\n"
-        "       {\n"
-        "           case 0:\n"
-        "               out[3*i+1] = f0.y; \n"
-        "               // fall through\n"
-        "           case 1:\n"
-        "               out[3*i] = f0.x; \n"
-        "               break;\n"
-        "       }\n"
-        "   }\n"
-        "}\n"
-    };
-
-    const char **kern = c;
-    size_t kernSize = sizeof(c) / sizeof(c[0]);
-
-    if (sizeValues[vectorSize] == 3)
-    {
-        kern = c3;
-        kernSize = sizeof(c3) / sizeof(c3[0]);
-    }
-
-    char testName[32];
-    snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
-             sizeNames[vectorSize]);
-
-    return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
-}
-
-struct BuildKernelInfo2
-{
-    cl_kernel *kernels;
-    Programs &programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-};
-
 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
-    BuildKernelInfo2 *info = (BuildKernelInfo2 *)p;
-    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
-    return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize,
-                       &(info->programs[vectorSize]), info->relaxedMode);
+    BuildKernelInfo &info = *(BuildKernelInfo *)p;
+    auto generator = [](const std::string &kernel_name, const char *builtin,
+                        cl_uint vector_size_index) {
+        return GetUnaryKernel(kernel_name, builtin, ParameterType::Float,
+                              ParameterType::UInt, vector_size_index);
+    };
+    return BuildKernels(info, job_id, generator);
 }
 
 } // anonymous namespace
@@ -125,7 +41,8 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
 {
     int error;
     Programs programs;
-    cl_kernel kernels[VECTOR_SIZE_COUNT];
+    KernelMatrix kernels;
+    const unsigned thread_id = 0; // Test is currently not multithreaded.
     float maxError = 0.0f;
     int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
     float maxErrorVal = 0.0f;
@@ -141,14 +58,12 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
         float_ulps = f->float_ulps;
 
     // Init the kernels
-    {
-        BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode,
-                                     relaxedMode };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            return error;
-    }
+    BuildKernelInfo build_info{ 1, kernels, programs, f->nameInCode,
+                                relaxedMode };
+    if ((error = ThreadPool_Do(BuildKernelFn,
+                               gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                               &build_info)))
+        return error;
 
     for (uint64_t i = 0; i < (1ULL << 32); i += step)
     {
@@ -171,18 +86,33 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
             return error;
         }
 
-        // write garbage into output arrays
+        // Write garbage into output arrays
         for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
         {
             uint32_t pattern = 0xffffdead;
-            memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
-            if ((error =
-                     clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
-                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
+            if (gHostFill)
+            {
+                memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
+                if ((error = clEnqueueWriteBuffer(gQueue, gOutBuffer[j],
+                                                  CL_FALSE, 0, BUFFER_SIZE,
+                                                  gOut[j], 0, NULL, NULL)))
+                {
+                    vlog_error(
+                        "\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
+                        error, j);
+                    return error;
+                }
+            }
+            else
             {
-                vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
-                           error, j);
-                goto exit;
+                if ((error = clEnqueueFillBuffer(gQueue, gOutBuffer[j],
+                                                 &pattern, sizeof(pattern), 0,
+                                                 BUFFER_SIZE, 0, NULL, NULL)))
+                {
+                    vlog_error("Error: clEnqueueFillBuffer failed! err: %d\n",
+                               error);
+                    return error;
+                }
             }
         }
 
@@ -191,25 +121,25 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
         {
             size_t vectorSize = sizeValues[j] * sizeof(cl_float);
             size_t localCount = (BUFFER_SIZE + vectorSize - 1) / vectorSize;
-            if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
-                                        &gOutBuffer[j])))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 0,
+                                        sizeof(gOutBuffer[j]), &gOutBuffer[j])))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
-            if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
-                                        &gInBuffer)))
+            if ((error = clSetKernelArg(kernels[j][thread_id], 1,
+                                        sizeof(gInBuffer), &gInBuffer)))
             {
                 LogBuildError(programs[j]);
-                goto exit;
+                return error;
             }
 
-            if ((error =
-                     clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
-                                            &localCount, NULL, 0, NULL, NULL)))
+            if ((error = clEnqueueNDRangeKernel(gQueue, kernels[j][thread_id],
+                                                1, NULL, &localCount, NULL, 0,
+                                                NULL, NULL)))
             {
                 vlog_error("FAILED -- could not execute kernel\n");
-                goto exit;
+                return error;
             }
         }
 
@@ -230,7 +160,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
                                          BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
             {
                 vlog_error("ReadArray failed %d\n", error);
-                goto exit;
+                return error;
             }
         }
 
@@ -275,8 +205,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
                             "\n%s%s: %f ulp error at 0x%8.8x: *%a vs. %a\n",
                             f->name, sizeNames[k], err, ((uint32_t *)gIn)[j],
                             ((float *)gOut_Ref)[j], test);
-                        error = -1;
-                        goto exit;
+                        return -1;
                     }
                 }
             }
@@ -310,12 +239,5 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode)
 
     vlog("\n");
 
-exit:
-    // Release
-    for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
-    {
-        clReleaseKernel(kernels[k]);
-    }
-
-    return error;
+    return CL_SUCCESS;
 }
diff --git a/test_conformance/math_brute_force/utility.h b/test_conformance/math_brute_force/utility.h
index b4a59edb..652d990a 100644
--- a/test_conformance/math_brute_force/utility.h
+++ b/test_conformance/math_brute_force/utility.h
@@ -59,9 +59,8 @@ extern int gSkipCorrectnessTesting;
 extern int gForceFTZ;
 extern int gFastRelaxedDerived;
 extern int gWimpyMode;
+extern int gHostFill;
 extern int gIsInRTZMode;
-extern int gInfNanSupport;
-extern int gIsEmbedded;
 extern int gVerboseBruteForce;
 extern uint32_t gMaxVectorSizeIndex;
 extern uint32_t gMinVectorSizeIndex;
@@ -83,12 +82,6 @@ float Abs_Error(float test, double reference);
 float Ulp_Error(float test, double reference);
 float Bruteforce_Ulp_Error_Double(double test, long double reference);
 
-int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
-               cl_program *p, bool relaxedMode);
-int MakeKernels(const char **c, cl_uint count, const char *name,
-                cl_uint kernel_count, cl_kernel *k, cl_program *p,
-                bool relaxedMode);
-
 // used to convert a bucket of bits into a search pattern through double
 inline double DoubleFromUInt32(uint32_t bits)
 {
diff --git a/test_conformance/mem_host_flags/CMakeLists.txt b/test_conformance/mem_host_flags/CMakeLists.txt
index 73a36f0d..4f2b960d 100644
--- a/test_conformance/mem_host_flags/CMakeLists.txt
+++ b/test_conformance/mem_host_flags/CMakeLists.txt
@@ -6,4 +6,6 @@ set(${MODULE_NAME}_SOURCES
     mem_host_image.cpp
 )
 
+set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
+
 include(../CMakeCommon.txt)
diff --git a/test_conformance/mem_host_flags/C_host_memory_block.h b/test_conformance/mem_host_flags/C_host_memory_block.h
index 1d3b4757..78692d17 100644
--- a/test_conformance/mem_host_flags/C_host_memory_block.h
+++ b/test_conformance/mem_host_flags/C_host_memory_block.h
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -22,59 +22,48 @@
 #include <stdlib.h>
 #include <string.h>
 
-template < class T> class C_host_memory_block
-{
+template <class T> class C_host_memory_block {
 public:
-  int num_elements;
-  int element_size;
-  T *pData;
-
-  C_host_memory_block();
-  ~C_host_memory_block();
-  void Init(int num_elem, T &value);
-  void Init(int num_elem);
-  void Set_to(T & val);
-  void Set_to_zero();
-  bool Equal_to(T &val);
-  size_t Count(T &val);
-  bool Equal(C_host_memory_block < T > & another);
-  bool Equal_rect(C_host_memory_block < T > & another,
-                  size_t * host_origin,
-                  size_t * region,
-                  size_t host_row_pitch,
-                  size_t host_slice_pitch);
-  bool Equal(T *pData, int num_elements);
-
-  bool Equal_rect_from_orig(C_host_memory_block < T > & another,
-                            size_t * soffset,
-                            size_t * region,
-                            size_t host_row_pitch,
-                            size_t host_slice_pitch);
-
-  bool Equal_rect_from_orig(T* another_pdata,
-                            size_t * soffset,
-                            size_t * region,
-                            size_t host_row_pitch,
-                            size_t host_slice_pitch);
+    int num_elements;
+    int element_size;
+    T *pData;
+
+    C_host_memory_block();
+    ~C_host_memory_block();
+    void Init(int num_elem, T &value);
+    void Init(int num_elem);
+    void Set_to(T &val);
+    void Set_to_zero();
+    bool Equal_to(T &val);
+    size_t Count(T &val);
+    bool Equal(C_host_memory_block<T> &another);
+    bool Equal_rect(C_host_memory_block<T> &another, size_t *host_origin,
+                    size_t *region, size_t host_row_pitch,
+                    size_t host_slice_pitch);
+    bool Equal(T *pData, int num_elements);
+
+    bool Equal_rect_from_orig(C_host_memory_block<T> &another, size_t *soffset,
+                              size_t *region, size_t host_row_pitch,
+                              size_t host_slice_pitch);
+
+    bool Equal_rect_from_orig(T *another_pdata, size_t *soffset, size_t *region,
+                              size_t host_row_pitch, size_t host_slice_pitch);
 };
 
-template < class T >
-C_host_memory_block<T>::C_host_memory_block()
+template <class T> C_host_memory_block<T>::C_host_memory_block()
 {
-  pData = NULL;
-  element_size = sizeof (T);
-  num_elements = 0;
+    pData = NULL;
+    element_size = sizeof(T);
+    num_elements = 0;
 }
 
-template < class T>
-C_host_memory_block<T>::~C_host_memory_block()
+template <class T> C_host_memory_block<T>::~C_host_memory_block()
 {
     if (pData != NULL) delete[] pData;
     num_elements = 0;
 }
 
-template < class T >
-void C_host_memory_block<T>::Init(int num_elem, T & value)
+template <class T> void C_host_memory_block<T>::Init(int num_elem, T &value)
 {
     if (pData != NULL) delete[] pData;
     pData = new T[num_elem];
@@ -83,8 +72,7 @@ void C_host_memory_block<T>::Init(int num_elem, T & value)
     num_elements = num_elem;
 }
 
-template < class T >
-void C_host_memory_block<T>::Init(int num_elem)
+template <class T> void C_host_memory_block<T>::Init(int num_elem)
 {
     if (pData != NULL) delete[] pData;
     pData = new T[num_elem];
@@ -92,158 +80,149 @@ void C_host_memory_block<T>::Init(int num_elem)
 
     num_elements = num_elem;
 }
-template < class T >
-void  C_host_memory_block<T>::Set_to_zero()
+template <class T> void C_host_memory_block<T>::Set_to_zero()
 {
-  T v = 0;
-  Set_to(v);
+    T v = 0;
+    Set_to(v);
 }
 
-template < class T >
-void  C_host_memory_block<T>::Set_to(T &val)
+template <class T> void C_host_memory_block<T>::Set_to(T &val)
 {
-  for (int i=0; i<num_elements; i++)
-    pData[i] = val;
+    for (int i = 0; i < num_elements; i++) pData[i] = val;
 }
 
-template < class T >
-bool C_host_memory_block<T>::Equal_to(T &val)
+template <class T> bool C_host_memory_block<T>::Equal_to(T &val)
 {
-  int count = 0;
+    int count = 0;
 
-  for (int i=0; i<num_elements; i++) {
-    if (pData[i] == val)
-      count++;
-  }
+    for (int i = 0; i < num_elements; i++)
+    {
+        if (pData[i] == val) count++;
+    }
 
-  return (count== num_elements);
+    return (count == num_elements);
 }
 
-template < class T >
-bool C_host_memory_block<T>::Equal(C_host_memory_block < T > & another)
+template <class T>
+bool C_host_memory_block<T>::Equal(C_host_memory_block<T> &another)
 {
-  int count = 0;
+    int count = 0;
 
-  for (int i=0; i<num_elements; i++) {
-    if (pData[i] == another.pData[i])
-      count++;
-  }
+    for (int i = 0; i < num_elements; i++)
+    {
+        if (pData[i] == another.pData[i]) count++;
+    }
 
-  return (count== num_elements);
+    return (count == num_elements);
 }
 
-template < class T >
+template <class T>
 bool C_host_memory_block<T>::Equal(T *pIn_Data, int Innum_elements)
 {
-  if (this->num_elements!= Innum_elements)
-    return false;
+    if (this->num_elements != Innum_elements) return false;
 
-  int count = 0;
+    int count = 0;
 
-  for (int i=0; i<num_elements ; i++ ) {
-    if (pData[i] == pIn_Data[i])
-      count++;
-  }
+    for (int i = 0; i < num_elements; i++)
+    {
+        if (pData[i] == pIn_Data[i]) count++;
+    }
 
-  return ( count== num_elements);
+    return (count == num_elements);
 }
 
-template < class T >
-size_t C_host_memory_block<T>::Count(T &val)
+template <class T> size_t C_host_memory_block<T>::Count(T &val)
 {
-  size_t count = 0;
-  for (int i=0; i<num_elements; i++) {
-    if (pData[i] == val)
-      count++;
-  }
+    size_t count = 0;
+    for (int i = 0; i < num_elements; i++)
+    {
+        if (pData[i] == val) count++;
+    }
 
-  return count;
+    return count;
 }
 
-template < class T >
-bool C_host_memory_block<T>::Equal_rect(C_host_memory_block < T > & another,
-                                        size_t  * soffset,
-                                        size_t  * region,
+template <class T>
+bool C_host_memory_block<T>::Equal_rect(C_host_memory_block<T> &another,
+                                        size_t *soffset, size_t *region,
                                         size_t host_row_pitch,
                                         size_t host_slice_pitch)
 {
-  size_t row_pitch   = host_row_pitch ? host_row_pitch : region[0];
-  size_t slice_pitch = host_slice_pitch ? host_row_pitch : region[1];
+    size_t row_pitch = host_row_pitch ? host_row_pitch : region[0];
+    size_t slice_pitch = host_slice_pitch ? host_row_pitch : region[1];
 
-  size_t count = 0;
+    size_t count = 0;
 
-  size_t total = region[0] * region[1] * region[2];
+    size_t total = region[0] * region[1] * region[2];
 
-  size_t x, y, z;
-  size_t orig = (size_t)(soffset[0] + row_pitch*soffset[1] + slice_pitch * soffset[2]);
-  for (z=0; z<region[2]; z++)
-    for (y=0; y<region[1]; y++)
-      for (x=0; x<region[0]; x++)
-      {
-        int p1 = (int)(x + row_pitch*y + slice_pitch* z + orig);
-        if (pData[p1] == another.pData[p1])
-          count++;
-      }
+    size_t x, y, z;
+    size_t orig = (size_t)(soffset[0] + row_pitch * soffset[1]
+                           + slice_pitch * soffset[2]);
+    for (z = 0; z < region[2]; z++)
+        for (y = 0; y < region[1]; y++)
+            for (x = 0; x < region[0]; x++)
+            {
+                int p1 = (int)(x + row_pitch * y + slice_pitch * z + orig);
+                if (pData[p1] == another.pData[p1]) count++;
+            }
 
-  return (count == total);
+    return (count == total);
 }
 
-template < class T >
-bool C_host_memory_block<T>::Equal_rect_from_orig(C_host_memory_block < T > & another,
-                                                  size_t * soffset,
-                                                  size_t * region,
-                                                  size_t host_row_pitch,
-                                                  size_t host_slice_pitch)
+template <class T>
+bool C_host_memory_block<T>::Equal_rect_from_orig(
+    C_host_memory_block<T> &another, size_t *soffset, size_t *region,
+    size_t host_row_pitch, size_t host_slice_pitch)
 {
-  size_t row_pitch   = host_row_pitch ? host_row_pitch : region[0];
-  size_t slice_pitch = host_slice_pitch ? host_row_pitch : region[1];
-
-  size_t count = 0;
-
-  size_t total = region[0] * region[1] * region[2];
-
-  size_t x, y, z;
-  size_t orig = soffset[0] + row_pitch * soffset[1] + slice_pitch * soffset[2];
-  for (z=0; z<region[2]; z++)
-    for (y=0; y<region[1]; y++)
-      for (x=0; x<region[0]; x++)
-      {
-        size_t p1 = x + (row_pitch*y) + (slice_pitch*z);
-        size_t p2 = p1 + orig;
-        if (pData[p2] == another.pData[p1])
-          count++;
-      }
-
-  return (count == total);
+    size_t row_pitch = host_row_pitch ? host_row_pitch : region[0];
+    size_t slice_pitch = host_slice_pitch ? host_row_pitch : region[1];
+
+    size_t count = 0;
+
+    size_t total = region[0] * region[1] * region[2];
+
+    size_t x, y, z;
+    size_t orig =
+        soffset[0] + row_pitch * soffset[1] + slice_pitch * soffset[2];
+    for (z = 0; z < region[2]; z++)
+        for (y = 0; y < region[1]; y++)
+            for (x = 0; x < region[0]; x++)
+            {
+                size_t p1 = x + (row_pitch * y) + (slice_pitch * z);
+                size_t p2 = p1 + orig;
+                if (pData[p2] == another.pData[p1]) count++;
+            }
+
+    return (count == total);
 }
 
-template < class T >
-bool C_host_memory_block<T>::Equal_rect_from_orig(T* another_pdata,
-                                                  size_t * soffset,
-                                                  size_t * region,
+template <class T>
+bool C_host_memory_block<T>::Equal_rect_from_orig(T *another_pdata,
+                                                  size_t *soffset,
+                                                  size_t *region,
                                                   size_t host_row_pitch,
                                                   size_t host_slice_pitch)
 {
-  size_t row_pitch   = host_row_pitch ? host_row_pitch : region[0];
-  size_t slice_pitch = host_slice_pitch ? host_row_pitch : region[1];
-
-  size_t count = 0;
-
-  size_t total = region[0] * region[1] * region[2];
-
-  size_t x, y, z;
-  size_t orig = soffset[0] + row_pitch*soffset[1] + slice_pitch * soffset[2];
-  for (z=0; z<region[2]; z++)
-    for (y=0; y<region[1]; y++)
-      for (x=0; x<region[0]; x++)
-      {
-        size_t p1 = x + (row_pitch * y) + (slice_pitch * z);
-        size_t p2 = p1 + orig;
-        if (pData[p2] == another_pdata[p1])
-          count++;
-      }
-
-  return (count == total);
+    size_t row_pitch = host_row_pitch ? host_row_pitch : region[0];
+    size_t slice_pitch = host_slice_pitch ? host_row_pitch : region[1];
+
+    size_t count = 0;
+
+    size_t total = region[0] * region[1] * region[2];
+
+    size_t x, y, z;
+    size_t orig =
+        soffset[0] + row_pitch * soffset[1] + slice_pitch * soffset[2];
+    for (z = 0; z < region[2]; z++)
+        for (y = 0; y < region[1]; y++)
+            for (x = 0; x < region[0]; x++)
+            {
+                size_t p1 = x + (row_pitch * y) + (slice_pitch * z);
+                size_t p2 = p1 + orig;
+                if (pData[p2] == another_pdata[p1]) count++;
+            }
+
+    return (count == total);
 }
 
 #endif
diff --git a/test_conformance/mem_host_flags/checker.h b/test_conformance/mem_host_flags/checker.h
index ae879558..835f120b 100644
--- a/test_conformance/mem_host_flags/checker.h
+++ b/test_conformance/mem_host_flags/checker.h
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -27,291 +27,316 @@
 #define TEST_VALUE 5
 typedef cl_char TEST_ELEMENT_TYPE;
 
-enum {SUCCESS, FAILURE=-1000};
+enum
+{
+    SUCCESS,
+    FAILURE = -1000
+};
 
 extern const char *buffer_write_kernel_code[];
 
-enum BUFFER_TYPE {_BUFFER, _Sub_BUFFER};
-
-template < class T > class cBuffer_checker
+enum BUFFER_TYPE
 {
+    _BUFFER,
+    _Sub_BUFFER
+};
+
+template <class T> class cBuffer_checker {
 public:
-  cBuffer_checker(cl_device_id deviceID, cl_context context,
-                  cl_command_queue queue);
-  ~cBuffer_checker();
+    cBuffer_checker(cl_device_id deviceID, cl_context context,
+                    cl_command_queue queue);
+    ~cBuffer_checker();
 
-  cl_device_id m_deviceID;
-  cl_context m_context;
-  cl_command_queue m_queue;
+    cl_device_id m_deviceID;
+    cl_context m_context;
+    cl_command_queue m_queue;
 
-  clMemWrapper m_buffer, m_buffer_parent;
-  enum BUFFER_TYPE m_buffer_type;
+    clMemWrapper m_buffer, m_buffer_parent;
+    enum BUFFER_TYPE m_buffer_type;
 
-  cl_buffer_region m_sub_buffer_region;
+    cl_buffer_region m_sub_buffer_region;
 
-  cl_int err;
-  cl_bool m_blocking;
-  cl_mem_flags buffer_mem_flag;
+    cl_int err;
+    cl_bool m_blocking;
+    cl_mem_flags buffer_mem_flag;
 
-  C_host_memory_block<T> host_m_0, host_m_1, host_m_2;
-  int m_nNumber_elements;
+    C_host_memory_block<T> host_m_0, host_m_1, host_m_2;
+    int m_nNumber_elements;
 
-  void *pData, *pData2;
+    void *pData, *pData2;
 
-  void * pHost_ptr; // the host ptr at creation
+    void *pHost_ptr; // the host ptr at creation
 
-  size_t buffer_origin[3];
-  size_t host_origin[3];
-  size_t region[3];
-  size_t buffer_row_pitch;
-  size_t buffer_slice_pitch;
-  size_t host_row_pitch;
-  size_t host_slice_pitch;
+    size_t buffer_origin[3];
+    size_t host_origin[3];
+    size_t region[3];
+    size_t buffer_row_pitch;
+    size_t buffer_slice_pitch;
+    size_t host_row_pitch;
+    size_t host_slice_pitch;
 
-  size_t buffer_origin_bytes[3];
-  size_t host_origin_bytes[3];
-  size_t region_bytes[3];
-  size_t buffer_row_pitch_bytes;
-  size_t buffer_slice_pitch_bytes;
-  size_t host_row_pitch_bytes;
-  size_t host_slice_pitch_bytes;
+    size_t buffer_origin_bytes[3];
+    size_t host_origin_bytes[3];
+    size_t region_bytes[3];
+    size_t buffer_row_pitch_bytes;
+    size_t buffer_slice_pitch_bytes;
+    size_t host_row_pitch_bytes;
+    size_t host_slice_pitch_bytes;
 
-  cl_int CreateBuffer(cl_mem_flags buffer_mem_flag, void * pdata);
-  int get_block_size_bytes() { return (int)(m_nNumber_elements * sizeof(T)); };
-  virtual cl_int SetupBuffer() = 0;
+    cl_int CreateBuffer(cl_mem_flags buffer_mem_flag, void *pdata);
+    int get_block_size_bytes()
+    {
+        return (int)(m_nNumber_elements * sizeof(T));
+    };
+    virtual cl_int SetupBuffer() = 0;
 
-  virtual cl_int Setup_Test_Environment();
+    virtual cl_int Setup_Test_Environment();
 
-  virtual cl_int SetupASSubBuffer(cl_mem_flags parent_buffer_flag);
+    virtual cl_int SetupASSubBuffer(cl_mem_flags parent_buffer_flag);
 
-  virtual cl_int verify(cl_int err, cl_event & event);
+    virtual cl_int verify(cl_int err, cl_event &event);
 
-  virtual cl_int Check_GetMemObjectInfo(cl_mem_flags buffer_mem_flag);
+    virtual cl_int Check_GetMemObjectInfo(cl_mem_flags buffer_mem_flag);
 
-  void Init_rect(int bufforg[3], int host_org[3], int region[3],
-                 int buffer_pitch[2], int host_pitch[2]);
+    void Init_rect(int bufforg[3], int host_org[3], int region[3],
+                   int buffer_pitch[2], int host_pitch[2]);
 
-  void Init_rect();
+    void Init_rect();
 
-  virtual cl_int verify_RW_Buffer() = 0;
-  virtual cl_int verify_RW_Buffer_rect() = 0;
-  virtual cl_int verify_RW_Buffer_mapping() = 0;
+    virtual cl_int verify_RW_Buffer() = 0;
+    virtual cl_int verify_RW_Buffer_rect() = 0;
+    virtual cl_int verify_RW_Buffer_mapping() = 0;
 };
 
-template < class T >
+template <class T>
 cBuffer_checker<T>::cBuffer_checker(cl_device_id deviceID, cl_context context,
                                     cl_command_queue queue)
 {
-  m_nNumber_elements = 0;
+    m_nNumber_elements = 0;
 
-  m_deviceID = deviceID;
-  m_context = context;
-  m_queue = queue;
+    m_deviceID = deviceID;
+    m_context = context;
+    m_queue = queue;
 
-  m_blocking = false;
+    m_blocking = false;
 
-  buffer_mem_flag = CL_MEM_READ_WRITE;
-  pData = pData2 = NULL;
+    buffer_mem_flag = CL_MEM_READ_WRITE;
+    pData = pData2 = NULL;
 
-  buffer_origin[0] = buffer_origin[1] = buffer_origin[2] = 0;
-  host_origin[0] = host_origin[1] = host_origin[2] = 0;
-  region[0] = region[1] = region[2] = 0;
-  buffer_row_pitch = buffer_slice_pitch = host_row_pitch = host_slice_pitch = 0;
+    buffer_origin[0] = buffer_origin[1] = buffer_origin[2] = 0;
+    host_origin[0] = host_origin[1] = host_origin[2] = 0;
+    region[0] = region[1] = region[2] = 0;
+    buffer_row_pitch = buffer_slice_pitch = host_row_pitch = host_slice_pitch =
+        0;
 
-  buffer_origin_bytes[0] = buffer_origin_bytes[1] = buffer_origin_bytes[2] = 0;
-  host_origin_bytes[0] = host_origin_bytes[1] = host_origin_bytes[2] = 0;
-  region_bytes[0] = region_bytes[1] = region_bytes[2] = 0;
-  buffer_row_pitch_bytes = buffer_slice_pitch_bytes = 0;
-  host_row_pitch_bytes = host_slice_pitch_bytes = 0;
+    buffer_origin_bytes[0] = buffer_origin_bytes[1] = buffer_origin_bytes[2] =
+        0;
+    host_origin_bytes[0] = host_origin_bytes[1] = host_origin_bytes[2] = 0;
+    region_bytes[0] = region_bytes[1] = region_bytes[2] = 0;
+    buffer_row_pitch_bytes = buffer_slice_pitch_bytes = 0;
+    host_row_pitch_bytes = host_slice_pitch_bytes = 0;
 
-  pHost_ptr = NULL;
+    pHost_ptr = NULL;
 }
 
-template < class T >
-cBuffer_checker<T>::~cBuffer_checker()
-{
-}
+template <class T> cBuffer_checker<T>::~cBuffer_checker() {}
 
 
-template < class T >
-cl_int cBuffer_checker<T>::SetupBuffer()
+template <class T> cl_int cBuffer_checker<T>::SetupBuffer()
 {
-  m_buffer_type = _BUFFER;
-  return CL_SUCCESS;
+    m_buffer_type = _BUFFER;
+    return CL_SUCCESS;
 }
 
-template < class T >
-cl_int cBuffer_checker<T>::Setup_Test_Environment()
+template <class T> cl_int cBuffer_checker<T>::Setup_Test_Environment()
 {
-  return CL_SUCCESS;
+    return CL_SUCCESS;
 }
 
-template < class T >
+template <class T>
 cl_int cBuffer_checker<T>::SetupASSubBuffer(cl_mem_flags parent_buffer_flag)
 {
-  m_buffer_type = _Sub_BUFFER;
-
-  int supersize = 8000;
-  this-> m_nNumber_elements = 1000;
-  T vv1= TEST_VALUE;
-
-  int block_size_in_byte = (int)(supersize * sizeof(T));
-
-  this->host_m_0.Init(supersize);
-
-  m_buffer_parent = clCreateBuffer(this->m_context, parent_buffer_flag,
-                                   block_size_in_byte, this->host_m_0.pData, &err);
-  test_error(err, "clCreateBuffer error");
+    m_buffer_type = _Sub_BUFFER;
+
+    int supersize = 8000;
+    this->m_nNumber_elements = 1000;
+    T vv1 = TEST_VALUE;
+
+    int block_size_in_byte = (int)(supersize * sizeof(T));
+
+    this->host_m_0.Init(supersize);
+
+    m_buffer_parent =
+        clCreateBuffer(this->m_context, parent_buffer_flag, block_size_in_byte,
+                       this->host_m_0.pData, &err);
+    test_error(err, "clCreateBuffer error");
+
+    int size = this->m_nNumber_elements; // the size of subbuffer in elements
+
+    cl_uint base_addr_align_bits;
+    err = clGetDeviceInfo(m_deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN,
+                          sizeof base_addr_align_bits, &base_addr_align_bits,
+                          NULL);
+    test_error(err, "clGetDeviceInfo for CL_DEVICE_MEM_BASE_ADDR_ALIGN");
+
+    int base_addr_align_bytes = base_addr_align_bits / 8;
+
+    int buffer_origin[3] = { base_addr_align_bytes, 0, 0 };
+    int host_origin[3] = { 0, 0, 0 };
+    int region[3] = { size, 1, 1 };
+    int buffer_pitch[2] = { 0, 0 };
+    int host_pitch[2] = { 0, 0 };
+    this->Init_rect(buffer_origin, host_origin, region, buffer_pitch,
+                    host_pitch);
+
+    this->m_nNumber_elements = size; // the size of subbuffer in elements
+    this->host_m_1.Init(this->m_nNumber_elements, vv1);
+
+    this->m_sub_buffer_region.origin = this->buffer_origin_bytes[0]; // in bytes
+    this->m_sub_buffer_region.size = this->region_bytes[0];
+
+    cl_int err = CL_SUCCESS;
+    err = clEnqueueReadBufferRect(
+        this->m_queue, m_buffer_parent, CL_TRUE, this->buffer_origin_bytes,
+        this->host_origin_bytes, this->region_bytes,
+        this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
+        this->host_row_pitch_bytes, this->host_slice_pitch_bytes,
+        this->host_m_1.pData, 0, NULL,
+        NULL); // update the mem_1
+
+    if (err == CL_SUCCESS
+        && (parent_buffer_flag
+            & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS)))
+    {
+        log_error("Calling clEnqueueReadBufferRect on a memory object created "
+                  "with the CL_MEM_HOST_WRITE_ONLY flag or the "
+                  "CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return err;
+    }
+    else
+    {
+        err = CL_SUCCESS;
+    }
+
+    cl_mem_flags f;
+    if (parent_buffer_flag & CL_MEM_HOST_READ_ONLY)
+        f = CL_MEM_HOST_READ_ONLY;
+    else if (parent_buffer_flag & CL_MEM_HOST_WRITE_ONLY)
+        f = CL_MEM_HOST_WRITE_ONLY;
+    else if (parent_buffer_flag & CL_MEM_HOST_NO_ACCESS)
+        f = CL_MEM_HOST_NO_ACCESS;
+
+    m_buffer =
+        clCreateSubBuffer(m_buffer_parent, f, CL_BUFFER_CREATE_TYPE_REGION,
+                          &(this->m_sub_buffer_region), &err);
+    test_error(err, "clCreateSubBuffer error");
+
+    if (parent_buffer_flag | CL_MEM_USE_HOST_PTR)
+    {
+        this->pHost_ptr = (this->host_m_0.pData
+                           + this->m_sub_buffer_region.origin / sizeof(T));
+    }
+
+    T vv2 = 0;
+    this->host_m_2.Init(this->m_nNumber_elements, vv2);
 
-  int size = this->m_nNumber_elements; // the size of subbuffer in elements
-
-  cl_uint base_addr_align_bits;
-  err = clGetDeviceInfo(m_deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof base_addr_align_bits, &base_addr_align_bits, NULL);
-  test_error(err,"clGetDeviceInfo for CL_DEVICE_MEM_BASE_ADDR_ALIGN");
-
-  int base_addr_align_bytes = base_addr_align_bits/8;
-
-  int buffer_origin[3] = {base_addr_align_bytes, 0, 0};
-  int host_origin[3] = {0, 0, 0};
-  int region[3] = {size, 1, 1};
-  int buffer_pitch[2] = {0, 0};
-  int host_pitch[2] = {0, 0};
-  this->Init_rect(buffer_origin, host_origin, region, buffer_pitch, host_pitch);
-
-  this->m_nNumber_elements = size; // the size of subbuffer in elements
-  this->host_m_1.Init(this->m_nNumber_elements, vv1);
-
-  this->m_sub_buffer_region.origin = this->buffer_origin_bytes[0]; // in bytes
-  this->m_sub_buffer_region.size = this->region_bytes[0];
-
-  cl_int err = CL_SUCCESS;
-  err = clEnqueueReadBufferRect(
-      this->m_queue, m_buffer_parent, CL_TRUE, this->buffer_origin_bytes,
-      this->host_origin_bytes, this->region_bytes, this->buffer_row_pitch_bytes,
-      this->buffer_slice_pitch_bytes, this->host_row_pitch_bytes,
-      this->host_slice_pitch_bytes, this->host_m_1.pData, 0, NULL,
-      NULL); // update the mem_1
-
-  if (err == CL_SUCCESS && (parent_buffer_flag & (CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_NO_ACCESS))) {
-    log_error("Calling clEnqueueReadBufferRect on a memory object created with the CL_MEM_HOST_WRITE_ONLY flag or the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
-    err = FAILURE;
     return err;
-  } else {
-    err = CL_SUCCESS;
-  }
-
-  cl_mem_flags f;
-  if (parent_buffer_flag & CL_MEM_HOST_READ_ONLY)
-    f = CL_MEM_HOST_READ_ONLY;
-  else if (parent_buffer_flag & CL_MEM_HOST_WRITE_ONLY)
-    f = CL_MEM_HOST_WRITE_ONLY;
-  else if (parent_buffer_flag & CL_MEM_HOST_NO_ACCESS)
-    f = CL_MEM_HOST_NO_ACCESS;
-
-  m_buffer = clCreateSubBuffer(m_buffer_parent, f, CL_BUFFER_CREATE_TYPE_REGION,
-                               &(this->m_sub_buffer_region), &err);
-  test_error(err, "clCreateSubBuffer error");
-
-  if (parent_buffer_flag | CL_MEM_USE_HOST_PTR)
-  {
-    this->pHost_ptr = (this->host_m_0.pData  + this->m_sub_buffer_region.origin/sizeof(T));
-  }
-
-  T vv2 = 0;
-  this->host_m_2.Init(this->m_nNumber_elements, vv2);
-
-  return err;
 }
 
-template < class T >
-cl_int cBuffer_checker<T>::verify(cl_int err, cl_event & event)
+template <class T>
+cl_int cBuffer_checker<T>::verify(cl_int err, cl_event &event)
 {
-  return CL_SUCCESS;
+    return CL_SUCCESS;
 }
 
-template < class T >
-cl_int cBuffer_checker<T>::CreateBuffer(cl_mem_flags buffer_mem_flag, void *pdata)
+template <class T>
+cl_int cBuffer_checker<T>::CreateBuffer(cl_mem_flags buffer_mem_flag,
+                                        void *pdata)
 {
-  cl_int err = CL_SUCCESS;
-  int block_size_in_byte= m_nNumber_elements* sizeof(T);
+    cl_int err = CL_SUCCESS;
+    int block_size_in_byte = m_nNumber_elements * sizeof(T);
 
-  m_buffer = clCreateBuffer(m_context, buffer_mem_flag, block_size_in_byte, pdata, &err);
+    m_buffer = clCreateBuffer(m_context, buffer_mem_flag, block_size_in_byte,
+                              pdata, &err);
 
-  return err;
+    return err;
 };
 
-template < class T >
+template <class T>
 cl_int cBuffer_checker<T>::Check_GetMemObjectInfo(cl_mem_flags buffer_mem_flag)
 {
-  cl_int err = CL_SUCCESS;
-  cl_mem_flags buffer_mem_flag_Check;
-  err = clGetMemObjectInfo(this->m_buffer, CL_MEM_FLAGS, sizeof(cl_mem_flags),
-                           &buffer_mem_flag_Check, NULL);
+    cl_int err = CL_SUCCESS;
+    cl_mem_flags buffer_mem_flag_Check;
+    err = clGetMemObjectInfo(this->m_buffer, CL_MEM_FLAGS, sizeof(cl_mem_flags),
+                             &buffer_mem_flag_Check, NULL);
 
-  if (buffer_mem_flag_Check != buffer_mem_flag) {
-    log_error("clGetMemObjectInfo result differs from the specified result\n");
-    return err;
-  }
+    if (buffer_mem_flag_Check != buffer_mem_flag)
+    {
+        log_error(
+            "clGetMemObjectInfo result differs from the specified result\n");
+        return err;
+    }
 
-  cl_uint count = 0;
-  err = clGetMemObjectInfo(this->m_buffer, CL_MEM_REFERENCE_COUNT,
-                           sizeof(cl_uint), &count, NULL);
+    cl_uint count = 0;
+    err = clGetMemObjectInfo(this->m_buffer, CL_MEM_REFERENCE_COUNT,
+                             sizeof(cl_uint), &count, NULL);
 
-  if (count > 1)
-    log_info("========= buffer count %d\n", count);
+    if (count > 1) log_info("========= buffer count %d\n", count);
 
-  test_error(err, "clGetMemObjectInfo failed");
+    test_error(err, "clGetMemObjectInfo failed");
 
-  return err;
+    return err;
 }
 
-template < class T >
-void cBuffer_checker<T>::Init_rect ()
+template <class T> void cBuffer_checker<T>::Init_rect()
 {
-  int buffer_origin[3] = {10, 0, 0};
-  int host_origin[3] = {10, 0, 0};
-  int region[3] = {8, 1, 1};
-  int buffer_pitch[2] = {0, 0};
-  int host_pitch[2] = {0, 0};
-
-  this->Init_rect(buffer_origin, host_origin, region, buffer_pitch, host_pitch);
+    int buffer_origin[3] = { 10, 0, 0 };
+    int host_origin[3] = { 10, 0, 0 };
+    int region[3] = { 8, 1, 1 };
+    int buffer_pitch[2] = { 0, 0 };
+    int host_pitch[2] = { 0, 0 };
+
+    this->Init_rect(buffer_origin, host_origin, region, buffer_pitch,
+                    host_pitch);
 }
 
-template < class T >
+template <class T>
 void cBuffer_checker<T>::Init_rect(int bufforg[3], int host_org[3],
-                                   int region_in[3], int buffer_pitch[2], int host_pitch[2])
+                                   int region_in[3], int buffer_pitch[2],
+                                   int host_pitch[2])
 {
-  buffer_origin[0] = bufforg[0];
-  buffer_origin[1] = bufforg[1];
-  buffer_origin[2] = bufforg[2];
-
-  host_origin[0] = host_org[0];
-  host_origin[1] = host_org[1];
-  host_origin[2] = host_org[2];
-
-  region[0] = region_in[0];
-  region[1] = region_in[1];
-  region[2] = region_in[2];
-
-  buffer_row_pitch   = buffer_pitch[0];
-  buffer_slice_pitch = buffer_pitch[1];
-  host_row_pitch     = host_pitch[0];
-  host_slice_pitch   = host_pitch[1];
-
-  int sizeof_element = sizeof(T);
-  for (int k=0; k<3; k++)
-  {
-    buffer_origin_bytes[k] = buffer_origin[k] * sizeof_element;
-    host_origin_bytes [k]  = host_origin[k] * sizeof_element;
-  }
-
-  region_bytes[0]          = region[0] * sizeof_element;
-  region_bytes[1]          = region[1];
-  region_bytes[2]          = region[2];
-  buffer_row_pitch_bytes   = buffer_row_pitch*  sizeof_element;
-  buffer_slice_pitch_bytes = buffer_slice_pitch*  sizeof_element;
-  host_row_pitch_bytes     = host_row_pitch*  sizeof_element;
-  host_slice_pitch_bytes   = host_slice_pitch*  sizeof_element;
+    buffer_origin[0] = bufforg[0];
+    buffer_origin[1] = bufforg[1];
+    buffer_origin[2] = bufforg[2];
+
+    host_origin[0] = host_org[0];
+    host_origin[1] = host_org[1];
+    host_origin[2] = host_org[2];
+
+    region[0] = region_in[0];
+    region[1] = region_in[1];
+    region[2] = region_in[2];
+
+    buffer_row_pitch = buffer_pitch[0];
+    buffer_slice_pitch = buffer_pitch[1];
+    host_row_pitch = host_pitch[0];
+    host_slice_pitch = host_pitch[1];
+
+    int sizeof_element = sizeof(T);
+    for (int k = 0; k < 3; k++)
+    {
+        buffer_origin_bytes[k] = buffer_origin[k] * sizeof_element;
+        host_origin_bytes[k] = host_origin[k] * sizeof_element;
+    }
+
+    region_bytes[0] = region[0] * sizeof_element;
+    region_bytes[1] = region[1];
+    region_bytes[2] = region[2];
+    buffer_row_pitch_bytes = buffer_row_pitch * sizeof_element;
+    buffer_slice_pitch_bytes = buffer_slice_pitch * sizeof_element;
+    host_row_pitch_bytes = host_row_pitch * sizeof_element;
+    host_slice_pitch_bytes = host_slice_pitch * sizeof_element;
 }
 
 #endif
diff --git a/test_conformance/mem_host_flags/checker_image_mem_host_no_access.hpp b/test_conformance/mem_host_flags/checker_image_mem_host_no_access.hpp
index b124960d..a6f90d06 100644
--- a/test_conformance/mem_host_flags/checker_image_mem_host_no_access.hpp
+++ b/test_conformance/mem_host_flags/checker_image_mem_host_no_access.hpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -18,138 +18,149 @@
 
 #include "checker_image_mem_host_write_only.hpp"
 
-template < class T>
-class cImage_check_mem_host_no_access : public cImage_check_mem_host_write_only<T>
-{
+template <class T>
+class cImage_check_mem_host_no_access
+    : public cImage_check_mem_host_write_only<T> {
 public:
-  cImage_check_mem_host_no_access (cl_device_id deviceID, cl_context context, cl_command_queue queue)
-  : cImage_check_mem_host_write_only <T> (deviceID,context, queue)
-  {
-  }
+    cImage_check_mem_host_no_access(cl_device_id deviceID, cl_context context,
+                                    cl_command_queue queue)
+        : cImage_check_mem_host_write_only<T>(deviceID, context, queue)
+    {}
 
-  ~cImage_check_mem_host_no_access() {};
+    ~cImage_check_mem_host_no_access(){};
 
-  cl_int verify_RW_Image();
-  cl_int verify_RW_Image_Mapping();
+    cl_int verify_RW_Image();
+    cl_int verify_RW_Image_Mapping();
 };
 
-template < class T>
-cl_int cImage_check_mem_host_no_access<T>:: verify_RW_Image()
+template <class T> cl_int cImage_check_mem_host_no_access<T>::verify_RW_Image()
 {
-  this->Init_rect();
-
-  cl_event event;
-  size_t img_orig[3] = {0, 0, 0};
-  size_t img_region[3] = {0, 0, 0};
-  img_region[0] = this->m_cl_Image_desc.image_width;
-  img_region[1] = this->m_cl_Image_desc.image_height;
-  img_region[2] = this->m_cl_Image_desc.image_depth;
-
-  int color[4] = {0xFF, 0xFF, 0xFF, 0xFF};
-  cl_int err = CL_SUCCESS;
-  err = clEnqueueFillImage(this->m_queue, this->m_Image,
-                          &color,
-                          img_orig, img_region,
-                          0, NULL, &event);
-  test_error(err, "clEnqueueFillImage error");
-
-  if (!this->m_blocking) {
-    err = clWaitForEvents(1, &event);
-    test_error(err, "clWaitForEvents error");
-  }
-
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
-
-  this->update_host_mem_2();
-
-  int total = (int)(this->region[0] * this->region[1] * this->region[2]);
-
-  T v = 0xFFFFFFFF;
-  int tot = (int)(this->host_m_2.Count(v));
-  if(tot != total){
-    log_error("Buffer data content difference found\n");
-    return FAILURE;
-  }
-
-  err = clEnqueueWriteImage(this->m_queue, this->m_Image, this->m_blocking,
-                            this->buffer_origin, this->region,
-                            this-> buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
-                            this->host_m_1.pData, 0, NULL, &event);
-
-  if (err == CL_SUCCESS) {
-    log_error("Calling clEnqueueWriteImage on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-    return err;
+    this->Init_rect();
+
+    cl_event event;
+    size_t img_orig[3] = { 0, 0, 0 };
+    size_t img_region[3] = { 0, 0, 0 };
+    img_region[0] = this->m_cl_Image_desc.image_width;
+    img_region[1] = this->m_cl_Image_desc.image_height;
+    img_region[2] = this->m_cl_Image_desc.image_depth;
+
+    int color[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
+    cl_int err = CL_SUCCESS;
+    err = clEnqueueFillImage(this->m_queue, this->m_Image, &color, img_orig,
+                             img_region, 0, NULL, &event);
+    test_error(err, "clEnqueueFillImage error");
+
+    if (!this->m_blocking)
+    {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
+
+    this->update_host_mem_2();
+
+    int total = (int)(this->region[0] * this->region[1] * this->region[2]);
+
+    T v = 0xFFFFFFFF;
+    int tot = (int)(this->host_m_2.Count(v));
+    if (tot != total)
+    {
+        log_error("Buffer data content difference found\n");
+        return FAILURE;
+    }
+
+    err = clEnqueueWriteImage(
+        this->m_queue, this->m_Image, this->m_blocking, this->buffer_origin,
+        this->region, this->buffer_row_pitch_bytes,
+        this->buffer_slice_pitch_bytes, this->host_m_1.pData, 0, NULL, &event);
+
+    if (err == CL_SUCCESS)
+    {
+        log_error(
+            "Calling clEnqueueWriteImage on a memory object created with the "
+            "CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return err;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    v = 0;
+    this->host_m_2.Set_to(v);
+    err = clEnqueueReadImage(
+        this->m_queue, this->m_Image, this->m_blocking, this->buffer_origin,
+        this->region, this->buffer_row_pitch_bytes,
+        this->buffer_slice_pitch_bytes, this->host_m_2.pData, 0, NULL, &event);
+
+    if (err == CL_SUCCESS)
+    {
+        log_error(
+            "Calling clEnqueueReadImage on a memory object created with the "
+            "CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return err;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
 
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
-
-  v = 0;
-  this->host_m_2.Set_to(v);
-  err = clEnqueueReadImage(this->m_queue, this->m_Image, this->m_blocking,
-                           this->buffer_origin, this->region,
-                           this-> buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
-                           this->host_m_2.pData, 0, NULL, &event);
-
-  if (err == CL_SUCCESS) {
-    log_error("Calling clEnqueueReadImage on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
-    err = FAILURE;
     return err;
-
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
-
-  return err;
 }
 
-template < class T>
+template <class T>
 cl_int cImage_check_mem_host_no_access<T>::verify_RW_Image_Mapping()
 {
-  this->Init_rect();
-
-  cl_event event;
-  cl_int err = CL_SUCCESS;
+    this->Init_rect();
+
+    cl_event event;
+    cl_int err = CL_SUCCESS;
+
+    T* dataPtr = (T*)clEnqueueMapImage(
+        this->m_queue, this->m_Image, this->m_blocking, CL_MAP_WRITE,
+        this->buffer_origin, this->region, &(this->buffer_row_pitch_bytes),
+        &(this->buffer_slice_pitch_bytes), 0, NULL, &event, &err);
+
+    if (err == CL_SUCCESS)
+    {
+        log_error("Calling clEnqueueMapImage (CL_MAP_WRITE) on a memory object "
+                  "created with the CL_MEM_HOST_NO_ACCESS flag should not "
+                  "return CL_SUCCESS\n");
+        err = FAILURE;
+        return err;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    dataPtr = (T*)clEnqueueMapImage(
+        this->m_queue, this->m_Image, this->m_blocking, CL_MAP_READ,
+        this->buffer_origin, this->region, &(this->buffer_row_pitch_bytes),
+        &(this->buffer_slice_pitch_bytes), 0, NULL, &event, &err);
+
+    if (err == CL_SUCCESS)
+    {
+        log_error("Calling clEnqueueMapImage (CL_MAP_READ) on a memory object "
+                  "created with the CL_MEM_HOST_NO_ACCESS flag should not "
+                  "return CL_SUCCESS\n");
+        err = FAILURE;
+        return err;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
 
-  T * dataPtr = (T*) clEnqueueMapImage(this->m_queue, this->m_Image, this->m_blocking,
-                                       CL_MAP_WRITE,
-                                       this->buffer_origin, this->region,
-                                       &(this-> buffer_row_pitch_bytes),
-                                       &(this->buffer_slice_pitch_bytes),
-                                       0, NULL, &event, &err);
-
-  if ( err ==  CL_SUCCESS)    {
-    log_error("Calling clEnqueueMapImage (CL_MAP_WRITE) on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-    return err;
-
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
-
-  dataPtr = (T*) clEnqueueMapImage(this->m_queue, this->m_Image, this->m_blocking,
-                                   CL_MAP_READ,
-                                   this->buffer_origin, this->region,
-                                   &(this-> buffer_row_pitch_bytes),
-                                   &(this->buffer_slice_pitch_bytes),
-                                   0, NULL, &event, &err);
-
-  if (err == CL_SUCCESS) {
-    log_error("Calling clEnqueueMapImage (CL_MAP_READ) on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
-    err = FAILURE;
     return err;
-
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
-
-  return err;
 }
 
 #endif
diff --git a/test_conformance/mem_host_flags/checker_image_mem_host_read_only.hpp b/test_conformance/mem_host_flags/checker_image_mem_host_read_only.hpp
index 57b535b8..260b9867 100644
--- a/test_conformance/mem_host_flags/checker_image_mem_host_read_only.hpp
+++ b/test_conformance/mem_host_flags/checker_image_mem_host_read_only.hpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -18,273 +18,293 @@
 
 #include "checker.h"
 
-template < class T> class cImage_check_mem_host_read_only : public cBuffer_checker<T>
-{
+template <class T>
+class cImage_check_mem_host_read_only : public cBuffer_checker<T> {
 public:
-  cImage_check_mem_host_read_only(cl_device_id deviceID, cl_context context, cl_command_queue queue)
-  : cBuffer_checker <T> (deviceID, context, queue)
-  {
-    m_cl_image_format.image_channel_order = CL_RGBA;
-    m_cl_image_format.image_channel_data_type = CL_UNSIGNED_INT8;
-
-    m_cl_Image_desc.image_type = CL_MEM_OBJECT_IMAGE1D;
-    m_cl_Image_desc.image_width = 0;
-    m_cl_Image_desc.image_height = 0;
-    m_cl_Image_desc.image_depth = 0;
-    m_cl_Image_desc.image_array_size = 0;
-    m_cl_Image_desc.image_row_pitch = 0;
-    m_cl_Image_desc.image_slice_pitch = 0;
-    m_cl_Image_desc.num_mip_levels = 0;
-    m_cl_Image_desc.num_samples = 0;
-    m_cl_Image_desc.mem_object = NULL;
-
-    m_Image = NULL;
-  };
-
-  ~cImage_check_mem_host_read_only()
-  {
-  };
-
-  cl_int get_image_elements();
-
-  cl_image_format m_cl_image_format;
-  cl_image_desc m_cl_Image_desc;
-  clMemWrapper m_Image;
-
-  virtual cl_int SetupImage();
-  virtual cl_int SetupBuffer();
-  virtual cl_int verify_RW_Image();
-
-  virtual cl_int verify_RW_Image_Mapping();
-  virtual cl_int verify_data(T *pdtaIn);
-  virtual cl_int verify_data_with_offset(T *pdtaIn, size_t *offset);
-
-  cl_int get_image_content_size();
-  cl_int get_image_data_size();
-
-  virtual cl_int verify_RW_Buffer();
-  virtual cl_int verify_RW_Buffer_rect();
-  virtual cl_int verify_RW_Buffer_mapping();
-  cl_int verify_mapping_ptr(T *ptr);
+    cImage_check_mem_host_read_only(cl_device_id deviceID, cl_context context,
+                                    cl_command_queue queue)
+        : cBuffer_checker<T>(deviceID, context, queue)
+    {
+        m_cl_image_format.image_channel_order = CL_RGBA;
+        m_cl_image_format.image_channel_data_type = CL_UNSIGNED_INT8;
+
+        m_cl_Image_desc.image_type = CL_MEM_OBJECT_IMAGE1D;
+        m_cl_Image_desc.image_width = 0;
+        m_cl_Image_desc.image_height = 0;
+        m_cl_Image_desc.image_depth = 0;
+        m_cl_Image_desc.image_array_size = 0;
+        m_cl_Image_desc.image_row_pitch = 0;
+        m_cl_Image_desc.image_slice_pitch = 0;
+        m_cl_Image_desc.num_mip_levels = 0;
+        m_cl_Image_desc.num_samples = 0;
+        m_cl_Image_desc.mem_object = NULL;
+
+        m_Image = NULL;
+    };
+
+    ~cImage_check_mem_host_read_only(){};
+
+    cl_int get_image_elements();
+
+    cl_image_format m_cl_image_format;
+    cl_image_desc m_cl_Image_desc;
+    clMemWrapper m_Image;
+
+    virtual cl_int SetupImage();
+    virtual cl_int SetupBuffer();
+    virtual cl_int verify_RW_Image();
+
+    virtual cl_int verify_RW_Image_Mapping();
+    virtual cl_int verify_data(T *pdtaIn);
+    virtual cl_int verify_data_with_offset(T *pdtaIn, size_t *offset);
+
+    cl_int get_image_content_size();
+    cl_int get_image_data_size();
+
+    virtual cl_int verify_RW_Buffer();
+    virtual cl_int verify_RW_Buffer_rect();
+    virtual cl_int verify_RW_Buffer_mapping();
+    cl_int verify_mapping_ptr(T *ptr);
 };
 
-template < class T >
-cl_int cImage_check_mem_host_read_only< T >::verify_mapping_ptr( T* dataPtr)
+template <class T>
+cl_int cImage_check_mem_host_read_only<T>::verify_mapping_ptr(T *dataPtr)
 {
-  int offset_pixel = (int)(this->buffer_origin[0] + this->buffer_origin[1] *
-                           this->buffer_row_pitch_bytes/ sizeof(T) + this->buffer_origin[2] *
-                           this->buffer_slice_pitch_bytes/sizeof(T));
+    int offset_pixel = (int)(this->buffer_origin[0]
+                             + this->buffer_origin[1]
+                                 * this->buffer_row_pitch_bytes / sizeof(T)
+                             + this->buffer_origin[2]
+                                 * this->buffer_slice_pitch_bytes / sizeof(T));
 
-  dataPtr = dataPtr - offset_pixel;
+    dataPtr = dataPtr - offset_pixel;
 
-  cl_int err = CL_SUCCESS;
-
-  if (this->buffer_mem_flag & CL_MEM_USE_HOST_PTR)
-  {
-    if (this->pHost_ptr != this->host_m_1.pData)
-    {
-      log_error("Host memory pointer difference found\n");
-      return FAILURE;
-    }
+    cl_int err = CL_SUCCESS;
 
-    if(dataPtr != this->host_m_1.pData)
+    if (this->buffer_mem_flag & CL_MEM_USE_HOST_PTR)
     {
-      log_error("Mapped host pointer difference found\n");
-      return FAILURE;
+        if (this->pHost_ptr != this->host_m_1.pData)
+        {
+            log_error("Host memory pointer difference found\n");
+            return FAILURE;
+        }
+
+        if (dataPtr != this->host_m_1.pData)
+        {
+            log_error("Mapped host pointer difference found\n");
+            return FAILURE;
+        }
     }
-  }
 
-  return err;
+    return err;
 }
 
-template < class T >
-cl_int cImage_check_mem_host_read_only< T >::verify_RW_Buffer() { return CL_SUCCESS; };
+template <class T> cl_int cImage_check_mem_host_read_only<T>::verify_RW_Buffer()
+{
+    return CL_SUCCESS;
+};
 
-template < class T >
-cl_int cImage_check_mem_host_read_only< T >::verify_RW_Buffer_rect()  { return CL_SUCCESS; };
+template <class T>
+cl_int cImage_check_mem_host_read_only<T>::verify_RW_Buffer_rect()
+{
+    return CL_SUCCESS;
+};
 
-template < class T >
-cl_int cImage_check_mem_host_read_only< T >::verify_RW_Buffer_mapping() { return CL_SUCCESS; };
+template <class T>
+cl_int cImage_check_mem_host_read_only<T>::verify_RW_Buffer_mapping()
+{
+    return CL_SUCCESS;
+};
 
-template < class T >
-cl_int cImage_check_mem_host_read_only< T >::SetupBuffer()
+template <class T> cl_int cImage_check_mem_host_read_only<T>::SetupBuffer()
 {
-  return cBuffer_checker< T >::SetupBuffer();
+    return cBuffer_checker<T>::SetupBuffer();
 }
 
-template < class T >
-cl_int cImage_check_mem_host_read_only< T >::get_image_content_size()
+template <class T>
+cl_int cImage_check_mem_host_read_only<T>::get_image_content_size()
 {
-  return ((cl_int)(m_cl_Image_desc.image_width*m_cl_Image_desc.image_height *
-                   m_cl_Image_desc.image_depth * m_cl_Image_desc.image_array_size));
+    return ((cl_int)(m_cl_Image_desc.image_width * m_cl_Image_desc.image_height
+                     * m_cl_Image_desc.image_depth
+                     * m_cl_Image_desc.image_array_size));
 }
 
-template < class T >
-cl_int cImage_check_mem_host_read_only< T >::get_image_data_size()
+template <class T>
+cl_int cImage_check_mem_host_read_only<T>::get_image_data_size()
 {
-  size_t slice_pitch = m_cl_Image_desc.image_slice_pitch ? m_cl_Image_desc.image_slice_pitch :
-    (m_cl_Image_desc.image_height *  m_cl_Image_desc.image_width);
-  return (slice_pitch * m_cl_Image_desc.image_depth * m_cl_Image_desc.image_array_size);
+    size_t slice_pitch = m_cl_Image_desc.image_slice_pitch
+        ? m_cl_Image_desc.image_slice_pitch
+        : (m_cl_Image_desc.image_height * m_cl_Image_desc.image_width);
+    return (slice_pitch * m_cl_Image_desc.image_depth
+            * m_cl_Image_desc.image_array_size);
 }
 
-template < class T >
-cl_int cImage_check_mem_host_read_only< T >::get_image_elements()
+template <class T>
+cl_int cImage_check_mem_host_read_only<T>::get_image_elements()
 {
-  return ((cl_int)(m_cl_Image_desc.image_width*m_cl_Image_desc.image_height *
-                   m_cl_Image_desc.image_depth * m_cl_Image_desc.image_array_size));
+    return ((cl_int)(m_cl_Image_desc.image_width * m_cl_Image_desc.image_height
+                     * m_cl_Image_desc.image_depth
+                     * m_cl_Image_desc.image_array_size));
 }
 
-template < class T >
-cl_int cImage_check_mem_host_read_only< T >::SetupImage()
+template <class T> cl_int cImage_check_mem_host_read_only<T>::SetupImage()
 {
-  int all = (int)(m_cl_Image_desc.image_width * m_cl_Image_desc.image_height *
-                  m_cl_Image_desc.image_depth * m_cl_Image_desc.image_array_size);
+    int all =
+        (int)(m_cl_Image_desc.image_width * m_cl_Image_desc.image_height
+              * m_cl_Image_desc.image_depth * m_cl_Image_desc.image_array_size);
 
-  T v = TEST_VALUE;
-  this->host_m_1.Init(all, v);
+    T v = TEST_VALUE;
+    this->host_m_1.Init(all, v);
 
-  cl_int err = CL_SUCCESS;
-  this-> m_Image = clCreateImage(this->m_context, this->buffer_mem_flag,
-                                 &( this-> m_cl_image_format), &(this-> m_cl_Image_desc),
-                                 this->host_m_1.pData, &err);
-  test_error(err , "clCreateImage error");
+    cl_int err = CL_SUCCESS;
+    this->m_Image = clCreateImage(
+        this->m_context, this->buffer_mem_flag, &(this->m_cl_image_format),
+        &(this->m_cl_Image_desc), this->host_m_1.pData, &err);
+    test_error(err, "clCreateImage error");
 
-  this-> pHost_ptr = (void *) (this->host_m_1.pData);
+    this->pHost_ptr = (void *)(this->host_m_1.pData);
 
-  return err;
+    return err;
 }
 
-template < class T >
-cl_int cImage_check_mem_host_read_only< T >::verify_data(T *pDataIN)
+template <class T>
+cl_int cImage_check_mem_host_read_only<T>::verify_data(T *pDataIN)
 {
-  cl_int err = CL_SUCCESS;
-  if (!this->host_m_1.Equal_rect_from_orig(pDataIN, this->buffer_origin,
-                                           this->region, this->host_row_pitch,
-                                           this->host_slice_pitch)) {
-    log_error("Buffer data difference found\n");
-    return FAILURE;
-  }
-
-  return err;
+    cl_int err = CL_SUCCESS;
+    if (!this->host_m_1.Equal_rect_from_orig(pDataIN, this->buffer_origin,
+                                             this->region, this->host_row_pitch,
+                                             this->host_slice_pitch))
+    {
+        log_error("Buffer data difference found\n");
+        return FAILURE;
+    }
+
+    return err;
 }
 
-template < class T >
-cl_int cImage_check_mem_host_read_only< T >::verify_data_with_offset(T *pDataIN,
-                                                                     size_t *offset)
+template <class T>
+cl_int
+cImage_check_mem_host_read_only<T>::verify_data_with_offset(T *pDataIN,
+                                                            size_t *offset)
 {
-  cl_int err = CL_SUCCESS;
-  if (!this->host_m_2.Equal_rect_from_orig(pDataIN, offset, this->region,
-                                           this->host_row_pitch,
-                                           this->host_slice_pitch)) {
-    log_error("Buffer data difference found\n");
-    return FAILURE;
-  }
-
-  return err;
+    cl_int err = CL_SUCCESS;
+    if (!this->host_m_2.Equal_rect_from_orig(pDataIN, offset, this->region,
+                                             this->host_row_pitch,
+                                             this->host_slice_pitch))
+    {
+        log_error("Buffer data difference found\n");
+        return FAILURE;
+    }
+
+    return err;
 }
 
-template < class T >
-cl_int cImage_check_mem_host_read_only< T >::verify_RW_Image()
+template <class T> cl_int cImage_check_mem_host_read_only<T>::verify_RW_Image()
 {
-  this->Init_rect();
-
-  int imge_content_size = this->get_image_content_size();
-  T v = 0;
-  this->host_m_2.Init( imge_content_size, v);
+    this->Init_rect();
 
-  cl_event event;
-  cl_int err = CL_SUCCESS;
-  err = clEnqueueReadImage(this->m_queue, this->m_Image, this->m_blocking,
-                           this->buffer_origin, this->region,
-                           this-> buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
-                           this->host_m_2.pData, 0, NULL, &event);
+    int imge_content_size = this->get_image_content_size();
+    T v = 0;
+    this->host_m_2.Init(imge_content_size, v);
 
-  test_error(err, "clEnqueueReadImage error");
+    cl_event event;
+    cl_int err = CL_SUCCESS;
+    err = clEnqueueReadImage(
+        this->m_queue, this->m_Image, this->m_blocking, this->buffer_origin,
+        this->region, this->buffer_row_pitch_bytes,
+        this->buffer_slice_pitch_bytes, this->host_m_2.pData, 0, NULL, &event);
 
-  if ( !this->m_blocking) {
-    err = clWaitForEvents(1, &event);
-    test_error(err, "clWaitForEvents error");
-  }
+    test_error(err, "clEnqueueReadImage error");
 
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
+    if (!this->m_blocking)
+    {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
 
-  err = this->verify_data(this->host_m_2.pData);
-  test_error(err, "verify_data error");
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
 
-  err = clEnqueueWriteImage(this->m_queue, this->m_Image, this->m_blocking,
-                            this->buffer_origin, this->region,
-                            this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
-                            this->host_m_2.pData, 0, NULL, &event);
+    err = this->verify_data(this->host_m_2.pData);
+    test_error(err, "verify_data error");
 
-  if (err == CL_SUCCESS) {
-    log_error("Calling clEnqueueWriteImage on a memory object created with the CL_MEM_HOST_READ_ONLY flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-    return FAILURE;
+    err = clEnqueueWriteImage(
+        this->m_queue, this->m_Image, this->m_blocking, this->buffer_origin,
+        this->region, this->buffer_row_pitch_bytes,
+        this->buffer_slice_pitch_bytes, this->host_m_2.pData, 0, NULL, &event);
 
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
+    if (err == CL_SUCCESS)
+    {
+        log_error(
+            "Calling clEnqueueWriteImage on a memory object created with the "
+            "CL_MEM_HOST_READ_ONLY flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
 
-  return err;
+    return err;
 }
 
-template < class T >
-cl_int cImage_check_mem_host_read_only< T >::verify_RW_Image_Mapping()
+template <class T>
+cl_int cImage_check_mem_host_read_only<T>::verify_RW_Image_Mapping()
 {
-  cl_event event;
-  cl_int err = CL_SUCCESS;
+    cl_event event;
+    cl_int err = CL_SUCCESS;
 
-  T * dataPtr = (T*) clEnqueueMapImage(this->m_queue, this->m_Image, this->m_blocking,
-                                       CL_MAP_READ,
-                                       this->buffer_origin, this->region,
-                                       &(this-> buffer_row_pitch_bytes),
-                                       &(this->buffer_slice_pitch_bytes),
-                                       0, NULL, &event, &err);
+    T *dataPtr = (T *)clEnqueueMapImage(
+        this->m_queue, this->m_Image, this->m_blocking, CL_MAP_READ,
+        this->buffer_origin, this->region, &(this->buffer_row_pitch_bytes),
+        &(this->buffer_slice_pitch_bytes), 0, NULL, &event, &err);
 
-  if (!this->m_blocking) {
-    err = clWaitForEvents(1, &event);
-    test_error(err, "clWaitForEvents error");
-  }
-
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
+    if (!this->m_blocking)
+    {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
 
-  err= this->verify_mapping_ptr(dataPtr);
-  test_error(err, "clEnqueueMapImage error");
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
 
-  err = this->verify_data(dataPtr);
-  test_error(err, "verify_data error");
+    err = this->verify_mapping_ptr(dataPtr);
+    test_error(err, "clEnqueueMapImage error");
 
-  err= clEnqueueUnmapMemObject (this->m_queue, this->m_Image, dataPtr, 0, NULL, &event);
-  test_error(err, "clEnqueueUnmapMemObject error");
+    err = this->verify_data(dataPtr);
+    test_error(err, "verify_data error");
 
-  err = clWaitForEvents(1, &event);
-  test_error(err, "clWaitForEvents error");
+    err = clEnqueueUnmapMemObject(this->m_queue, this->m_Image, dataPtr, 0,
+                                  NULL, &event);
+    test_error(err, "clEnqueueUnmapMemObject error");
 
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
+    err = clWaitForEvents(1, &event);
+    test_error(err, "clWaitForEvents error");
 
-  dataPtr = (T*) clEnqueueMapImage(this->m_queue, this->m_Image, this->m_blocking,
-                                   CL_MAP_WRITE,
-                                   this->buffer_origin,
-                                   this->region,
-                                   &(this-> buffer_row_pitch_bytes),
-                                   &(this->buffer_slice_pitch_bytes),
-                                   0, NULL, &event, &err);
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
 
-  if (err == CL_SUCCESS) {
-    log_error("Calling clEnqueueMapImage (CL_MAP_WRITE) on a memory object created with the CL_MEM_HOST_READ_ONLY flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-    return FAILURE;
+    dataPtr = (T *)clEnqueueMapImage(
+        this->m_queue, this->m_Image, this->m_blocking, CL_MAP_WRITE,
+        this->buffer_origin, this->region, &(this->buffer_row_pitch_bytes),
+        &(this->buffer_slice_pitch_bytes), 0, NULL, &event, &err);
 
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
+    if (err == CL_SUCCESS)
+    {
+        log_error("Calling clEnqueueMapImage (CL_MAP_WRITE) on a memory object "
+                  "created with the CL_MEM_HOST_READ_ONLY flag should not "
+                  "return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
 
-  return err;
+    return err;
 }
 
 #endif
diff --git a/test_conformance/mem_host_flags/checker_image_mem_host_write_only.hpp b/test_conformance/mem_host_flags/checker_image_mem_host_write_only.hpp
index 35604a6f..7bb99840 100644
--- a/test_conformance/mem_host_flags/checker_image_mem_host_write_only.hpp
+++ b/test_conformance/mem_host_flags/checker_image_mem_host_write_only.hpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -18,297 +18,309 @@
 
 #include "checker_image_mem_host_read_only.hpp"
 
-template < class T> class cImage_check_mem_host_write_only : public cImage_check_mem_host_read_only<T>
-{
+template <class T>
+class cImage_check_mem_host_write_only
+    : public cImage_check_mem_host_read_only<T> {
 
 public:
-  cImage_check_mem_host_write_only(cl_device_id deviceID, cl_context context, cl_command_queue queue)
-  : cImage_check_mem_host_read_only <T> (deviceID, context, queue)
-  {
-  }
+    cImage_check_mem_host_write_only(cl_device_id deviceID, cl_context context,
+                                     cl_command_queue queue)
+        : cImage_check_mem_host_read_only<T>(deviceID, context, queue)
+    {}
 
-  ~cImage_check_mem_host_write_only() {};
+    ~cImage_check_mem_host_write_only(){};
 
-  clMemWrapper m_Image_2;
+    clMemWrapper m_Image_2;
 
-  cl_int verify_RW_Image();
-  cl_int verify_RW_Image_Mapping();
+    cl_int verify_RW_Image();
+    cl_int verify_RW_Image_Mapping();
 
-  cl_int Setup_Test_Environment();
-  cl_int update_host_mem_2();
+    cl_int Setup_Test_Environment();
+    cl_int update_host_mem_2();
 
-  cl_int verify_data();
+    cl_int verify_data(T *pdtaIn);
 };
 
-template < class T >
+template <class T>
 cl_int cImage_check_mem_host_write_only<T>::Setup_Test_Environment()
 {
-  int all= this->get_image_elements();
-
-  T vv2 = 0;
-  this->host_m_2.Init( all, vv2);
-  vv2 = TEST_VALUE;
-  this->host_m_0.Init( all, vv2);
-
-  cl_int err = CL_SUCCESS;
-  this->m_Image_2 = clCreateImage(this->m_context,
-                                  CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-                                  &( this-> m_cl_image_format), &(this->m_cl_Image_desc),
-                                  this->host_m_2.pData, &err);
-  test_error(err, "clCreateImage error");
-
-  return err;
+    int all = this->get_image_elements();
+
+    T vv2 = 0;
+    this->host_m_2.Init(all, vv2);
+    vv2 = TEST_VALUE;
+    this->host_m_0.Init(all, vv2);
+
+    cl_int err = CL_SUCCESS;
+    this->m_Image_2 = clCreateImage(
+        this->m_context,
+        CL_MEM_READ_WRITE | CL_MEM_HOST_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+        &(this->m_cl_image_format), &(this->m_cl_Image_desc),
+        this->host_m_2.pData, &err);
+    test_error(err, "clCreateImage error");
+
+    return err;
 }
 
 // Copy image data from a write_only image to a read_write image and read the
 // contents.
-template < class T >
-cl_int cImage_check_mem_host_write_only< T >::update_host_mem_2()
+template <class T>
+cl_int cImage_check_mem_host_write_only<T>::update_host_mem_2()
 {
-  size_t orig[3] = {0, 0, 0};
-  size_t img_region[3] = {0, 0, 0};
-  img_region[0] = this->m_cl_Image_desc.image_width;
-  img_region[1] = this->m_cl_Image_desc.image_height;
-  img_region[2] = this->m_cl_Image_desc.image_depth;
-
-  cl_event event;
-  cl_int err = CL_SUCCESS;
-  err = clEnqueueCopyImage(this->m_queue,
-                           this->m_Image,
-                           this->m_Image_2,
-                           orig,
-                           orig,
-                           img_region,
-                           0, NULL, &event);
-  test_error(err, "clEnqueueCopyImage error");
-
-  if (!this->m_blocking) {
-    err = clWaitForEvents(1, &event);
-    test_error(err, "clWaitForEvents error");
-  }
-
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
-
-  this->host_m_2.Set_to_zero();
-
-  err = clEnqueueReadImage(this->m_queue, this->m_Image_2, this->m_blocking,
-                           this->buffer_origin, this->region,
-                           this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
-                           this->host_m_2.pData, 0, NULL, &event);
-  test_error(err, "clEnqueueReadImage error");
-
-  if (!this->m_blocking) {
-    err = clWaitForEvents(1, &event);
-    test_error(err, "clWaitForEvents error");
-  }
-
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
-
-  return err;
+    size_t orig[3] = { 0, 0, 0 };
+    size_t img_region[3] = { 0, 0, 0 };
+    img_region[0] = this->m_cl_Image_desc.image_width;
+    img_region[1] = this->m_cl_Image_desc.image_height;
+    img_region[2] = this->m_cl_Image_desc.image_depth;
+
+    cl_event event;
+    cl_int err = CL_SUCCESS;
+    err = clEnqueueCopyImage(this->m_queue, this->m_Image, this->m_Image_2,
+                             orig, orig, img_region, 0, NULL, &event);
+    test_error(err, "clEnqueueCopyImage error");
+
+    if (!this->m_blocking)
+    {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
+
+    this->host_m_2.Set_to_zero();
+
+    err = clEnqueueReadImage(
+        this->m_queue, this->m_Image_2, this->m_blocking, this->buffer_origin,
+        this->region, this->buffer_row_pitch_bytes,
+        this->buffer_slice_pitch_bytes, this->host_m_2.pData, 0, NULL, &event);
+    test_error(err, "clEnqueueReadImage error");
+
+    if (!this->m_blocking)
+    {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
+
+    return err;
 }
 
-template < class T >
-cl_int cImage_check_mem_host_write_only<T>::verify_data()
+template <class T>
+cl_int cImage_check_mem_host_write_only<T>::verify_data(T *pdtaIn)
 {
-  cl_int err = CL_SUCCESS;
-  if (!this->host_m_1.Equal_rect_from_orig(this->host_m_2, this->buffer_origin,
-                                           this->region, this->host_row_pitch,
-                                           this->host_slice_pitch)) {
-    log_error("Image and host data difference found\n");
-    return FAILURE;
-  }
-
-  int total = (int)(this->region[0] * this->region[1] * this->region[2]);
-  T v = TEST_VALUE;
-  int tot = (int)(this->host_m_2.Count(v));
-  if(tot != total) {
-    log_error("Image data content difference found\n");
-    return FAILURE;
-  }
-
-  return err;
+    cl_int err = CL_SUCCESS;
+    if (!this->host_m_1.Equal_rect_from_orig(pdtaIn, this->buffer_origin,
+                                             this->region, this->host_row_pitch,
+                                             this->host_slice_pitch))
+    {
+        log_error("Image and host data difference found\n");
+        return FAILURE;
+    }
+
+    int total = (int)(this->region[0] * this->region[1] * this->region[2]);
+    T v = TEST_VALUE;
+    int tot = (int)(this->host_m_2.Count(v));
+    if (tot != total)
+    {
+        log_error("Image data content difference found\n");
+        return FAILURE;
+    }
+
+    return err;
 }
 
-template < class T >
-cl_int cImage_check_mem_host_write_only<T>::verify_RW_Image()
+template <class T> cl_int cImage_check_mem_host_write_only<T>::verify_RW_Image()
 {
-  cl_int err = CL_SUCCESS;
-
-  this->Init_rect();
-
-  cl_event event;
-  size_t img_orig[3] = {0, 0, 0};
-  size_t img_region[3] = {0, 0, 0};
-  img_region[0] = this->m_cl_Image_desc.image_width;
-  img_region[1] = this->m_cl_Image_desc.image_height;
-  img_region[2] = this->m_cl_Image_desc.image_depth;
-
-  int color[4] = {0xFF, 0xFF, 0xFF, 0xFF};
-  err = clEnqueueFillImage(this->m_queue,
-                           this->m_Image,
-                           &color,
-                           img_orig, img_region,
-                           0, NULL, &event); // Fill the buffer with data
-
-  if (!this->m_blocking) {
-    err = clWaitForEvents(1, &event);
-    test_error(err, "clWaitForEvents error");
-  }
-  test_error(err, "clEnqueueFillImage error");
-
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
-
-  T v = TEST_VALUE;
-
-  err= clEnqueueWriteImage(this->m_queue, this->m_Image, this->m_blocking,
-                           this->buffer_origin, this->region,
-                           this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
-                           this->host_m_0.pData, 0, NULL, &event);
-  test_error(err, "clEnqueueWriteImage error"); // Test writing to buffer
-
-  if (!this->m_blocking) {
-    err = clWaitForEvents(1, &event);
-    test_error(err, "clWaitForEvents error");
-  }
-
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
-
-  update_host_mem_2(); // Read buffer contents into mem_2
-
-  err = this->verify_data(); // Compare the contents of mem_2 and mem_1,
-                             // mem_1 is same as mem_0 in setup test environment
-  test_error(err, "verify_data error");
-
-  v = 0;
-  this->host_m_2.Set_to(v);
-  err = clEnqueueReadImage(this->m_queue, this->m_Image, this->m_blocking,
-                           this->buffer_origin, this->region,
-                           this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
-                           this->host_m_1.pData, 0, NULL, &event);
-
-  if (err == CL_SUCCESS){
-    log_error("Calling clEnqueueReadImage on a memory object created with the CL_MEM_HOST_WRITE_ONLY flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-    return FAILURE;
-
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
-
-  /* Qualcomm fix: 12506 Do not wait on invalid event/ no need for syncronization calls after clEnqueueReadImage fails
-   *
-   * The call to clEnqueueReadImage fails as expected and returns an invalid event on
-   * which clWaitForEvents cannot be called. (It will rightly fail with a CL_INVALID_EVENT error)
-   * Further, we don't need to do any additional flushes or finishes here since we were in sync
-   * before the (failing) call to clEnqueueReadImage
-
-  if (!this->m_blocking) {
-    err = clWaitForEvents(1, &event);
-    test_error(err, " clWaitForEvents error")
-  }
-  Qualcomm fix: end*/
-
-  return err;
+    cl_int err = CL_SUCCESS;
+
+    this->Init_rect();
+
+    cl_event event;
+    size_t img_orig[3] = { 0, 0, 0 };
+    size_t img_region[3] = { 0, 0, 0 };
+    img_region[0] = this->m_cl_Image_desc.image_width;
+    img_region[1] = this->m_cl_Image_desc.image_height;
+    img_region[2] = this->m_cl_Image_desc.image_depth;
+
+    int color[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
+    err = clEnqueueFillImage(this->m_queue, this->m_Image, &color, img_orig,
+                             img_region, 0, NULL,
+                             &event); // Fill the buffer with data
+
+    if (!this->m_blocking)
+    {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+    test_error(err, "clEnqueueFillImage error");
+
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
+
+    T v = TEST_VALUE;
+
+    err = clEnqueueWriteImage(
+        this->m_queue, this->m_Image, this->m_blocking, this->buffer_origin,
+        this->region, this->buffer_row_pitch_bytes,
+        this->buffer_slice_pitch_bytes, this->host_m_0.pData, 0, NULL, &event);
+    test_error(err, "clEnqueueWriteImage error"); // Test writing to buffer
+
+    if (!this->m_blocking)
+    {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
+
+    update_host_mem_2(); // Read buffer contents into mem_2
+
+    err = this->verify_data(
+        this->host_m_2
+            .pData); // Compare the contents of mem_2 and mem_1,
+                     // mem_1 is same as mem_0 in setup test environment
+    test_error(err, "verify_data error");
+
+    v = 0;
+    this->host_m_2.Set_to(v);
+    err = clEnqueueReadImage(
+        this->m_queue, this->m_Image, this->m_blocking, this->buffer_origin,
+        this->region, this->buffer_row_pitch_bytes,
+        this->buffer_slice_pitch_bytes, this->host_m_1.pData, 0, NULL, &event);
+
+    if (err == CL_SUCCESS)
+    {
+        log_error(
+            "Calling clEnqueueReadImage on a memory object created with the "
+            "CL_MEM_HOST_WRITE_ONLY flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    /* Qualcomm fix: 12506 Do not wait on invalid event/ no need for
+    syncronization calls after clEnqueueReadImage fails
+     *
+     * The call to clEnqueueReadImage fails as expected and returns an invalid
+    event on
+     * which clWaitForEvents cannot be called. (It will rightly fail with a
+    CL_INVALID_EVENT error)
+     * Further, we don't need to do any additional flushes or finishes here
+    since we were in sync
+     * before the (failing) call to clEnqueueReadImage
+
+    if (!this->m_blocking) {
+      err = clWaitForEvents(1, &event);
+      test_error(err, " clWaitForEvents error")
+    }
+    Qualcomm fix: end*/
+
+    return err;
 }
 
-template < class T >
+template <class T>
 cl_int cImage_check_mem_host_write_only<T>::verify_RW_Image_Mapping()
 {
-  this->Init_rect();
-
-  cl_event event;
-  size_t img_orig[3] = {0, 0, 0};
-  size_t img_region[3] = {0, 0, 0};
-  img_region[0] = this->m_cl_Image_desc.image_width;
-  img_region[1] = this->m_cl_Image_desc.image_height;
-  img_region[2] = this->m_cl_Image_desc.image_depth;
-
-  int color[4] = {0xFF, 0xFF, 0xFF, 0xFF};
-  cl_int err = CL_SUCCESS;
-
-
-  // Fill image with pattern
-  err = clEnqueueFillImage(this->m_queue, this->m_Image,
-                           &color, img_orig, img_region,
-                           0, NULL, &event);
-
-  if (!this->m_blocking) {
-    err = clWaitForEvents(1, &event);
-    test_error(err, "clWaitForEvents error");
-  }
-
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
-
-  // Map image for writing
-  T* dataPtr = (T*) clEnqueueMapImage(this->m_queue, this->m_Image,
-                                      this->m_blocking, CL_MAP_WRITE,
-                                      this->buffer_origin, this->region,
-                                      &(this->buffer_row_pitch_bytes),
-                                      &(this->buffer_slice_pitch_bytes),
-                                      0, NULL, &event, &err);
-  test_error(err, "clEnqueueMapImage CL_MAP_WRITE pointer error");
-
-  if (!this->m_blocking) {
-    err = clWaitForEvents(1, &event);
-    test_error(err, "clWaitForEvents error");
-  }
-
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
-
-  // Verify map pointer
-  err = this->verify_mapping_ptr(dataPtr);
-  test_error(err, "clEnqueueMapImage CL_MAP_WRITE pointer error");
-
-  // Verify mapped data
-
-  // The verify_data_with_offset method below compares dataPtr against
-  // this->host_m_2.pData. The comparison should start at origin {0, 0, 0}.
-  update_host_mem_2();
-
-  // Check the content of mem and host_ptr
-  size_t offset[3] = {0, 0, 0};
-  err = cImage_check_mem_host_read_only<T>::verify_data_with_offset(dataPtr,
-                                                                    offset);
-  test_error(err, "verify_data error");
-
-  // Unmap memory object
-  err = clEnqueueUnmapMemObject(this->m_queue, this->m_Image, dataPtr,
-                                0, NULL, &event);
-  test_error(err, "clEnqueueUnmapMemObject error");
-
-  if (!this->m_blocking) {
-    err = clWaitForEvents(1, &event);
-    test_error(err, "clWaitForEvents error");
-  }
-
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
-
-  dataPtr = (T*) clEnqueueMapImage(this->m_queue, this->m_Image, this->m_blocking,
-                                   CL_MAP_READ,
-                                   this->buffer_origin, this->region,
-                                   &(this->buffer_row_pitch_bytes),
-                                   &(this->buffer_slice_pitch_bytes),
-                                   0, NULL, &event, &err);
-
-  if (err == CL_SUCCESS) {
-    log_error("Calling clEnqueueMapImage (CL_MAP_READ) on a memory object created with the CL_MEM_HOST_WRITE_ONLY flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-    return FAILURE;
-
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
-
-  return err;
+    this->Init_rect();
+
+    cl_event event;
+    size_t img_orig[3] = { 0, 0, 0 };
+    size_t img_region[3] = { 0, 0, 0 };
+    img_region[0] = this->m_cl_Image_desc.image_width;
+    img_region[1] = this->m_cl_Image_desc.image_height;
+    img_region[2] = this->m_cl_Image_desc.image_depth;
+
+    int color[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
+    cl_int err = CL_SUCCESS;
+
+
+    // Fill image with pattern
+    err = clEnqueueFillImage(this->m_queue, this->m_Image, &color, img_orig,
+                             img_region, 0, NULL, &event);
+
+    if (!this->m_blocking)
+    {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
+
+    // Map image for writing
+    T* dataPtr = (T*)clEnqueueMapImage(
+        this->m_queue, this->m_Image, this->m_blocking, CL_MAP_WRITE,
+        this->buffer_origin, this->region, &(this->buffer_row_pitch_bytes),
+        &(this->buffer_slice_pitch_bytes), 0, NULL, &event, &err);
+    test_error(err, "clEnqueueMapImage CL_MAP_WRITE pointer error");
+
+    if (!this->m_blocking)
+    {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
+
+    // Verify map pointer
+    err = this->verify_mapping_ptr(dataPtr);
+    test_error(err, "clEnqueueMapImage CL_MAP_WRITE pointer error");
+
+    // Verify mapped data
+
+    // The verify_data_with_offset method below compares dataPtr against
+    // this->host_m_2.pData. The comparison should start at origin {0, 0, 0}.
+    update_host_mem_2();
+
+    // Check the content of mem and host_ptr
+    size_t offset[3] = { 0, 0, 0 };
+    err = cImage_check_mem_host_read_only<T>::verify_data_with_offset(dataPtr,
+                                                                      offset);
+    test_error(err, "verify_data error");
+
+    // Unmap memory object
+    err = clEnqueueUnmapMemObject(this->m_queue, this->m_Image, dataPtr, 0,
+                                  NULL, &event);
+    test_error(err, "clEnqueueUnmapMemObject error");
+
+    if (!this->m_blocking)
+    {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
+
+    dataPtr = (T*)clEnqueueMapImage(
+        this->m_queue, this->m_Image, this->m_blocking, CL_MAP_READ,
+        this->buffer_origin, this->region, &(this->buffer_row_pitch_bytes),
+        &(this->buffer_slice_pitch_bytes), 0, NULL, &event, &err);
+
+    if (err == CL_SUCCESS)
+    {
+        log_error("Calling clEnqueueMapImage (CL_MAP_READ) on a memory object "
+                  "created with the CL_MEM_HOST_WRITE_ONLY flag should not "
+                  "return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
 }
 
 #endif
diff --git a/test_conformance/mem_host_flags/checker_mem_host_no_access.hpp b/test_conformance/mem_host_flags/checker_mem_host_no_access.hpp
index d1f96f25..babbeea9 100644
--- a/test_conformance/mem_host_flags/checker_mem_host_no_access.hpp
+++ b/test_conformance/mem_host_flags/checker_mem_host_no_access.hpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -19,184 +19,201 @@
 
 #include "checker_mem_host_write_only.hpp"
 
-template < class T> class cBuffer_check_mem_host_no_access : public cBuffer_check_mem_host_write_only< T >
-{
+template <class T>
+class cBuffer_check_mem_host_no_access
+    : public cBuffer_check_mem_host_write_only<T> {
 public:
-  cBuffer_check_mem_host_no_access(cl_device_id deviceID, cl_context context, cl_command_queue queue)
-  : cBuffer_check_mem_host_write_only < T > (deviceID, context, queue)
-  {
-  };
-
-  cBuffer_check_mem_host_no_access()
-  {
-  };
-
-  virtual cl_int SetupBuffer();
-  virtual cl_int SetupASSubBuffer(cl_mem_flags parent_buffer_flag);
-  virtual cl_int Setup_Test_Environment();
-
-  cl_int verify_RW_Buffer();
-  cl_int verify_RW_Buffer_rect();
-  cl_int verify_RW_Buffer_mapping();
+    cBuffer_check_mem_host_no_access(cl_device_id deviceID, cl_context context,
+                                     cl_command_queue queue)
+        : cBuffer_check_mem_host_write_only<T>(deviceID, context, queue){};
+
+    cBuffer_check_mem_host_no_access(){};
+
+    virtual cl_int SetupBuffer();
+    virtual cl_int SetupASSubBuffer(cl_mem_flags parent_buffer_flag);
+    virtual cl_int Setup_Test_Environment();
+
+    cl_int verify_RW_Buffer();
+    cl_int verify_RW_Buffer_rect();
+    cl_int verify_RW_Buffer_mapping();
 };
 
-template < class T >
-cl_int cBuffer_check_mem_host_no_access< T >::SetupBuffer()
+template <class T> cl_int cBuffer_check_mem_host_no_access<T>::SetupBuffer()
 {
-  this->m_nNumber_elements = 1000;
-  T vv1 = TEST_VALUE;
-  this->host_m_1.Init( this->m_nNumber_elements, vv1);
-
-  T vv2 = 0;
-  this->host_m_2.Init( this->m_nNumber_elements, vv2);
-
-  cl_int err;
-  int block_size_in_byte = this->get_block_size_bytes();
-  this->m_buffer = clCreateBuffer(this->m_context, this->buffer_mem_flag,
-                                  block_size_in_byte, this->host_m_1.pData, &err);
-  test_error(err, "clCreateBuffer error");
-  err = this->Check_GetMemObjectInfo(this->buffer_mem_flag);
-
-  if (this->buffer_mem_flag | CL_MEM_USE_HOST_PTR)
-  {
-    this->pHost_ptr = (void *)this->host_m_1.pData;
-  }
-
-  return err;
+    this->m_nNumber_elements = 1000;
+    T vv1 = TEST_VALUE;
+    this->host_m_1.Init(this->m_nNumber_elements, vv1);
+
+    T vv2 = 0;
+    this->host_m_2.Init(this->m_nNumber_elements, vv2);
+
+    cl_int err;
+    int block_size_in_byte = this->get_block_size_bytes();
+    this->m_buffer =
+        clCreateBuffer(this->m_context, this->buffer_mem_flag,
+                       block_size_in_byte, this->host_m_1.pData, &err);
+    test_error(err, "clCreateBuffer error");
+    err = this->Check_GetMemObjectInfo(this->buffer_mem_flag);
+
+    if (this->buffer_mem_flag | CL_MEM_USE_HOST_PTR)
+    {
+        this->pHost_ptr = (void *)this->host_m_1.pData;
+    }
+
+    return err;
 }
 
-template < class T >
-cl_int cBuffer_check_mem_host_no_access< T >::SetupASSubBuffer(cl_mem_flags parent_buffer_flag)
+template <class T>
+cl_int cBuffer_check_mem_host_no_access<T>::SetupASSubBuffer(
+    cl_mem_flags parent_buffer_flag)
 {
-  return cBuffer_checker<T>::SetupASSubBuffer(parent_buffer_flag);
+    return cBuffer_checker<T>::SetupASSubBuffer(parent_buffer_flag);
 }
 
-template < class T >
-cl_int cBuffer_check_mem_host_no_access< T >::Setup_Test_Environment()
+template <class T>
+cl_int cBuffer_check_mem_host_no_access<T>::Setup_Test_Environment()
 {
-  cBuffer_check_mem_host_write_only<T>::Setup_Test_Environment();
+    cBuffer_check_mem_host_write_only<T>::Setup_Test_Environment();
 
-  return CL_SUCCESS;
+    return CL_SUCCESS;
 }
 
-template < class T>
-cl_int cBuffer_check_mem_host_no_access< T >::verify_RW_Buffer()
+template <class T>
+cl_int cBuffer_check_mem_host_no_access<T>::verify_RW_Buffer()
 {
-  cl_event event;
-  cl_int err = clEnqueueReadBuffer(this->m_queue, this->m_buffer, this->m_blocking, 0,
-                                   this->get_block_size_bytes(), this->host_m_1.pData,
-                                   0, NULL, &event);
-
-  if (err == CL_SUCCESS) {
-    log_error("Calling clEnqueueWriteBuffer on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-    return FAILURE;
-
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
-
-  err = clEnqueueWriteBuffer(this->m_queue, this->m_buffer, this->m_blocking, 0,
-                             this->get_block_size_bytes(), this->host_m_1.pData,
-                             0, NULL, &event);
-
-  if (err == CL_SUCCESS) {
-    log_error("Calling clEnqueueWriteBuffer on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-    return FAILURE;
-
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
-
-  return err;
+    cl_event event;
+    cl_int err = clEnqueueReadBuffer(
+        this->m_queue, this->m_buffer, this->m_blocking, 0,
+        this->get_block_size_bytes(), this->host_m_1.pData, 0, NULL, &event);
+
+    if (err == CL_SUCCESS)
+    {
+        log_error(
+            "Calling clEnqueueWriteBuffer on a memory object created with the "
+            "CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    err = clEnqueueWriteBuffer(this->m_queue, this->m_buffer, this->m_blocking,
+                               0, this->get_block_size_bytes(),
+                               this->host_m_1.pData, 0, NULL, &event);
+
+    if (err == CL_SUCCESS)
+    {
+        log_error(
+            "Calling clEnqueueWriteBuffer on a memory object created with the "
+            "CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
 }
 
-template < class T >
-cl_int cBuffer_check_mem_host_no_access< T >::verify_RW_Buffer_rect()
+template <class T>
+cl_int cBuffer_check_mem_host_no_access<T>::verify_RW_Buffer_rect()
 {
-  this->Init_rect();
-  cl_event event;
-  cl_int err = CL_SUCCESS;
-  err = clEnqueueReadBufferRect(this->m_queue, this->m_buffer, this->m_blocking,
-                                this->buffer_origin_bytes,
-                                this->host_origin_bytes,
-                                this->region_bytes,
-                                this->buffer_row_pitch_bytes,
-                                this->buffer_slice_pitch_bytes,
-                                this->host_row_pitch_bytes,
-                                this->host_slice_pitch_bytes,
-                                this->host_m_2.pData,
-                                0, NULL, &event);
-
-  if (err == CL_SUCCESS) {
-    log_error("Calling clEnqueueReadBufferRect on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-    return FAILURE;
-
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
-
-  err = clEnqueueWriteBufferRect(this->m_queue, this->m_buffer, this->m_blocking,
-                                 this->buffer_origin_bytes ,
-                                 this->host_origin_bytes,
-                                 this->region_bytes,
-                                 this->buffer_row_pitch_bytes,
-                                 this->buffer_slice_pitch_bytes,
-                                 this->host_row_pitch_bytes,
-                                 this->host_slice_pitch_bytes,
-                                 this->host_m_2.pData,
-                                 0, NULL, &event);
-
-  if (err == CL_SUCCESS) {
-    log_error("Calling clEnqueueWriteBufferRect on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-    return FAILURE;
-
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
-
-  return err;
+    this->Init_rect();
+    cl_event event;
+    cl_int err = CL_SUCCESS;
+    err = clEnqueueReadBufferRect(
+        this->m_queue, this->m_buffer, this->m_blocking,
+        this->buffer_origin_bytes, this->host_origin_bytes, this->region_bytes,
+        this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
+        this->host_row_pitch_bytes, this->host_slice_pitch_bytes,
+        this->host_m_2.pData, 0, NULL, &event);
+
+    if (err == CL_SUCCESS)
+    {
+        log_error(
+            "Calling clEnqueueReadBufferRect on a memory object created with "
+            "the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    err = clEnqueueWriteBufferRect(
+        this->m_queue, this->m_buffer, this->m_blocking,
+        this->buffer_origin_bytes, this->host_origin_bytes, this->region_bytes,
+        this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
+        this->host_row_pitch_bytes, this->host_slice_pitch_bytes,
+        this->host_m_2.pData, 0, NULL, &event);
+
+    if (err == CL_SUCCESS)
+    {
+        log_error(
+            "Calling clEnqueueWriteBufferRect on a memory object created with "
+            "the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
 }
 
-template < class T >
-cl_int cBuffer_check_mem_host_no_access< T >::verify_RW_Buffer_mapping()
+template <class T>
+cl_int cBuffer_check_mem_host_no_access<T>::verify_RW_Buffer_mapping()
 {
-  cl_event event;
-  cl_int err;
-
-  void *dataPtr;
-  dataPtr = clEnqueueMapBuffer(this->m_queue, this->m_buffer, this->m_blocking, CL_MAP_READ,
-                               0, this->get_block_size_bytes(), 0, NULL, &event, &err);
-  if (err == CL_SUCCESS) {
-    log_error("Calling clEnqueueMapBuffer (CL_MAP_READ) on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-    return FAILURE;
-
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
-
-  dataPtr = clEnqueueMapBuffer(this->m_queue, this->m_buffer, this->m_blocking, CL_MAP_WRITE,
-                               0, this->get_block_size_bytes(), 0, NULL, &event, &err);
-  if (err == CL_SUCCESS) {
-    log_error("Calling clEnqueueMapBuffer (CL_MAP_WRITE) on a memory object created with the CL_MEM_HOST_NO_ACCESS flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-    return FAILURE;
-
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
-
-  return err;
+    cl_event event;
+    cl_int err;
+
+    void *dataPtr;
+    dataPtr = clEnqueueMapBuffer(
+        this->m_queue, this->m_buffer, this->m_blocking, CL_MAP_READ, 0,
+        this->get_block_size_bytes(), 0, NULL, &event, &err);
+    if (err == CL_SUCCESS)
+    {
+        log_error("Calling clEnqueueMapBuffer (CL_MAP_READ) on a memory object "
+                  "created with the CL_MEM_HOST_NO_ACCESS flag should not "
+                  "return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    dataPtr = clEnqueueMapBuffer(
+        this->m_queue, this->m_buffer, this->m_blocking, CL_MAP_WRITE, 0,
+        this->get_block_size_bytes(), 0, NULL, &event, &err);
+    if (err == CL_SUCCESS)
+    {
+        log_error("Calling clEnqueueMapBuffer (CL_MAP_WRITE) on a memory "
+                  "object created with the CL_MEM_HOST_NO_ACCESS flag should "
+                  "not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
 }
 
 #endif
diff --git a/test_conformance/mem_host_flags/checker_mem_host_read_only.hpp b/test_conformance/mem_host_flags/checker_mem_host_read_only.hpp
index 44910b04..ea23ae5a 100644
--- a/test_conformance/mem_host_flags/checker_mem_host_read_only.hpp
+++ b/test_conformance/mem_host_flags/checker_mem_host_read_only.hpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -18,259 +18,275 @@
 
 #include "checker.h"
 
-template < class T> class cBuffer_check_mem_host_read_only : public cBuffer_checker<T>
-{
+template <class T>
+class cBuffer_check_mem_host_read_only : public cBuffer_checker<T> {
 public:
-  cBuffer_check_mem_host_read_only(cl_device_id deviceID, cl_context context, cl_command_queue queue)
-  : cBuffer_checker <T> (deviceID, context, queue)
-  {
-  };
-
-  ~cBuffer_check_mem_host_read_only()
-  {
-  };
-
-  virtual cl_int Check_GetMemObjectInfo(cl_mem_flags buffer_mem_flag);
-  virtual cl_int SetupBuffer();
-  virtual cl_int SetupASSubBuffer( cl_mem_flags flag_p);
-  virtual cl_int Setup_Test_Environment();
-
-  cl_int verifyData(cl_int err, cl_event & event);
-  cl_int verify_RW_Buffer();
-  cl_int verify_RW_Buffer_rect();
-  cl_int verify_RW_Buffer_mapping();
-};
+    cBuffer_check_mem_host_read_only(cl_device_id deviceID, cl_context context,
+                                     cl_command_queue queue)
+        : cBuffer_checker<T>(deviceID, context, queue){};
 
-template < class T >
-cl_int cBuffer_check_mem_host_read_only< T >::SetupBuffer()
-{
-  this->m_buffer_type = _BUFFER;
+    ~cBuffer_check_mem_host_read_only(){};
 
-  this->m_nNumber_elements = 888;
-  T vv1 = TEST_VALUE;
-  this->host_m_1.Init(this->m_nNumber_elements, vv1);
-  this->host_m_0.Init(this->m_nNumber_elements, vv1);
+    virtual cl_int Check_GetMemObjectInfo(cl_mem_flags buffer_mem_flag);
+    virtual cl_int SetupBuffer();
+    virtual cl_int SetupASSubBuffer(cl_mem_flags flag_p);
+    virtual cl_int Setup_Test_Environment();
 
-  cl_int err = CL_SUCCESS;
-  int block_size_in_byte = (int)(this->m_nNumber_elements * sizeof(T));
-  this->m_buffer = clCreateBuffer(this->m_context, this->buffer_mem_flag,
-                                  block_size_in_byte, this->host_m_1.pData, &err);
-  test_error(err, "clCreateBuffer error");
+    cl_int verifyData(cl_int err, cl_event &event);
+    cl_int verify_RW_Buffer();
+    cl_int verify_RW_Buffer_rect();
+    cl_int verify_RW_Buffer_mapping();
+};
 
-  if (this->buffer_mem_flag & CL_MEM_USE_HOST_PTR)
-  {
-    this->pHost_ptr = (void *)this->host_m_1.pData;
-  }
+template <class T> cl_int cBuffer_check_mem_host_read_only<T>::SetupBuffer()
+{
+    this->m_buffer_type = _BUFFER;
+
+    this->m_nNumber_elements = 888;
+    T vv1 = TEST_VALUE;
+    this->host_m_1.Init(this->m_nNumber_elements, vv1);
+    this->host_m_0.Init(this->m_nNumber_elements, vv1);
+
+    cl_int err = CL_SUCCESS;
+    int block_size_in_byte = (int)(this->m_nNumber_elements * sizeof(T));
+    this->m_buffer =
+        clCreateBuffer(this->m_context, this->buffer_mem_flag,
+                       block_size_in_byte, this->host_m_1.pData, &err);
+    test_error(err, "clCreateBuffer error");
+
+    if (this->buffer_mem_flag & CL_MEM_USE_HOST_PTR)
+    {
+        this->pHost_ptr = (void *)this->host_m_1.pData;
+    }
 
-  return err;
+    return err;
 }
 
-template < class T >
-cl_int cBuffer_check_mem_host_read_only<T>::SetupASSubBuffer(cl_mem_flags flag_p)
+template <class T>
+cl_int
+cBuffer_check_mem_host_read_only<T>::SetupASSubBuffer(cl_mem_flags flag_p)
 {
-  return cBuffer_checker<T>::SetupASSubBuffer(flag_p);
+    return cBuffer_checker<T>::SetupASSubBuffer(flag_p);
 }
 
-template < class T>
+template <class T>
 cl_int cBuffer_check_mem_host_read_only<T>::Setup_Test_Environment()
 {
-  cBuffer_checker<T>::Setup_Test_Environment();
-  T vv2 = 0;
-  this->host_m_2.Init(this->m_nNumber_elements, vv2);
+    cBuffer_checker<T>::Setup_Test_Environment();
+    T vv2 = 0;
+    this->host_m_2.Init(this->m_nNumber_elements, vv2);
 
-  return CL_SUCCESS;
+    return CL_SUCCESS;
 }
 
-template < class T >
-cl_int cBuffer_check_mem_host_read_only< T >::Check_GetMemObjectInfo(cl_mem_flags buffer_mem_flag)
+template <class T>
+cl_int cBuffer_check_mem_host_read_only<T>::Check_GetMemObjectInfo(
+    cl_mem_flags buffer_mem_flag)
 {
-  cl_int err = CL_SUCCESS;
-  cBuffer_checker<T>::Check_GetMemObjectInfo(buffer_mem_flag);
-
-  if (buffer_mem_flag & CL_MEM_ALLOC_HOST_PTR)
-  {
-    size_t size = 0;
-    err = clGetMemObjectInfo(this->m_buffer, CL_MEM_SIZE, sizeof(size), &size, NULL);
-    void *pp = NULL;
-    err = clGetMemObjectInfo(this->m_buffer, CL_MEM_HOST_PTR, sizeof( pp ), &pp, NULL);
-
-    if (!this->host_m_1.Equal( (T*) (this->pData), this->m_nNumber_elements )) {
-      log_error("Buffer data difference found\n");
-      return FAILURE;
+    cl_int err = CL_SUCCESS;
+    cBuffer_checker<T>::Check_GetMemObjectInfo(buffer_mem_flag);
+
+    if (buffer_mem_flag & CL_MEM_ALLOC_HOST_PTR)
+    {
+        size_t size = 0;
+        err = clGetMemObjectInfo(this->m_buffer, CL_MEM_SIZE, sizeof(size),
+                                 &size, NULL);
+        void *pp = NULL;
+        err = clGetMemObjectInfo(this->m_buffer, CL_MEM_HOST_PTR, sizeof(pp),
+                                 &pp, NULL);
+
+        if (!this->host_m_1.Equal((T *)(this->pData), this->m_nNumber_elements))
+        {
+            log_error("Buffer data difference found\n");
+            return FAILURE;
+        }
     }
-  }
 
-  return err;
+    return err;
 }
 
-template < class T >
-cl_int cBuffer_check_mem_host_read_only< T >::verifyData( cl_int err, cl_event  &   event )
+template <class T>
+cl_int cBuffer_check_mem_host_read_only<T>::verifyData(cl_int err,
+                                                       cl_event &event)
 {
-  if (err != CL_SUCCESS) {
-    err = this->m_nERROR_RETURN_CODE;
-    test_error(err, "clEnqueueReadBuffer error");
-  }
+    if (err != CL_SUCCESS)
+    {
+        err = this->m_nERROR_RETURN_CODE;
+        test_error(err, "clEnqueueReadBuffer error");
+    }
 
-  if (!this->host_m_1.Equal(this->host_m_2)) {
-    err = this->m_nERROR_RETURN_CODE;
-    test_error(err, "clEnqueueReadBuffer data difference found");
-  }
+    if (!this->host_m_1.Equal(this->host_m_2))
+    {
+        err = this->m_nERROR_RETURN_CODE;
+        test_error(err, "clEnqueueReadBuffer data difference found");
+    }
 
-  return err;
+    return err;
 }
 
-template < class T >
-cl_int cBuffer_check_mem_host_read_only< T >::verify_RW_Buffer()
+template <class T>
+cl_int cBuffer_check_mem_host_read_only<T>::verify_RW_Buffer()
 {
-  cl_event event;
-  cl_int err = CL_SUCCESS;
-
-  err = clEnqueueReadBuffer(this->m_queue, this->m_buffer, this->m_blocking,
-                            0, this->get_block_size_bytes(), this->host_m_2.pData,
-                            0, NULL, &event);
-  test_error(err, "clEnqueueReadBuffer error");
-
-  if (!this->m_blocking) {
-    err = clWaitForEvents(1, &event);
-    test_error(err, "clWaitForEvents error");
-  }
-
-  if (!this->host_m_1.Equal(this->host_m_2)) {
-    log_error("Buffer data difference found\n");
-    return FAILURE;
-  }
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
-
-  // test write
-  err = clEnqueueWriteBuffer(this->m_queue, this->m_buffer, this->m_blocking,
-                             0, this->get_block_size_bytes(), this->host_m_2.pData,
-                             0, NULL, &event);
-
-  if (err == CL_SUCCESS) {
-    log_error("Calling clEnqueueWriteBuffer on a memory object created with the CL_MEM_HOST_READ_ONLY flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-    return FAILURE;
-
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
-
-  return err;
+    cl_event event;
+    cl_int err = CL_SUCCESS;
+
+    err = clEnqueueReadBuffer(this->m_queue, this->m_buffer, this->m_blocking,
+                              0, this->get_block_size_bytes(),
+                              this->host_m_2.pData, 0, NULL, &event);
+    test_error(err, "clEnqueueReadBuffer error");
+
+    if (!this->m_blocking)
+    {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    if (!this->host_m_1.Equal(this->host_m_2))
+    {
+        log_error("Buffer data difference found\n");
+        return FAILURE;
+    }
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
+
+    // test write
+    err = clEnqueueWriteBuffer(this->m_queue, this->m_buffer, this->m_blocking,
+                               0, this->get_block_size_bytes(),
+                               this->host_m_2.pData, 0, NULL, &event);
+
+    if (err == CL_SUCCESS)
+    {
+        log_error(
+            "Calling clEnqueueWriteBuffer on a memory object created with the "
+            "CL_MEM_HOST_READ_ONLY flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
 }
 
-template < class T >
-cl_int cBuffer_check_mem_host_read_only< T >::verify_RW_Buffer_rect()
+template <class T>
+cl_int cBuffer_check_mem_host_read_only<T>::verify_RW_Buffer_rect()
 {
-  this->Init_rect();
-
-  T vv2 = 0;
-  this->host_m_2.Set_to( vv2 );
-  cl_event event;
-  cl_int err = CL_SUCCESS;
-
-  err = clEnqueueReadBufferRect(this->m_queue, this->m_buffer, this->m_blocking,
-                                this->buffer_origin_bytes,
-                                this->host_origin_bytes,
-                                this->region_bytes,
-                                this->buffer_row_pitch_bytes,
-                                this->buffer_slice_pitch_bytes,
-                                this->host_row_pitch_bytes,
-                                this->host_slice_pitch_bytes,
-                                this->host_m_2.pData,
-                                0, NULL, &event);
-  test_error(err, "clEnqueueReadBufferRect error");
-
-  if (!this->m_blocking) {
-    err = clWaitForEvents(1, &event);
-    test_error(err, "clWaitForEvents error");
-  }
-
-  if (! this->host_m_1.Equal_rect(this->host_m_2, this->host_origin, this->region,
-                                  this->host_row_pitch, this->host_slice_pitch)) {
-    log_error("Buffer data diffeence found\n");
-    return FAILURE;
-  }
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
-
-  // test blocking write rect
-  err = clEnqueueWriteBufferRect(this->m_queue, this->m_buffer, this->m_blocking,
-                                 this->buffer_origin_bytes,
-                                 this->host_origin_bytes,
-                                 this->region_bytes,
-                                 this->buffer_row_pitch_bytes,
-                                 this->buffer_slice_pitch_bytes,
-                                 this->host_row_pitch_bytes,
-                                 this->host_slice_pitch_bytes,
-                                 this->host_m_2.pData,
-                                 0, NULL, &event);
-
-  if (err == CL_SUCCESS) {
-    log_error("Calling clEnqueueWriteBufferRect on a memory object created with the CL_MEM_HOST_READ_ONLY flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-    return FAILURE;
-
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
-
-  return err;
+    this->Init_rect();
+
+    T vv2 = 0;
+    this->host_m_2.Set_to(vv2);
+    cl_event event;
+    cl_int err = CL_SUCCESS;
+
+    err = clEnqueueReadBufferRect(
+        this->m_queue, this->m_buffer, this->m_blocking,
+        this->buffer_origin_bytes, this->host_origin_bytes, this->region_bytes,
+        this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
+        this->host_row_pitch_bytes, this->host_slice_pitch_bytes,
+        this->host_m_2.pData, 0, NULL, &event);
+    test_error(err, "clEnqueueReadBufferRect error");
+
+    if (!this->m_blocking)
+    {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    if (!this->host_m_1.Equal_rect(this->host_m_2, this->host_origin,
+                                   this->region, this->host_row_pitch,
+                                   this->host_slice_pitch))
+    {
+        log_error("Buffer data diffeence found\n");
+        return FAILURE;
+    }
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
+
+    // test blocking write rect
+    err = clEnqueueWriteBufferRect(
+        this->m_queue, this->m_buffer, this->m_blocking,
+        this->buffer_origin_bytes, this->host_origin_bytes, this->region_bytes,
+        this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
+        this->host_row_pitch_bytes, this->host_slice_pitch_bytes,
+        this->host_m_2.pData, 0, NULL, &event);
+
+    if (err == CL_SUCCESS)
+    {
+        log_error(
+            "Calling clEnqueueWriteBufferRect on a memory object created with "
+            "the CL_MEM_HOST_READ_ONLY flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
 }
 
-template < class T >
-cl_int cBuffer_check_mem_host_read_only< T >::verify_RW_Buffer_mapping()
+template <class T>
+cl_int cBuffer_check_mem_host_read_only<T>::verify_RW_Buffer_mapping()
 {
-  cl_int err = CL_SUCCESS;
-  cl_event event;
-  void *dataPtr;
-  dataPtr = clEnqueueMapBuffer(this->m_queue, this->m_buffer, this->m_blocking,
-                               CL_MAP_READ,
-                               0, this->get_block_size_bytes(),
-                               0, NULL, &event, &err);
-  test_error(err, "clEnqueueMapBuffer error");
-
-  if (!this->m_blocking) {
-    err = clWaitForEvents(1, &event );
-    test_error(err, "clWaitForEvents error");
-  }
-
-  if ((this->buffer_mem_flag & CL_MEM_USE_HOST_PTR) && dataPtr != this->pHost_ptr ) {
-    log_error("Mapped host pointer difference found\n");
-    return FAILURE;
-  }
-
-  if(!this->host_m_1.Equal((T*)dataPtr, this->m_nNumber_elements)) {
-    log_error("Buffer content difference found\n");
-    return FAILURE;
-  }
-
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
-
-  err = clEnqueueUnmapMemObject(this->m_queue, this->m_buffer, dataPtr, 0,
-                                nullptr, nullptr);
-  test_error(err, "clEnqueueUnmapMemObject error");
-
-  //  test blocking map read
-  clEnqueueMapBuffer(this->m_queue, this->m_buffer, this->m_blocking,
-                     CL_MAP_WRITE,
-                     0, this->get_block_size_bytes(),
-                     0, NULL, &event, &err);
-
-  if (err == CL_SUCCESS) {
-    log_error("Calling clEnqueueMapBuffer (CL_MAP_WRITE) on a memory object created with the CL_MEM_HOST_READ_ONLY flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-    return FAILURE;
-
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
-
-  return err;
+    cl_int err = CL_SUCCESS;
+    cl_event event;
+    void *dataPtr;
+    dataPtr = clEnqueueMapBuffer(
+        this->m_queue, this->m_buffer, this->m_blocking, CL_MAP_READ, 0,
+        this->get_block_size_bytes(), 0, NULL, &event, &err);
+    test_error(err, "clEnqueueMapBuffer error");
+
+    if (!this->m_blocking)
+    {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    if ((this->buffer_mem_flag & CL_MEM_USE_HOST_PTR)
+        && dataPtr != this->pHost_ptr)
+    {
+        log_error("Mapped host pointer difference found\n");
+        return FAILURE;
+    }
+
+    if (!this->host_m_1.Equal((T *)dataPtr, this->m_nNumber_elements))
+    {
+        log_error("Buffer content difference found\n");
+        return FAILURE;
+    }
+
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
+
+    err = clEnqueueUnmapMemObject(this->m_queue, this->m_buffer, dataPtr, 0,
+                                  nullptr, nullptr);
+    test_error(err, "clEnqueueUnmapMemObject error");
+
+    //  test blocking map read
+    clEnqueueMapBuffer(this->m_queue, this->m_buffer, this->m_blocking,
+                       CL_MAP_WRITE, 0, this->get_block_size_bytes(), 0, NULL,
+                       &event, &err);
+
+    if (err == CL_SUCCESS)
+    {
+        log_error("Calling clEnqueueMapBuffer (CL_MAP_WRITE) on a memory "
+                  "object created with the CL_MEM_HOST_READ_ONLY flag should "
+                  "not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
 }
 
 #endif
diff --git a/test_conformance/mem_host_flags/checker_mem_host_write_only.hpp b/test_conformance/mem_host_flags/checker_mem_host_write_only.hpp
index 1091bc1b..6c7b12da 100644
--- a/test_conformance/mem_host_flags/checker_mem_host_write_only.hpp
+++ b/test_conformance/mem_host_flags/checker_mem_host_write_only.hpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -18,338 +18,353 @@
 
 #include "checker.h"
 
-template < class T> class cBuffer_check_mem_host_write_only : public cBuffer_checker<T>
-{
+template <class T>
+class cBuffer_check_mem_host_write_only : public cBuffer_checker<T> {
 public:
-  cBuffer_check_mem_host_write_only(cl_device_id deviceID, cl_context context, cl_command_queue queue)
-  : cBuffer_checker < T > (deviceID, context, queue)
-  {
-    this->m_nNumber_elements = 1000;
-  };
+    cBuffer_check_mem_host_write_only(cl_device_id deviceID, cl_context context,
+                                      cl_command_queue queue)
+        : cBuffer_checker<T>(deviceID, context, queue)
+    {
+        this->m_nNumber_elements = 1000;
+    };
 
-  ~cBuffer_check_mem_host_write_only()
-  {
-  };
+    ~cBuffer_check_mem_host_write_only(){};
 
-  cl_program program;
-  cl_kernel kernel;
+    cl_program program;
+    cl_kernel kernel;
 
-  clMemWrapper m_buffer2;
+    clMemWrapper m_buffer2;
 
-  cl_int Setup_Test_Environment();
+    cl_int Setup_Test_Environment();
 
-  cl_int SetupBuffer();
-  cl_int SetupASSubBuffer(cl_mem_flags flag_p);
+    cl_int SetupBuffer();
+    cl_int SetupASSubBuffer(cl_mem_flags flag_p);
 
-  cl_int verifyData(cl_int err, cl_event &event );
-  cl_int update_host_mem_2();
+    cl_int verifyData(cl_int err, cl_event &event);
+    cl_int update_host_mem_2();
 
-  cl_int verify_RW_Buffer();
-  cl_int verify_RW_Buffer_rect();
-  cl_int verify_RW_Buffer_mapping();
+    cl_int verify_RW_Buffer();
+    cl_int verify_RW_Buffer_rect();
+    cl_int verify_RW_Buffer_mapping();
 
-  C_host_memory_block<T> tmp_host_m;
+    C_host_memory_block<T> tmp_host_m;
 
-  virtual cl_int verify_Buffer_initialization();
+    virtual cl_int verify_Buffer_initialization();
 };
 
-template < class T >
-cl_int cBuffer_check_mem_host_write_only< T >::SetupBuffer()
+template <class T> cl_int cBuffer_check_mem_host_write_only<T>::SetupBuffer()
 {
-  T vv1 = 0;
-  this->host_m_1.Init( this->m_nNumber_elements, vv1); // zero out buffer
+    T vv1 = 0;
+    this->host_m_1.Init(this->m_nNumber_elements, vv1); // zero out buffer
 
-  // init buffer to 0
-  cl_int err;
-  int block_size_in_byte = this->get_block_size_bytes();
+    // init buffer to 0
+    cl_int err;
+    int block_size_in_byte = this->get_block_size_bytes();
 
-  this->m_buffer = clCreateBuffer(this->m_context, this->buffer_mem_flag,
-                                  block_size_in_byte, this->host_m_1.pData, &err);
-  test_error(err, "clCreateBuffer error");
+    this->m_buffer =
+        clCreateBuffer(this->m_context, this->buffer_mem_flag,
+                       block_size_in_byte, this->host_m_1.pData, &err);
+    test_error(err, "clCreateBuffer error");
 
-  err = this->Check_GetMemObjectInfo(this->buffer_mem_flag);
+    err = this->Check_GetMemObjectInfo(this->buffer_mem_flag);
 
-  if (this->buffer_mem_flag | CL_MEM_USE_HOST_PTR)
-  {
-    this->pHost_ptr = (void *)this->host_m_1.pData;
-  }
+    if (this->buffer_mem_flag | CL_MEM_USE_HOST_PTR)
+    {
+        this->pHost_ptr = (void *)this->host_m_1.pData;
+    }
 
-  return err;
+    return err;
 }
 
-template < class T >
-cl_int cBuffer_check_mem_host_write_only<T>::SetupASSubBuffer(cl_mem_flags flag_p)
+template <class T>
+cl_int
+cBuffer_check_mem_host_write_only<T>::SetupASSubBuffer(cl_mem_flags flag_p)
 {
-  return cBuffer_checker<T>::SetupASSubBuffer(flag_p);
+    return cBuffer_checker<T>::SetupASSubBuffer(flag_p);
 }
 
-template < class T >
-cl_int cBuffer_check_mem_host_write_only< T >::Setup_Test_Environment()
+template <class T>
+cl_int cBuffer_check_mem_host_write_only<T>::Setup_Test_Environment()
 {
-  cl_int err;
-  T vv2 = 0;
-  this->host_m_2.Init(this->m_nNumber_elements, vv2);
-
-  // init buffer2 to 0
-  cl_mem_flags buffer_mem_flag2 = CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_READ_ONLY;
-  this->m_buffer2 = clCreateBuffer(this->m_context, buffer_mem_flag2,
-                                   this->get_block_size_bytes(), this->host_m_2.pData, &err);
-  test_error(err, "clCreateBuffer error\n");
-
-  return err;
+    cl_int err;
+    T vv2 = 0;
+    this->host_m_2.Init(this->m_nNumber_elements, vv2);
+
+    // init buffer2 to 0
+    cl_mem_flags buffer_mem_flag2 =
+        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_READ_ONLY;
+    this->m_buffer2 = clCreateBuffer(this->m_context, buffer_mem_flag2,
+                                     this->get_block_size_bytes(),
+                                     this->host_m_2.pData, &err);
+    test_error(err, "clCreateBuffer error\n");
+
+    return err;
 }
 
-template < class T >
-cl_int cBuffer_check_mem_host_write_only< T >::verify_Buffer_initialization()
+template <class T>
+cl_int cBuffer_check_mem_host_write_only<T>::verify_Buffer_initialization()
 {
-  cl_int err = CL_SUCCESS;
+    cl_int err = CL_SUCCESS;
 
-  if (this->host_m_1.pData == NULL || this->host_m_2.pData == NULL) {
-    log_error("Data not ready\n");
-    return FAILURE;
-  }
+    if (this->host_m_1.pData == NULL || this->host_m_2.pData == NULL)
+    {
+        log_error("Data not ready\n");
+        return FAILURE;
+    }
 
-  update_host_mem_2();
+    update_host_mem_2();
 
-  if (!this->host_m_1.Equal(this->host_m_2)){
-    log_error("Buffer content difference found\n");
-    return FAILURE;
-  }
+    if (!this->host_m_1.Equal(this->host_m_2))
+    {
+        log_error("Buffer content difference found\n");
+        return FAILURE;
+    }
 
-  return err;
+    return err;
 }
 
-template < class T >
-cl_int cBuffer_check_mem_host_write_only< T >::verify_RW_Buffer()
+template <class T>
+cl_int cBuffer_check_mem_host_write_only<T>::verify_RW_Buffer()
 {
-  T vv1 = TEST_VALUE;
-  T vv2 = 0;
-  this->host_m_2.Set_to(vv2);
-
-  tmp_host_m.Init(this->host_m_1.num_elements, vv1) ;
-
-  cl_event event;
-  cl_int err = CL_SUCCESS;
-  err = clEnqueueWriteBuffer(this->m_queue, this->m_buffer, this->m_blocking, 0,
-                             this->get_block_size_bytes(), tmp_host_m.pData,
-                             0, NULL, &event);
-  if (err != CL_SUCCESS ) {
-    test_error(err, "clEnqueueWriteBuffer error");
-  }
-
-  if (!this->m_blocking){
-    err = clWaitForEvents(1, &event);
-    test_error(err, "clWaitForEvents error")
-  }
-
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
-
-  if (tmp_host_m.Equal(this->host_m_2)){
-    log_error("Test data should be different\n");
-    return FAILURE;
-  }
-
-  update_host_mem_2();
-
-  if (!tmp_host_m.Equal(this->host_m_2)){
-    log_error("Buffer content difference found\n");
-    return FAILURE;
-  }
-
-  err = clEnqueueReadBuffer(this->m_queue, this->m_buffer, CL_TRUE, 0,
-                            this->get_block_size_bytes(), this->host_m_2.pData,
-                            0, NULL, &event);
-
-  if ( err == CL_SUCCESS ) {
-    log_error("Calling clEnqueueReadBuffer on a memory object created with the CL_MEM_HOST_WRITE_ONLY flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-    return FAILURE;
-
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
-
-  return  err;
+    T vv1 = TEST_VALUE;
+    T vv2 = 0;
+    this->host_m_2.Set_to(vv2);
+
+    tmp_host_m.Init(this->host_m_1.num_elements, vv1);
+
+    cl_event event;
+    cl_int err = CL_SUCCESS;
+    err = clEnqueueWriteBuffer(this->m_queue, this->m_buffer, this->m_blocking,
+                               0, this->get_block_size_bytes(),
+                               tmp_host_m.pData, 0, NULL, &event);
+    if (err != CL_SUCCESS)
+    {
+        test_error(err, "clEnqueueWriteBuffer error");
+    }
+
+    if (!this->m_blocking)
+    {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error")
+    }
+
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
+
+    if (tmp_host_m.Equal(this->host_m_2))
+    {
+        log_error("Test data should be different\n");
+        return FAILURE;
+    }
+
+    update_host_mem_2();
+
+    if (!tmp_host_m.Equal(this->host_m_2))
+    {
+        log_error("Buffer content difference found\n");
+        return FAILURE;
+    }
+
+    err = clEnqueueReadBuffer(this->m_queue, this->m_buffer, CL_TRUE, 0,
+                              this->get_block_size_bytes(),
+                              this->host_m_2.pData, 0, NULL, &event);
+
+    if (err == CL_SUCCESS)
+    {
+        log_error(
+            "Calling clEnqueueReadBuffer on a memory object created with the "
+            "CL_MEM_HOST_WRITE_ONLY flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
 }
 
-template < class T >
-cl_int cBuffer_check_mem_host_write_only< T >::verify_RW_Buffer_rect()
+template <class T>
+cl_int cBuffer_check_mem_host_write_only<T>::verify_RW_Buffer_rect()
 {
-  this->Init_rect();
-
-  T vv1= TEST_VALUE;
-  this->host_m_1.Set_to(vv1);
-
-  T vv2 = 0;
-  this->host_m_2.Set_to(vv2);
-
-  cl_event event, event_1;
-
-  cl_int err = CL_SUCCESS;
-
-  vv1 = 0;
-  C_host_memory_block< T > tmp_host_m;
-  tmp_host_m.Init(this->host_m_1.num_elements, vv1); // zero out the buffer
-  err = clEnqueueWriteBuffer(this->m_queue, this->m_buffer, CL_TRUE, 0,
-                             this->get_block_size_bytes(), tmp_host_m.pData,
-                             0, NULL, &event_1);
-  test_error(err, "clEnqueueWriteBuffer error");
-
-  vv1 = TEST_VALUE;
-  tmp_host_m.Set_to(vv1);
-  err = clEnqueueWriteBufferRect(this->m_queue, this->m_buffer, this->m_blocking,
-                                 this->buffer_origin_bytes,
-                                 this->host_origin_bytes,
-                                 this->region_bytes,
-                                 this->buffer_row_pitch_bytes,
-                                 this->buffer_slice_pitch_bytes,
-                                 this->host_row_pitch_bytes,
-                                 this->host_slice_pitch_bytes,
-                                 tmp_host_m.pData,
-                                 1, &event_1, &event);
-  test_error(err, "clEnqueueWriteBufferRect error");
-
-  if (!this->m_blocking) {
-    err = clWaitForEvents(1, &event);
-    test_error(err, "clWaitForEvents error")
-  }
-
-  if (tmp_host_m.Equal(this->host_m_2)) {
-    log_error("Test data should be different\n");
-    return FAILURE;
-  }
-
-  err = clReleaseEvent(event_1);
-  test_error(err, "clReleaseEvent error");
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
-
-  update_host_mem_2();
-
-  size_t tot_in_reg = this->region[0] * this->region[1] * this->region[2];
-  if (!tmp_host_m.Equal_rect(this->host_m_2, this->host_origin, this->region,
-                             this->host_row_pitch, this->host_slice_pitch)) {
-    log_error("Buffer rect content difference found\n");
-    return FAILURE;
-  }
-
-  if (this->host_m_2.Count(vv1) != tot_in_reg)
-  {
-    log_error("Buffer rect content difference found\n");
-    return FAILURE;
-  }
-
-  err = clEnqueueReadBufferRect(this->m_queue, this->m_buffer, this->m_blocking,
-                                this->buffer_origin_bytes,
-                                this->host_origin_bytes,
-                                this->region_bytes,
-                                this->buffer_row_pitch_bytes,
-                                this->buffer_slice_pitch_bytes,
-                                this->host_row_pitch_bytes,
-                                this->host_slice_pitch_bytes,
-                                this->host_m_2.pData,
-                                0, NULL, &event);
-
-  if (err == CL_SUCCESS) {
-    log_error("Calling clEnqueueReadBufferRect on a memory object created with the CL_MEM_HOST_WRITE_ONLY flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-    return FAILURE;
-
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
-
-  return err;
-}
+    this->Init_rect();
 
-template < class T >
-cl_int cBuffer_check_mem_host_write_only< T >::update_host_mem_2()
-{
-  size_t global_work_size[3] = {0, 1, 1};
-  global_work_size[0] = this->get_block_size_bytes();
+    T vv1 = TEST_VALUE;
+    this->host_m_1.Set_to(vv1);
 
-  cl_event event, event_2;
-  cl_int err = clEnqueueCopyBuffer(this->m_queue, this->m_buffer, this->m_buffer2, 0, 0,
-                                   this->m_nNumber_elements* sizeof (T), 0, NULL, &event);
-  test_error(err, "clEnqueueCopyBuffer error");
+    T vv2 = 0;
+    this->host_m_2.Set_to(vv2);
 
-  this->host_m_2.Set_to_zero();
-  err = clEnqueueReadBuffer(this->m_queue, this->m_buffer2, CL_TRUE, 0,
-                            this->get_block_size_bytes(), this->host_m_2.pData,
-                            1, &event, &event_2);
-  test_error(err, "clEnqueueReadBuffer error");
+    cl_event event, event_1;
 
-  clWaitForEvents(1, &event_2);
-  test_error(err, "clWaitForEvents error");
+    cl_int err = CL_SUCCESS;
 
-  err = clReleaseEvent(event_2);
-  test_error(err, "clReleaseEvent error");
+    vv1 = 0;
+    C_host_memory_block<T> tmp_host_m;
+    tmp_host_m.Init(this->host_m_1.num_elements, vv1); // zero out the buffer
+    err = clEnqueueWriteBuffer(this->m_queue, this->m_buffer, CL_TRUE, 0,
+                               this->get_block_size_bytes(), tmp_host_m.pData,
+                               0, NULL, &event_1);
+    test_error(err, "clEnqueueWriteBuffer error");
 
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
-  return err;
+    vv1 = TEST_VALUE;
+    tmp_host_m.Set_to(vv1);
+    err = clEnqueueWriteBufferRect(
+        this->m_queue, this->m_buffer, this->m_blocking,
+        this->buffer_origin_bytes, this->host_origin_bytes, this->region_bytes,
+        this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
+        this->host_row_pitch_bytes, this->host_slice_pitch_bytes,
+        tmp_host_m.pData, 1, &event_1, &event);
+    test_error(err, "clEnqueueWriteBufferRect error");
+
+    if (!this->m_blocking)
+    {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error")
+    }
+
+    if (tmp_host_m.Equal(this->host_m_2))
+    {
+        log_error("Test data should be different\n");
+        return FAILURE;
+    }
+
+    err = clReleaseEvent(event_1);
+    test_error(err, "clReleaseEvent error");
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
+
+    update_host_mem_2();
+
+    size_t tot_in_reg = this->region[0] * this->region[1] * this->region[2];
+    if (!tmp_host_m.Equal_rect(this->host_m_2, this->host_origin, this->region,
+                               this->host_row_pitch, this->host_slice_pitch))
+    {
+        log_error("Buffer rect content difference found\n");
+        return FAILURE;
+    }
+
+    if (this->host_m_2.Count(vv1) != tot_in_reg)
+    {
+        log_error("Buffer rect content difference found\n");
+        return FAILURE;
+    }
+
+    err = clEnqueueReadBufferRect(
+        this->m_queue, this->m_buffer, this->m_blocking,
+        this->buffer_origin_bytes, this->host_origin_bytes, this->region_bytes,
+        this->buffer_row_pitch_bytes, this->buffer_slice_pitch_bytes,
+        this->host_row_pitch_bytes, this->host_slice_pitch_bytes,
+        this->host_m_2.pData, 0, NULL, &event);
+
+    if (err == CL_SUCCESS)
+    {
+        log_error(
+            "Calling clEnqueueReadBufferRect on a memory object created with "
+            "the CL_MEM_HOST_WRITE_ONLY flag should not return CL_SUCCESS\n");
+        err = FAILURE;
+        return FAILURE;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
 }
 
-template < class T >
-cl_int cBuffer_check_mem_host_write_only< T >::verify_RW_Buffer_mapping()
+template <class T>
+cl_int cBuffer_check_mem_host_write_only<T>::update_host_mem_2()
 {
-  T vv2 = 0;
-  this->host_m_2.Set_to(vv2);
-
-  cl_event event;
-  cl_int err = CL_SUCCESS;
-
-  void *dataPtr;
-  int size = this->get_block_size_bytes();
-  dataPtr = clEnqueueMapBuffer(this->m_queue, this->m_buffer, this->m_blocking,
-                               CL_MAP_WRITE,
-                               0, size,
-                               0, NULL, &event, &err);
-  test_error(err, "clEnqueueMapBuffer error");
-
-  if (!this->m_blocking) {
-    err = clWaitForEvents(1, &event);
+    cl_event event, event_2;
+    cl_int err = clEnqueueCopyBuffer(
+        this->m_queue, this->m_buffer, this->m_buffer2, 0, 0,
+        this->m_nNumber_elements * sizeof(T), 0, NULL, &event);
+    test_error(err, "clEnqueueCopyBuffer error");
+
+    this->host_m_2.Set_to_zero();
+    err = clEnqueueReadBuffer(this->m_queue, this->m_buffer2, CL_TRUE, 0,
+                              this->get_block_size_bytes(),
+                              this->host_m_2.pData, 1, &event, &event_2);
+    test_error(err, "clEnqueueReadBuffer error");
+
+    clWaitForEvents(1, &event_2);
     test_error(err, "clWaitForEvents error");
-  }
-
-  err = clReleaseEvent(event);
-  test_error(err, "clReleaseEvent error");
-
-  update_host_mem_2();
 
-  if ((this->buffer_mem_flag & CL_MEM_USE_HOST_PTR) && dataPtr != this->pHost_ptr){
-    log_error("Mapped host pointer difference found\n");
-    return FAILURE;
-  }
+    err = clReleaseEvent(event_2);
+    test_error(err, "clReleaseEvent error");
 
-  if(!this->host_m_2.Equal((T*)dataPtr, this->m_nNumber_elements)) {
-    log_error("Buffer content difference found\n");
-    return FAILURE;
-  }
-
-  err = clEnqueueUnmapMemObject(this->m_queue, this->m_buffer, dataPtr, 0,
-                                nullptr, nullptr);
-  test_error(err, "clEnqueueUnmapMemObject error");
-
-  // test map read
-  clEnqueueMapBuffer(this->m_queue, this->m_buffer, this->m_blocking,
-                     CL_MAP_READ,
-                     0, this->get_block_size_bytes(),
-                     0, NULL, &event, &err);
-
-  if (err == CL_SUCCESS) {
-    log_error("Calling clEnqueueMapBuffer (CL_MAP_READ) on a memory object created with the MEM_HOST_WRITE_ONLY flag should not return CL_SUCCESS\n");
-    err = FAILURE;
-
-  } else {
-    log_info("Test succeeded\n\n");
-    err = CL_SUCCESS;
-  }
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
+    return err;
+}
 
-  return err;
+template <class T>
+cl_int cBuffer_check_mem_host_write_only<T>::verify_RW_Buffer_mapping()
+{
+    T vv2 = 0;
+    this->host_m_2.Set_to(vv2);
+
+    cl_event event;
+    cl_int err = CL_SUCCESS;
+
+    void *dataPtr;
+    int size = this->get_block_size_bytes();
+    dataPtr =
+        clEnqueueMapBuffer(this->m_queue, this->m_buffer, this->m_blocking,
+                           CL_MAP_WRITE, 0, size, 0, NULL, &event, &err);
+    test_error(err, "clEnqueueMapBuffer error");
+
+    if (!this->m_blocking)
+    {
+        err = clWaitForEvents(1, &event);
+        test_error(err, "clWaitForEvents error");
+    }
+
+    err = clReleaseEvent(event);
+    test_error(err, "clReleaseEvent error");
+
+    update_host_mem_2();
+
+    if ((this->buffer_mem_flag & CL_MEM_USE_HOST_PTR)
+        && dataPtr != this->pHost_ptr)
+    {
+        log_error("Mapped host pointer difference found\n");
+        return FAILURE;
+    }
+
+    if (!this->host_m_2.Equal((T *)dataPtr, this->m_nNumber_elements))
+    {
+        log_error("Buffer content difference found\n");
+        return FAILURE;
+    }
+
+    err = clEnqueueUnmapMemObject(this->m_queue, this->m_buffer, dataPtr, 0,
+                                  nullptr, nullptr);
+    test_error(err, "clEnqueueUnmapMemObject error");
+
+    // test map read
+    clEnqueueMapBuffer(this->m_queue, this->m_buffer, this->m_blocking,
+                       CL_MAP_READ, 0, this->get_block_size_bytes(), 0, NULL,
+                       &event, &err);
+
+    if (err == CL_SUCCESS)
+    {
+        log_error("Calling clEnqueueMapBuffer (CL_MAP_READ) on a memory object "
+                  "created with the MEM_HOST_WRITE_ONLY flag should not return "
+                  "CL_SUCCESS\n");
+        err = FAILURE;
+    }
+    else
+    {
+        log_info("Test succeeded\n\n");
+        err = CL_SUCCESS;
+    }
+
+    return err;
 }
 
 #endif
diff --git a/test_conformance/mem_host_flags/main.cpp b/test_conformance/mem_host_flags/main.cpp
index f0649808..2f1f98a4 100644
--- a/test_conformance/mem_host_flags/main.cpp
+++ b/test_conformance/mem_host_flags/main.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -18,7 +18,7 @@
 #include <stdio.h>
 #include <string.h>
 
-#if !defined (__APPLE__)
+#if !defined(__APPLE__)
 #include <CL/cl.h>
 #endif
 
@@ -30,18 +30,18 @@
 #endif
 
 test_definition test_list[] = {
-    ADD_TEST( mem_host_read_only_buffer ),
-    ADD_TEST( mem_host_read_only_subbuffer ),
-    ADD_TEST( mem_host_write_only_buffer ),
-    ADD_TEST( mem_host_write_only_subbuffer ),
-    ADD_TEST( mem_host_no_access_buffer ),
-    ADD_TEST( mem_host_no_access_subbuffer ),
-    ADD_TEST( mem_host_read_only_image ),
-    ADD_TEST( mem_host_write_only_image ),
-    ADD_TEST( mem_host_no_access_image ),
+    ADD_TEST(mem_host_read_only_buffer),
+    ADD_TEST(mem_host_read_only_subbuffer),
+    ADD_TEST(mem_host_write_only_buffer),
+    ADD_TEST(mem_host_write_only_subbuffer),
+    ADD_TEST(mem_host_no_access_buffer),
+    ADD_TEST(mem_host_no_access_subbuffer),
+    ADD_TEST(mem_host_read_only_image),
+    ADD_TEST(mem_host_write_only_image),
+    ADD_TEST(mem_host_no_access_image),
 };
 
-const int test_num = ARRAY_SIZE( test_list );
+const int test_num = ARRAY_SIZE(test_list);
 
 int main(int argc, const char *argv[])
 {
diff --git a/test_conformance/mem_host_flags/mem_host_buffer.cpp b/test_conformance/mem_host_flags/mem_host_buffer.cpp
index dd1f201f..7ad29ec4 100644
--- a/test_conformance/mem_host_flags/mem_host_buffer.cpp
+++ b/test_conformance/mem_host_flags/mem_host_buffer.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -26,463 +26,498 @@
 #include "checker_mem_host_write_only.hpp"
 #include "checker_mem_host_no_access.hpp"
 
-static int test_mem_host_read_only_buffer_RW(cl_device_id deviceID, cl_context context,
-                                             cl_command_queue queue, cl_bool blocking,
-                                             cl_mem_flags buffer_mem_flag,
-                                             cl_mem_flags parent_buffer_flag,
-                                             enum BUFFER_TYPE buffer_type)
+static int test_mem_host_read_only_buffer_RW(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    cl_bool blocking, cl_mem_flags buffer_mem_flag,
+    cl_mem_flags parent_buffer_flag, enum BUFFER_TYPE buffer_type)
 {
-  log_info("%s\n", __FUNCTION__);
-  cBuffer_check_mem_host_read_only< TEST_ELEMENT_TYPE > checker(deviceID, context, queue);
-  checker.m_blocking = blocking;
-  checker.buffer_mem_flag = buffer_mem_flag;
-  cl_int err;
-  switch (buffer_type) {
-    case _BUFFER:
-      err = checker.SetupBuffer();
-      break;
-    case _Sub_BUFFER:
-      err = checker.SetupASSubBuffer(parent_buffer_flag);
-      break;
-  }
-
-  test_error(err, __FUNCTION__);
-  checker.Setup_Test_Environment();
-  err= checker.verify_RW_Buffer();
-  test_error(err, __FUNCTION__);
-  clFinish(queue);
-
-  return err;
+    log_info("%s\n", __FUNCTION__);
+    cBuffer_check_mem_host_read_only<TEST_ELEMENT_TYPE> checker(deviceID,
+                                                                context, queue);
+    checker.m_blocking = blocking;
+    checker.buffer_mem_flag = buffer_mem_flag;
+    cl_int err;
+    switch (buffer_type)
+    {
+        case _BUFFER: err = checker.SetupBuffer(); break;
+        case _Sub_BUFFER:
+            err = checker.SetupASSubBuffer(parent_buffer_flag);
+            break;
+    }
+
+    test_error(err, __FUNCTION__);
+    checker.Setup_Test_Environment();
+    err = checker.verify_RW_Buffer();
+    test_error(err, __FUNCTION__);
+    clFinish(queue);
+
+    return err;
 }
 
-static int test_mem_host_read_only_buffer_RW_Rect(cl_device_id deviceID, cl_context context,
-                                                  cl_command_queue queue, cl_bool blocking,
-                                                  cl_mem_flags buffer_mem_flag,
-                                                  cl_mem_flags parent_buffer_flag,
-                                                  enum BUFFER_TYPE buffer_type)
+static int test_mem_host_read_only_buffer_RW_Rect(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    cl_bool blocking, cl_mem_flags buffer_mem_flag,
+    cl_mem_flags parent_buffer_flag, enum BUFFER_TYPE buffer_type)
 {
-  log_info("%s\n", __FUNCTION__);
-
-  cBuffer_check_mem_host_read_only< TEST_ELEMENT_TYPE > checker(deviceID, context, queue);
-  checker.m_blocking = blocking;
-  checker.buffer_mem_flag = buffer_mem_flag;
-  cl_int err;
-  switch (buffer_type) {
-    case _BUFFER:
-      err= checker.SetupBuffer();
-      break;
-    case _Sub_BUFFER:
-      err= checker.SetupASSubBuffer(parent_buffer_flag);
-      break;
-  }
-
-  test_error(err, __FUNCTION__);
-  checker.Setup_Test_Environment();
-  err = checker.verify_RW_Buffer_rect();
-  test_error(err,  __FUNCTION__);
-  clFinish(queue);
-
-  return err;
+    log_info("%s\n", __FUNCTION__);
+
+    cBuffer_check_mem_host_read_only<TEST_ELEMENT_TYPE> checker(deviceID,
+                                                                context, queue);
+    checker.m_blocking = blocking;
+    checker.buffer_mem_flag = buffer_mem_flag;
+    cl_int err;
+    switch (buffer_type)
+    {
+        case _BUFFER: err = checker.SetupBuffer(); break;
+        case _Sub_BUFFER:
+            err = checker.SetupASSubBuffer(parent_buffer_flag);
+            break;
+    }
+
+    test_error(err, __FUNCTION__);
+    checker.Setup_Test_Environment();
+    err = checker.verify_RW_Buffer_rect();
+    test_error(err, __FUNCTION__);
+    clFinish(queue);
+
+    return err;
 }
 
-static int test_mem_host_read_only_buffer_RW_Mapping(cl_device_id deviceID, cl_context context,
-                                                     cl_command_queue queue, cl_bool blocking,
-                                                     cl_mem_flags buffer_mem_flag,
-                                                     cl_mem_flags parent_buffer_flag,
-                                                     enum BUFFER_TYPE buffer_type)
+static int test_mem_host_read_only_buffer_RW_Mapping(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    cl_bool blocking, cl_mem_flags buffer_mem_flag,
+    cl_mem_flags parent_buffer_flag, enum BUFFER_TYPE buffer_type)
 {
-  log_info("%s\n", __FUNCTION__);
-
-  cBuffer_check_mem_host_read_only< TEST_ELEMENT_TYPE > checker(deviceID, context, queue);
-  checker.m_blocking = blocking;
-  checker.buffer_mem_flag = buffer_mem_flag;
-  cl_int err;
-  switch (buffer_type) {
-    case _BUFFER:
-      err= checker.SetupBuffer();
-      break;
-    case _Sub_BUFFER:
-      err= checker.SetupASSubBuffer(parent_buffer_flag);
-      break;
-  }
-
-  test_error(err, __FUNCTION__);
-  checker.Setup_Test_Environment();
-  err = checker.verify_RW_Buffer_mapping();
-  test_error(err, __FUNCTION__);
-  clFinish(queue);
-
-  return err;
+    log_info("%s\n", __FUNCTION__);
+
+    cBuffer_check_mem_host_read_only<TEST_ELEMENT_TYPE> checker(deviceID,
+                                                                context, queue);
+    checker.m_blocking = blocking;
+    checker.buffer_mem_flag = buffer_mem_flag;
+    cl_int err;
+    switch (buffer_type)
+    {
+        case _BUFFER: err = checker.SetupBuffer(); break;
+        case _Sub_BUFFER:
+            err = checker.SetupASSubBuffer(parent_buffer_flag);
+            break;
+    }
+
+    test_error(err, __FUNCTION__);
+    checker.Setup_Test_Environment();
+    err = checker.verify_RW_Buffer_mapping();
+    test_error(err, __FUNCTION__);
+    clFinish(queue);
+
+    return err;
 }
 
 int test_mem_host_read_only_buffer(cl_device_id deviceID, cl_context context,
                                    cl_command_queue queue, int num_elements)
 {
-  cl_mem_flags buffer_mem_flags[2] = {CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_READ_ONLY,
-    CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_READ_ONLY};
+    cl_mem_flags buffer_mem_flags[2] = {
+        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_READ_ONLY,
+        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_READ_ONLY
+    };
+
+    cl_int err = CL_SUCCESS;
+
+    cl_bool blocking[2] = { CL_TRUE, CL_FALSE };
+    for (int k = 0; k < 2; k++)
+        for (int i = 0; i < 2; i++)
+        {
+
+            err = test_mem_host_read_only_buffer_RW(
+                deviceID, context, queue, blocking[i], buffer_mem_flags[k], 0,
+                _BUFFER);
+            test_error(err, __FUNCTION__);
+
+            err = test_mem_host_read_only_buffer_RW_Rect(
+                deviceID, context, queue, blocking[i], buffer_mem_flags[k], 0,
+                _BUFFER);
+            test_error(err, __FUNCTION__);
+
+            err = test_mem_host_read_only_buffer_RW_Mapping(
+                deviceID, context, queue, blocking[i], buffer_mem_flags[k], 0,
+                _BUFFER);
+            test_error(err, __FUNCTION__);
+        }
+
+    return err;
+}
 
-  cl_int err = CL_SUCCESS;
+int test_mem_host_read_only_subbuffer(cl_device_id deviceID, cl_context context,
+                                      cl_command_queue queue, int num_elements)
+{
+    cl_mem_flags parent_buffer_mem_flags[1] = { CL_MEM_READ_WRITE
+                                                | CL_MEM_USE_HOST_PTR
+                                                | CL_MEM_HOST_READ_ONLY };
 
-  cl_bool blocking[2] = {CL_TRUE, CL_FALSE};
-  for (int k=0; k<2; k++)
-    for (int i=0; i< 2; i++)
-    {
+    cl_mem_flags buffer_mem_flags[4] = {
+        0, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
+        CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR
+    };
 
-      err = test_mem_host_read_only_buffer_RW(deviceID, context, queue, blocking[i],
-                                              buffer_mem_flags[k], 0, _BUFFER);
-      test_error(err, __FUNCTION__);
+    cl_int err = CL_SUCCESS;
 
-      err = test_mem_host_read_only_buffer_RW_Rect(deviceID, context, queue, blocking[i],
-                                                   buffer_mem_flags[k],0, _BUFFER);
-      test_error(err, __FUNCTION__);
+    cl_bool blocking[2] = { CL_TRUE, CL_FALSE };
 
-      err = test_mem_host_read_only_buffer_RW_Mapping(deviceID, context, queue, blocking[i],
-                                                      buffer_mem_flags[k],0, _BUFFER);
-      test_error(err, __FUNCTION__);
+    for (int p = 0; p < 1; p++)
+    {
+        for (int k = 0; k < 4; k++)
+            for (int i = 0; i < 2; i++)
+            {
+                err = test_mem_host_read_only_buffer_RW(
+                    deviceID, context, queue, blocking[i], buffer_mem_flags[k],
+                    parent_buffer_mem_flags[p], _Sub_BUFFER);
+                test_error(err, __FUNCTION__);
+
+                err = test_mem_host_read_only_buffer_RW_Rect(
+                    deviceID, context, queue, blocking[i], buffer_mem_flags[k],
+                    parent_buffer_mem_flags[p], _Sub_BUFFER);
+                test_error(err, __FUNCTION__);
+
+                err = test_mem_host_read_only_buffer_RW_Mapping(
+                    deviceID, context, queue, blocking[i], buffer_mem_flags[k],
+                    parent_buffer_mem_flags[p], _Sub_BUFFER);
+                test_error(err, __FUNCTION__);
+            }
     }
 
-  return err;
+    return err;
 }
 
-int test_mem_host_read_only_subbuffer(cl_device_id deviceID, cl_context context,
-                                      cl_command_queue queue, int num_elements)
+//=============================== Write only
+
+static cl_int test_mem_host_write_only_buffer_RW(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    cl_bool blocking, cl_mem_flags buffer_mem_flag,
+    cl_mem_flags parent_buffer_flag, enum BUFFER_TYPE buffer_type)
 {
-  cl_mem_flags parent_buffer_mem_flags[1] = {CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_READ_ONLY};
+    log_info("%s\n", __FUNCTION__);
 
-  cl_mem_flags buffer_mem_flags[4] = {0, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
-    CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
-    CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR};
+    cBuffer_check_mem_host_write_only<TEST_ELEMENT_TYPE> checker(
+        deviceID, context, queue);
 
-  cl_int err = CL_SUCCESS;
+    checker.m_blocking = blocking;
+    checker.buffer_mem_flag = buffer_mem_flag;
+    cl_int err;
+    switch (buffer_type)
+    {
+        case _BUFFER: err = checker.SetupBuffer(); break;
+        case _Sub_BUFFER:
+            err = checker.SetupASSubBuffer(parent_buffer_flag);
+            break;
+    }
 
-  cl_bool blocking[2] = {CL_TRUE, CL_FALSE};
+    test_error(err, __FUNCTION__);
+    checker.Setup_Test_Environment();
+    err = checker.verify_RW_Buffer();
+    test_error(err, __FUNCTION__);
+    clFinish(queue);
 
-  for (int p=0; p<1; p++) {
-    for (int k=0; k<4; k++)
-      for (int i=0; i<2; i++)
-      {
-        err = test_mem_host_read_only_buffer_RW(deviceID, context, queue, blocking[i],
-                                                buffer_mem_flags[k], parent_buffer_mem_flags[p], _Sub_BUFFER);
-        test_error(err, __FUNCTION__);
+    return err;
+}
 
-        err = test_mem_host_read_only_buffer_RW_Rect(deviceID, context, queue, blocking[i],
-                                                     buffer_mem_flags[k], parent_buffer_mem_flags[p], _Sub_BUFFER);
-        test_error(err, __FUNCTION__);
+static cl_int test_mem_host_write_only_buffer_RW_Rect(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    cl_bool blocking, cl_mem_flags buffer_mem_flag,
+    cl_mem_flags parent_buffer_flag, enum BUFFER_TYPE buffer_type)
+{
+    log_info("%s\n", __FUNCTION__);
+
+    cBuffer_check_mem_host_write_only<TEST_ELEMENT_TYPE> checker(
+        deviceID, context, queue);
+    checker.m_blocking = blocking;
+    checker.buffer_mem_flag = buffer_mem_flag;
+    cl_int err;
+    switch (buffer_type)
+    {
+        case _BUFFER: err = checker.SetupBuffer(); break;
+        case _Sub_BUFFER:
+            err = checker.SetupASSubBuffer(parent_buffer_flag);
+            break;
+    }
 
-        err = test_mem_host_read_only_buffer_RW_Mapping(deviceID, context, queue, blocking[i],
-                                                        buffer_mem_flags[k], parent_buffer_mem_flags[p], _Sub_BUFFER);
-        test_error(err, __FUNCTION__);
-      }
-  }
+    test_error(err, __FUNCTION__);
+    checker.Setup_Test_Environment();
+    err = checker.verify_RW_Buffer_rect();
+    test_error(err, __FUNCTION__);
+    clFinish(queue);
 
-  return err;
+    return err;
 }
 
-//=============================== Write only
-
-static cl_int test_mem_host_write_only_buffer_RW(cl_device_id deviceID, cl_context context,
-                                                 cl_command_queue queue, cl_bool blocking,
-                                                 cl_mem_flags buffer_mem_flag,
-                                                 cl_mem_flags parent_buffer_flag,
-                                                 enum BUFFER_TYPE buffer_type)
+static cl_int test_mem_host_write_only_buffer_RW_Mapping(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    cl_bool blocking, cl_mem_flags buffer_mem_flag,
+    cl_mem_flags parent_buffer_flag, enum BUFFER_TYPE buffer_type)
 {
-  log_info("%s\n", __FUNCTION__);
-
-  cBuffer_check_mem_host_write_only< TEST_ELEMENT_TYPE > checker(deviceID, context, queue);
-
-  checker.m_blocking = blocking;
-  checker.buffer_mem_flag = buffer_mem_flag;
-  cl_int err;
-  switch (buffer_type) {
-    case _BUFFER:
-      err = checker.SetupBuffer();
-      break;
-    case _Sub_BUFFER:
-      err = checker.SetupASSubBuffer( parent_buffer_flag );
-      break;
-  }
-
-  test_error(err, __FUNCTION__);
-  checker.Setup_Test_Environment();
-  err= checker.verify_RW_Buffer();
-  test_error(err, __FUNCTION__);
-  clFinish(queue);
-
-  return err;
-}
+    log_info("%s\n", __FUNCTION__);
+
+    cBuffer_check_mem_host_write_only<TEST_ELEMENT_TYPE> checker(
+        deviceID, context, queue);
+    checker.m_blocking = blocking;
+    checker.buffer_mem_flag = buffer_mem_flag;
+    cl_int err;
+    switch (buffer_type)
+    {
+        case _BUFFER: err = checker.SetupBuffer(); break;
+        case _Sub_BUFFER:
+            err = checker.SetupASSubBuffer(parent_buffer_flag);
+            break;
+    }
 
-static cl_int test_mem_host_write_only_buffer_RW_Rect(cl_device_id deviceID, cl_context context,
-                                                      cl_command_queue queue, cl_bool blocking,
-                                                      cl_mem_flags buffer_mem_flag,
-                                                      cl_mem_flags parent_buffer_flag,
-                                                      enum BUFFER_TYPE buffer_type)
-{
-  log_info("%s\n", __FUNCTION__);
-
-  cBuffer_check_mem_host_write_only< TEST_ELEMENT_TYPE > checker(deviceID, context, queue);
-  checker.m_blocking = blocking;
-  checker.buffer_mem_flag = buffer_mem_flag;
-  cl_int err;
-  switch (buffer_type) {
-    case _BUFFER:
-      err= checker.SetupBuffer();
-      break;
-    case _Sub_BUFFER:
-      err= checker.SetupASSubBuffer(parent_buffer_flag);
-      break;
-  }
-
-  test_error(err, __FUNCTION__);
-  checker.Setup_Test_Environment();
-  err= checker.verify_RW_Buffer_rect();
-  test_error(err, __FUNCTION__);
-  clFinish(queue);
-
-  return err;
-}
+    test_error(err, __FUNCTION__);
+    checker.Setup_Test_Environment();
+    err = checker.verify_RW_Buffer_mapping();
+    test_error(err, __FUNCTION__);
+    clFinish(queue);
 
-static cl_int test_mem_host_write_only_buffer_RW_Mapping(cl_device_id deviceID, cl_context context,
-                                                         cl_command_queue queue, cl_bool blocking,
-                                                         cl_mem_flags buffer_mem_flag,
-                                                         cl_mem_flags parent_buffer_flag,
-                                                         enum BUFFER_TYPE buffer_type)
-{
-  log_info("%s\n", __FUNCTION__);
-
-  cBuffer_check_mem_host_write_only< TEST_ELEMENT_TYPE > checker(deviceID, context, queue);
-  checker.m_blocking = blocking;
-  checker.buffer_mem_flag = buffer_mem_flag;
-  cl_int err;
-  switch (buffer_type) {
-    case _BUFFER:
-      err= checker.SetupBuffer();
-      break;
-    case _Sub_BUFFER:
-      err= checker.SetupASSubBuffer(parent_buffer_flag);
-      break;
-  }
-
-  test_error(err, __FUNCTION__);
-  checker.Setup_Test_Environment();
-  err= checker.verify_RW_Buffer_mapping();
-  test_error(err, __FUNCTION__);
-  clFinish(queue);
-
-  return err;
+    return err;
 }
 
 int test_mem_host_write_only_buffer(cl_device_id deviceID, cl_context context,
                                     cl_command_queue queue, int num_elements)
 {
-  cl_mem_flags buffer_mem_flags[2] = {CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY,
-    CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_WRITE_ONLY};
-
-  cl_int err = CL_SUCCESS;
-
-  cl_bool blocking[2] = {CL_TRUE, CL_FALSE};
-  for (int k=0; k<2; k++)
-    for (int i=0; i<2; i++)
-    {
-      err = test_mem_host_write_only_buffer_RW(deviceID, context, queue, blocking[i],
-                                               buffer_mem_flags[k], 0, _BUFFER);
-      test_error(err, __FUNCTION__);
-
-      err = test_mem_host_write_only_buffer_RW_Rect(deviceID, context, queue, blocking[i],
-                                                    buffer_mem_flags[k], 0, _BUFFER);
-      test_error(err, __FUNCTION__);
-
-      err = test_mem_host_write_only_buffer_RW_Mapping(deviceID, context, queue, blocking[i],
-                                                       buffer_mem_flags[k], 0, _BUFFER);
-      test_error(err, __FUNCTION__);
-    }
-
-  return err;
+    cl_mem_flags buffer_mem_flags[2] = {
+        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY,
+        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_WRITE_ONLY
+    };
+
+    cl_int err = CL_SUCCESS;
+
+    cl_bool blocking[2] = { CL_TRUE, CL_FALSE };
+    for (int k = 0; k < 2; k++)
+        for (int i = 0; i < 2; i++)
+        {
+            err = test_mem_host_write_only_buffer_RW(
+                deviceID, context, queue, blocking[i], buffer_mem_flags[k], 0,
+                _BUFFER);
+            test_error(err, __FUNCTION__);
+
+            err = test_mem_host_write_only_buffer_RW_Rect(
+                deviceID, context, queue, blocking[i], buffer_mem_flags[k], 0,
+                _BUFFER);
+            test_error(err, __FUNCTION__);
+
+            err = test_mem_host_write_only_buffer_RW_Mapping(
+                deviceID, context, queue, blocking[i], buffer_mem_flags[k], 0,
+                _BUFFER);
+            test_error(err, __FUNCTION__);
+        }
+
+    return err;
 }
 
-int test_mem_host_write_only_subbuffer(cl_device_id deviceID, cl_context context,
+int test_mem_host_write_only_subbuffer(cl_device_id deviceID,
+                                       cl_context context,
                                        cl_command_queue queue, int num_elements)
 {
-  cl_mem_flags parent_buffer_mem_flags[1] = {CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY};
-
-  cl_mem_flags buffer_mem_flags[4] = {0, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
-    CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
-    CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR};
-
-  cl_int err = CL_SUCCESS;
+    cl_mem_flags parent_buffer_mem_flags[1] = { CL_MEM_READ_WRITE
+                                                | CL_MEM_USE_HOST_PTR
+                                                | CL_MEM_HOST_WRITE_ONLY };
 
-  cl_bool blocking[2] = {CL_TRUE, CL_FALSE};
+    cl_mem_flags buffer_mem_flags[4] = {
+        0, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
+        CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR
+    };
 
-  for (int p=0; p<1; p++) {
-    for (int m=0; m<4; m++) {
-      for (int i=0; i< 2; i++)
-      {
-        err = test_mem_host_write_only_buffer_RW(deviceID, context, queue, blocking[i],
-                                                 buffer_mem_flags[m], parent_buffer_mem_flags[p], _Sub_BUFFER);
-        test_error(err, __FUNCTION__);
+    cl_int err = CL_SUCCESS;
 
-        err = test_mem_host_write_only_buffer_RW_Rect(deviceID, context, queue, blocking[i],
-                                                      buffer_mem_flags[m], parent_buffer_mem_flags[p], _Sub_BUFFER);
-        test_error(err, __FUNCTION__);
+    cl_bool blocking[2] = { CL_TRUE, CL_FALSE };
 
-        err = test_mem_host_write_only_buffer_RW_Mapping(deviceID, context, queue, blocking[i],
-                                                         buffer_mem_flags[m] , parent_buffer_mem_flags[p], _Sub_BUFFER);
-        test_error(err, __FUNCTION__);
-      }
+    for (int p = 0; p < 1; p++)
+    {
+        for (int m = 0; m < 4; m++)
+        {
+            for (int i = 0; i < 2; i++)
+            {
+                err = test_mem_host_write_only_buffer_RW(
+                    deviceID, context, queue, blocking[i], buffer_mem_flags[m],
+                    parent_buffer_mem_flags[p], _Sub_BUFFER);
+                test_error(err, __FUNCTION__);
+
+                err = test_mem_host_write_only_buffer_RW_Rect(
+                    deviceID, context, queue, blocking[i], buffer_mem_flags[m],
+                    parent_buffer_mem_flags[p], _Sub_BUFFER);
+                test_error(err, __FUNCTION__);
+
+                err = test_mem_host_write_only_buffer_RW_Mapping(
+                    deviceID, context, queue, blocking[i], buffer_mem_flags[m],
+                    parent_buffer_mem_flags[p], _Sub_BUFFER);
+                test_error(err, __FUNCTION__);
+            }
+        }
     }
-  }
 
-  return err;
+    return err;
 }
 
 //=====================  NO ACCESS
 
-static cl_int test_mem_host_no_access_buffer_RW(cl_device_id deviceID, cl_context context,
-                                                cl_command_queue queue, cl_bool blocking,
-                                                cl_mem_flags buffer_mem_flag,
-                                                cl_mem_flags parent_buffer_flag,
-                                                enum BUFFER_TYPE buffer_type)
+static cl_int test_mem_host_no_access_buffer_RW(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    cl_bool blocking, cl_mem_flags buffer_mem_flag,
+    cl_mem_flags parent_buffer_flag, enum BUFFER_TYPE buffer_type)
 {
-  log_info("%s\n", __FUNCTION__);
-
-  cBuffer_check_mem_host_no_access< TEST_ELEMENT_TYPE > checker(deviceID, context, queue);
-  checker.m_blocking = blocking;
-  checker.buffer_mem_flag = buffer_mem_flag;
-
-  cl_int err = CL_SUCCESS;
-  switch (buffer_type) {
-    case _BUFFER:
-      err= checker.SetupBuffer();
-      break;
-    case _Sub_BUFFER:
-      err= checker.SetupASSubBuffer(parent_buffer_flag);
-      break;
-  }
-
-  test_error(err, __FUNCTION__);
-  checker.Setup_Test_Environment();
-  err= checker.verify_RW_Buffer_mapping();
-  test_error(err, __FUNCTION__);
-  clFinish(queue);
-
-  return err;
-}
+    log_info("%s\n", __FUNCTION__);
 
-static cl_int test_mem_host_no_access_buffer_RW_Rect(cl_device_id deviceID, cl_context context,
-                                                     cl_command_queue queue, cl_bool blocking,
-                                                     cl_mem_flags buffer_mem_flag,
-                                                     cl_mem_flags parent_buffer_flag,
-                                                     enum BUFFER_TYPE buffer_type)
-{
-  log_info( "%s\n", __FUNCTION__);
-
-  cBuffer_check_mem_host_no_access< TEST_ELEMENT_TYPE > checker(deviceID, context, queue);
-  checker.m_blocking = blocking;
-  checker.buffer_mem_flag = buffer_mem_flag;
-  cl_int err;
-  switch (buffer_type) {
-    case _BUFFER:
-      err= checker.SetupBuffer();
-      break;
-    case _Sub_BUFFER:
-      err= checker.SetupASSubBuffer(parent_buffer_flag);
-      break;
-  }
-
-  test_error(err, __FUNCTION__);
-  checker.Setup_Test_Environment();
-  err= checker.verify_RW_Buffer_mapping();
-  test_error(err, __FUNCTION__);
-  clFinish(queue);
-
-  return err;
+    cBuffer_check_mem_host_no_access<TEST_ELEMENT_TYPE> checker(deviceID,
+                                                                context, queue);
+    checker.m_blocking = blocking;
+    checker.buffer_mem_flag = buffer_mem_flag;
+
+    cl_int err = CL_SUCCESS;
+    switch (buffer_type)
+    {
+        case _BUFFER: err = checker.SetupBuffer(); break;
+        case _Sub_BUFFER:
+            err = checker.SetupASSubBuffer(parent_buffer_flag);
+            break;
+    }
+
+    test_error(err, __FUNCTION__);
+    checker.Setup_Test_Environment();
+    err = checker.verify_RW_Buffer_mapping();
+    test_error(err, __FUNCTION__);
+    clFinish(queue);
+
+    return err;
 }
 
-static cl_int test_mem_host_no_access_buffer_RW_Mapping(cl_device_id deviceID, cl_context context,
-                                                        cl_command_queue queue, cl_bool blocking,
-                                                        cl_mem_flags buffer_mem_flag,
-                                                        cl_mem_flags parent_buffer_flag,
-                                                        enum BUFFER_TYPE buffer_type)
+static cl_int test_mem_host_no_access_buffer_RW_Rect(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    cl_bool blocking, cl_mem_flags buffer_mem_flag,
+    cl_mem_flags parent_buffer_flag, enum BUFFER_TYPE buffer_type)
 {
-  log_info("%s\n", __FUNCTION__);
-
-  cBuffer_check_mem_host_no_access< TEST_ELEMENT_TYPE > checker(deviceID, context, queue);
-
-  checker.m_blocking = blocking;
-  checker.buffer_mem_flag = buffer_mem_flag;
-  cl_int err;
-  switch (buffer_type) {
-    case _BUFFER:
-      err= checker.SetupBuffer();
-      break;
-    case _Sub_BUFFER:
-      err= checker.SetupASSubBuffer(parent_buffer_flag);
-      break;
-  }
-
-  test_error(err, __FUNCTION__);
-  checker.Setup_Test_Environment();
-  err= checker.verify_RW_Buffer_mapping();
-  test_error(err, __FUNCTION__);
-  clFinish(queue);
-
-  return err;
+    log_info("%s\n", __FUNCTION__);
+
+    cBuffer_check_mem_host_no_access<TEST_ELEMENT_TYPE> checker(deviceID,
+                                                                context, queue);
+    checker.m_blocking = blocking;
+    checker.buffer_mem_flag = buffer_mem_flag;
+    cl_int err;
+    switch (buffer_type)
+    {
+        case _BUFFER: err = checker.SetupBuffer(); break;
+        case _Sub_BUFFER:
+            err = checker.SetupASSubBuffer(parent_buffer_flag);
+            break;
+    }
+
+    test_error(err, __FUNCTION__);
+    checker.Setup_Test_Environment();
+    err = checker.verify_RW_Buffer_mapping();
+    test_error(err, __FUNCTION__);
+    clFinish(queue);
+
+    return err;
 }
 
-int test_mem_host_no_access_buffer(cl_device_id deviceID, cl_context context,
-                                   cl_command_queue queue, int num_elements)
+static cl_int test_mem_host_no_access_buffer_RW_Mapping(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    cl_bool blocking, cl_mem_flags buffer_mem_flag,
+    cl_mem_flags parent_buffer_flag, enum BUFFER_TYPE buffer_type)
 {
-  cl_mem_flags buffer_mem_flag[2] = {CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS,
-    CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS};
+    log_info("%s\n", __FUNCTION__);
 
-  cl_int err = CL_SUCCESS;
+    cBuffer_check_mem_host_no_access<TEST_ELEMENT_TYPE> checker(deviceID,
+                                                                context, queue);
 
-  cl_bool blocking[2] = {CL_TRUE, CL_FALSE};
-  for (int k=0; k<2; k++)
-    for (int i=0; i<2; i++) {
-      err = test_mem_host_no_access_buffer_RW(deviceID, context, queue, blocking[i],
-                                              buffer_mem_flag[k], 0, _BUFFER);
-      test_error(err, __FUNCTION__);
+    checker.m_blocking = blocking;
+    checker.buffer_mem_flag = buffer_mem_flag;
+    cl_int err;
+    switch (buffer_type)
+    {
+        case _BUFFER: err = checker.SetupBuffer(); break;
+        case _Sub_BUFFER:
+            err = checker.SetupASSubBuffer(parent_buffer_flag);
+            break;
+    }
 
-      err = test_mem_host_no_access_buffer_RW_Rect(deviceID, context, queue, blocking[i],
-                                                   buffer_mem_flag[k], 0, _BUFFER);
-      test_error(err, __FUNCTION__);
+    test_error(err, __FUNCTION__);
+    checker.Setup_Test_Environment();
+    err = checker.verify_RW_Buffer_mapping();
+    test_error(err, __FUNCTION__);
+    clFinish(queue);
 
-      err = test_mem_host_no_access_buffer_RW_Mapping(deviceID, context, queue, blocking[i],
-                                                      buffer_mem_flag[k], 0, _BUFFER);
-      test_error(err, __FUNCTION__);
-    }
+    return err;
+}
 
-  return err;
+int test_mem_host_no_access_buffer(cl_device_id deviceID, cl_context context,
+                                   cl_command_queue queue, int num_elements)
+{
+    cl_mem_flags buffer_mem_flag[2] = {
+        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS,
+        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS
+    };
+
+    cl_int err = CL_SUCCESS;
+
+    cl_bool blocking[2] = { CL_TRUE, CL_FALSE };
+    for (int k = 0; k < 2; k++)
+        for (int i = 0; i < 2; i++)
+        {
+            err = test_mem_host_no_access_buffer_RW(
+                deviceID, context, queue, blocking[i], buffer_mem_flag[k], 0,
+                _BUFFER);
+            test_error(err, __FUNCTION__);
+
+            err = test_mem_host_no_access_buffer_RW_Rect(
+                deviceID, context, queue, blocking[i], buffer_mem_flag[k], 0,
+                _BUFFER);
+            test_error(err, __FUNCTION__);
+
+            err = test_mem_host_no_access_buffer_RW_Mapping(
+                deviceID, context, queue, blocking[i], buffer_mem_flag[k], 0,
+                _BUFFER);
+            test_error(err, __FUNCTION__);
+        }
+
+    return err;
 }
 
 int test_mem_host_no_access_subbuffer(cl_device_id deviceID, cl_context context,
                                       cl_command_queue queue, int num_elements)
 {
-  cl_mem_flags parent_buffer_mem_flags[3] = { CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS,
-    CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS,
-    CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS};
-
-  cl_mem_flags buffer_mem_flags[4] = {0, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
-    CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
-    CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR};
-
-  cl_int err = CL_SUCCESS;
-
-  cl_bool blocking[2] = {CL_TRUE, CL_FALSE};
-  for (int p=0; p<3; p++) {
-    for (int k=0; k<4; k++) {
-      for (int i=0; i<2; i++) {
-        err += test_mem_host_no_access_buffer_RW(deviceID, context, queue, blocking[i],
-                                                buffer_mem_flags[k], parent_buffer_mem_flags[p], _Sub_BUFFER);
-
-        err += test_mem_host_no_access_buffer_RW_Rect(deviceID, context, queue, blocking[i],
-                                                     buffer_mem_flags[k], parent_buffer_mem_flags[p], _Sub_BUFFER);
-
-        err += test_mem_host_no_access_buffer_RW_Mapping( deviceID, context, queue, blocking[i],
-                                                        buffer_mem_flags[k], parent_buffer_mem_flags[p], _Sub_BUFFER);
-      }
+    cl_mem_flags parent_buffer_mem_flags[3] = {
+        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS,
+        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS,
+        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS
+    };
+
+    cl_mem_flags buffer_mem_flags[4] = {
+        0, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR,
+        CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
+        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR
+    };
+
+    cl_int err = CL_SUCCESS;
+
+    cl_bool blocking[2] = { CL_TRUE, CL_FALSE };
+    for (int p = 0; p < 3; p++)
+    {
+        for (int k = 0; k < 4; k++)
+        {
+            for (int i = 0; i < 2; i++)
+            {
+                err += test_mem_host_no_access_buffer_RW(
+                    deviceID, context, queue, blocking[i], buffer_mem_flags[k],
+                    parent_buffer_mem_flags[p], _Sub_BUFFER);
+
+                err += test_mem_host_no_access_buffer_RW_Rect(
+                    deviceID, context, queue, blocking[i], buffer_mem_flags[k],
+                    parent_buffer_mem_flags[p], _Sub_BUFFER);
+
+                err += test_mem_host_no_access_buffer_RW_Mapping(
+                    deviceID, context, queue, blocking[i], buffer_mem_flags[k],
+                    parent_buffer_mem_flags[p], _Sub_BUFFER);
+            }
+        }
     }
-  }
 
-  return err;
+    return err;
 }
diff --git a/test_conformance/mem_host_flags/mem_host_image.cpp b/test_conformance/mem_host_flags/mem_host_image.cpp
index f6b94c98..6307d505 100644
--- a/test_conformance/mem_host_flags/mem_host_image.cpp
+++ b/test_conformance/mem_host_flags/mem_host_image.cpp
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -27,338 +27,372 @@
 #include "checker_image_mem_host_write_only.hpp"
 
 //======================================
-static cl_int test_mem_host_read_only_RW_Image(cl_device_id deviceID, cl_context context,
-                                               cl_command_queue queue, cl_bool blocking,
-                                               cl_mem_flags buffer_mem_flag,
-                                               cl_mem_object_type image_type_in,
-                                               size_t array_size, size_t *img_dim)
+static cl_int test_mem_host_read_only_RW_Image(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    cl_bool blocking, cl_mem_flags buffer_mem_flag,
+    cl_mem_object_type image_type_in, size_t array_size, size_t *img_dim)
 {
-  log_info("%s  ... \n ", __FUNCTION__);
-  cl_int err = CL_SUCCESS;
-
-  cImage_check_mem_host_read_only< int > checker(deviceID, context, queue);
-  checker.m_blocking = blocking;
-  checker.buffer_mem_flag = buffer_mem_flag;
-
-  checker.m_cl_Image_desc.image_type = image_type_in;
-  checker.m_cl_Image_desc.image_width = img_dim[0];
-  checker.m_cl_Image_desc.image_height = img_dim[1];
-  checker.m_cl_Image_desc.image_depth = img_dim[2];
-  checker.m_cl_Image_desc.image_array_size = array_size;
-  checker.m_cl_Image_desc.image_row_pitch = 0;
-  checker.m_cl_Image_desc.image_slice_pitch = 0;
-  checker.m_cl_Image_desc.num_mip_levels = 0;
-  checker.m_cl_Image_desc.num_samples = 0;
-
-  checker.SetupImage();
-  checker.Init_rect();
-  err = checker.verify_RW_Image();
-  test_error(err, __FUNCTION__);
-  clFinish(queue);
-  return err;
+    log_info("%s  ... \n ", __FUNCTION__);
+    cl_int err = CL_SUCCESS;
+
+    cImage_check_mem_host_read_only<int> checker(deviceID, context, queue);
+    checker.m_blocking = blocking;
+    checker.buffer_mem_flag = buffer_mem_flag;
+
+    checker.m_cl_Image_desc.image_type = image_type_in;
+    checker.m_cl_Image_desc.image_width = img_dim[0];
+    checker.m_cl_Image_desc.image_height = img_dim[1];
+    checker.m_cl_Image_desc.image_depth = img_dim[2];
+    checker.m_cl_Image_desc.image_array_size = array_size;
+    checker.m_cl_Image_desc.image_row_pitch = 0;
+    checker.m_cl_Image_desc.image_slice_pitch = 0;
+    checker.m_cl_Image_desc.num_mip_levels = 0;
+    checker.m_cl_Image_desc.num_samples = 0;
+
+    checker.SetupImage();
+    checker.Init_rect();
+    err = checker.verify_RW_Image();
+    test_error(err, __FUNCTION__);
+    clFinish(queue);
+    return err;
 }
 
-static cl_int test_mem_host_read_only_RW_Image_Mapping(cl_device_id deviceID, cl_context context,
-                                                       cl_command_queue queue, cl_bool blocking,
-                                                       cl_mem_flags buffer_mem_flag,
-                                                       cl_mem_object_type image_type_in,
-                                                       size_t array_size, size_t *img_dim)
+static cl_int test_mem_host_read_only_RW_Image_Mapping(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    cl_bool blocking, cl_mem_flags buffer_mem_flag,
+    cl_mem_object_type image_type_in, size_t array_size, size_t *img_dim)
 {
-  log_info("%s  ... \n ", __FUNCTION__);
-  cl_int err = CL_SUCCESS;
-
-  cImage_check_mem_host_read_only< int > checker(deviceID, context, queue);
-  checker.m_blocking = blocking;
-  checker.buffer_mem_flag = buffer_mem_flag;
-
-  checker.m_cl_Image_desc.image_type = image_type_in;
-  checker.m_cl_Image_desc.image_width = img_dim[0];
-  checker.m_cl_Image_desc.image_height = img_dim[1];
-  checker.m_cl_Image_desc.image_depth = img_dim[2];
-  checker.m_cl_Image_desc.image_array_size = array_size;
-  checker.m_cl_Image_desc.image_row_pitch = 0;
-  checker.m_cl_Image_desc.image_slice_pitch = 0;
-  checker.m_cl_Image_desc.num_mip_levels = 0;
-  checker.m_cl_Image_desc.num_samples = 0;
-
-  checker.SetupImage();
-  checker.Init_rect();
-  err = checker.verify_RW_Image_Mapping();
-  test_error(err, __FUNCTION__);
-  clFinish(queue);
-  return err;
+    log_info("%s  ... \n ", __FUNCTION__);
+    cl_int err = CL_SUCCESS;
+
+    cImage_check_mem_host_read_only<int> checker(deviceID, context, queue);
+    checker.m_blocking = blocking;
+    checker.buffer_mem_flag = buffer_mem_flag;
+
+    checker.m_cl_Image_desc.image_type = image_type_in;
+    checker.m_cl_Image_desc.image_width = img_dim[0];
+    checker.m_cl_Image_desc.image_height = img_dim[1];
+    checker.m_cl_Image_desc.image_depth = img_dim[2];
+    checker.m_cl_Image_desc.image_array_size = array_size;
+    checker.m_cl_Image_desc.image_row_pitch = 0;
+    checker.m_cl_Image_desc.image_slice_pitch = 0;
+    checker.m_cl_Image_desc.num_mip_levels = 0;
+    checker.m_cl_Image_desc.num_samples = 0;
+
+    checker.SetupImage();
+    checker.Init_rect();
+    err = checker.verify_RW_Image_Mapping();
+    test_error(err, __FUNCTION__);
+    clFinish(queue);
+    return err;
 }
 
 int test_mem_host_read_only_image(cl_device_id deviceID, cl_context context,
                                   cl_command_queue queue, int num_elements)
 {
-  cl_mem_flags buffer_mem_flags[2] = { CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_READ_ONLY,
-    CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_READ_ONLY };
+    cl_mem_flags buffer_mem_flags[2] = {
+        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_READ_ONLY,
+        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_READ_ONLY
+    };
 
-  cl_int err = CL_SUCCESS;
+    cl_int err = CL_SUCCESS;
 
-  cl_bool image_support;
-  err = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof image_support, &image_support, NULL);
-  if (err) {
-    test_error(err, __FUNCTION__);
-    return err;
-  }
-  if (!image_support) {
-    log_info("Images are not supported by the device, skipping test...\n");
-    return 0;
-  }
+    cl_bool image_support;
+    err = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT,
+                          sizeof image_support, &image_support, NULL);
+    if (err)
+    {
+        test_error(err, __FUNCTION__);
+        return err;
+    }
+    if (!image_support)
+    {
+        log_info("Images are not supported by the device, skipping test...\n");
+        return 0;
+    }
 
 
-  cl_mem_object_type img_type[5] = {CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE3D,CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY};
+    cl_mem_object_type img_type[5] = {
+        CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE3D,
+        CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY
+    };
 
-  size_t img_dims[5][3] = {{200, 1, 1}, {200, 80, 1}, {200, 80, 5}, {200, 1, 1}, {200, 80, 10}}; // in elements
+    size_t img_dims[5][3] = { { 200, 1, 1 },
+                              { 200, 80, 1 },
+                              { 200, 80, 5 },
+                              { 200, 1, 1 },
+                              { 200, 80, 10 } }; // in elements
 
-  size_t array_size[5] = {1, 10, 1, 10, 1};
+    size_t array_size[5] = { 1, 10, 1, 10, 1 };
 
-  cl_bool blocking[2] = {CL_TRUE, CL_FALSE};
-  for (int flag=0; flag<2; flag++)
-    for (int i=0; i<2; i++) // blocking
-    {
-      for(int p=0; p<3; p++)
-      {
-        err = test_mem_host_read_only_RW_Image(deviceID, context, queue, blocking[i],
-                                               buffer_mem_flags[flag], img_type[p],
-                                               array_size[p], img_dims[p]);
+    cl_bool blocking[2] = { CL_TRUE, CL_FALSE };
+    for (int flag = 0; flag < 2; flag++)
+        for (int i = 0; i < 2; i++) // blocking
+        {
+            for (int p = 0; p < 3; p++)
+            {
+                err = test_mem_host_read_only_RW_Image(
+                    deviceID, context, queue, blocking[i],
+                    buffer_mem_flags[flag], img_type[p], array_size[p],
+                    img_dims[p]);
 
-        test_error(err, __FUNCTION__);
+                test_error(err, __FUNCTION__);
 
-        err = test_mem_host_read_only_RW_Image_Mapping(deviceID, context, queue, blocking[i],
-                                                       buffer_mem_flags[flag], img_type[p],
-                                                       array_size[p], img_dims[p]);
+                err = test_mem_host_read_only_RW_Image_Mapping(
+                    deviceID, context, queue, blocking[i],
+                    buffer_mem_flags[flag], img_type[p], array_size[p],
+                    img_dims[p]);
 
-        test_error(err, __FUNCTION__);
-      }
-    }
+                test_error(err, __FUNCTION__);
+            }
+        }
 
-  return err;
+    return err;
 }
 
 //----------------------------
-static cl_int test_MEM_HOST_WRIE_ONLY_Image_RW (cl_device_id deviceID, cl_context context,
-                                                cl_command_queue queue, cl_bool blocking,
-                                                cl_mem_flags buffer_mem_flag,
-                                                cl_mem_object_type image_type_in,
-                                                size_t array_size, size_t *img_dim)
+static cl_int test_MEM_HOST_WRITE_ONLY_Image_RW(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    cl_bool blocking, cl_mem_flags buffer_mem_flag,
+    cl_mem_object_type image_type_in, size_t array_size, size_t *img_dim)
 {
-  log_info(" %s  ... \n ", __FUNCTION__);
-  cl_int err = CL_SUCCESS;
-
-  cImage_check_mem_host_write_only< int > checker(deviceID, context, queue);
-  checker.m_blocking = blocking;
-  checker.buffer_mem_flag = buffer_mem_flag;
-
-  checker.m_cl_Image_desc.image_type = image_type_in;
-  checker.m_cl_Image_desc.image_width = img_dim[0];
-  checker.m_cl_Image_desc.image_height = img_dim[1];
-  checker.m_cl_Image_desc.image_depth = img_dim[2];
-  checker.m_cl_Image_desc.image_array_size = array_size;
-  checker.m_cl_Image_desc.image_row_pitch = 0;
-  checker.m_cl_Image_desc.image_slice_pitch = 0;
-  checker.m_cl_Image_desc.num_mip_levels = 0;
-  checker.m_cl_Image_desc.num_samples = 0;
-
-  checker.SetupImage();
-  checker.Init_rect();
-  checker.Setup_Test_Environment();
-
-  err = checker.verify_RW_Image();
-  clFinish(queue);
-  test_error(err, __FUNCTION__);
-
-  return err;
+    log_info(" %s  ... \n ", __FUNCTION__);
+    cl_int err = CL_SUCCESS;
+
+    cImage_check_mem_host_write_only<int> checker(deviceID, context, queue);
+    checker.m_blocking = blocking;
+    checker.buffer_mem_flag = buffer_mem_flag;
+
+    checker.m_cl_Image_desc.image_type = image_type_in;
+    checker.m_cl_Image_desc.image_width = img_dim[0];
+    checker.m_cl_Image_desc.image_height = img_dim[1];
+    checker.m_cl_Image_desc.image_depth = img_dim[2];
+    checker.m_cl_Image_desc.image_array_size = array_size;
+    checker.m_cl_Image_desc.image_row_pitch = 0;
+    checker.m_cl_Image_desc.image_slice_pitch = 0;
+    checker.m_cl_Image_desc.num_mip_levels = 0;
+    checker.m_cl_Image_desc.num_samples = 0;
+
+    checker.SetupImage();
+    checker.Init_rect();
+    checker.Setup_Test_Environment();
+
+    err = checker.verify_RW_Image();
+    clFinish(queue);
+    test_error(err, __FUNCTION__);
+
+    return err;
 }
 
-static cl_int test_MEM_HOST_WRITE_ONLY_Image_RW_Mapping(cl_device_id deviceID, cl_context context,
-                                                        cl_command_queue queue, cl_bool blocking,
-                                                        cl_mem_flags buffer_mem_flag,
-                                                        cl_mem_object_type image_type_in,
-                                                        size_t array_size, size_t *img_dim)
+static cl_int test_MEM_HOST_WRITE_ONLY_Image_RW_Mapping(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    cl_bool blocking, cl_mem_flags buffer_mem_flag,
+    cl_mem_object_type image_type_in, size_t array_size, size_t *img_dim)
 {
-  log_info("%s  ... \n ", __FUNCTION__);
-  cl_int err = CL_SUCCESS;
-
-  cImage_check_mem_host_write_only< int > checker(deviceID, context, queue);
-  checker.m_blocking = blocking;
-  checker.buffer_mem_flag = buffer_mem_flag;
-
-  checker.m_cl_Image_desc.image_type = image_type_in;
-  checker.m_cl_Image_desc.image_width = img_dim[0];
-  checker.m_cl_Image_desc.image_height = img_dim[1];
-  checker.m_cl_Image_desc.image_depth = img_dim[2];
-  checker.m_cl_Image_desc.image_array_size = array_size;
-  checker.m_cl_Image_desc.image_row_pitch = 0;
-  checker.m_cl_Image_desc.image_slice_pitch = 0;
-  checker.m_cl_Image_desc.num_mip_levels = 0;
-  checker.m_cl_Image_desc.num_samples = 0;
-
-  checker.SetupImage();
-  checker.Init_rect();
-  checker.Setup_Test_Environment();
-
-  err = checker.verify_RW_Image_Mapping();
-  clFinish(queue);
-  test_error(err, __FUNCTION__);
-
-  return err;
+    log_info("%s  ... \n ", __FUNCTION__);
+    cl_int err = CL_SUCCESS;
+
+    cImage_check_mem_host_write_only<int> checker(deviceID, context, queue);
+    checker.m_blocking = blocking;
+    checker.buffer_mem_flag = buffer_mem_flag;
+
+    checker.m_cl_Image_desc.image_type = image_type_in;
+    checker.m_cl_Image_desc.image_width = img_dim[0];
+    checker.m_cl_Image_desc.image_height = img_dim[1];
+    checker.m_cl_Image_desc.image_depth = img_dim[2];
+    checker.m_cl_Image_desc.image_array_size = array_size;
+    checker.m_cl_Image_desc.image_row_pitch = 0;
+    checker.m_cl_Image_desc.image_slice_pitch = 0;
+    checker.m_cl_Image_desc.num_mip_levels = 0;
+    checker.m_cl_Image_desc.num_samples = 0;
+
+    checker.SetupImage();
+    checker.Init_rect();
+    checker.Setup_Test_Environment();
+
+    err = checker.verify_RW_Image_Mapping();
+    clFinish(queue);
+    test_error(err, __FUNCTION__);
+
+    return err;
 }
 
 int test_mem_host_write_only_image(cl_device_id deviceID, cl_context context,
                                    cl_command_queue queue, int num_elements)
 {
-  cl_mem_flags buffer_mem_flags[2] = { CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY,
-    CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_WRITE_ONLY };
-
-  cl_int err = CL_SUCCESS;
-
-  cl_bool image_support;
-  err = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof image_support, &image_support, NULL);
-  if (err) {
-    test_error(err, __FUNCTION__);
-    return err;
-  }
-  if (!image_support) {
-    log_info("Images are not supported by the device, skipping test...\n");
-    return 0;
-  }
-
-  cl_mem_object_type img_type[5]= {CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE3D,
-    CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY};
+    cl_mem_flags buffer_mem_flags[2] = {
+        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY,
+        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_WRITE_ONLY
+    };
 
-  size_t img_dims[5][3]= {{200, 1, 1}, {200, 80, 1}, {200, 80, 5}, {200, 1, 1}, {200, 80, 1}  }; // in elements
+    cl_int err = CL_SUCCESS;
 
-  size_t array_size[5] = {1, 10, 1, 10, 1};
-
-  cl_bool blocking[2] = {CL_TRUE, CL_FALSE};
-  for (int k=0; k<2; k++)
-    for (int i=0; i<2; i++) // blocking
+    cl_bool image_support;
+    err = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT,
+                          sizeof image_support, &image_support, NULL);
+    if (err)
     {
-      for (int p=0; p<3; p++)
-      {
-        err = test_MEM_HOST_WRIE_ONLY_Image_RW(deviceID, context, queue, blocking[i],
-                                               buffer_mem_flags[k],  img_type[p], array_size[p], img_dims[p]);
         test_error(err, __FUNCTION__);
-
-        err = test_MEM_HOST_WRITE_ONLY_Image_RW_Mapping(deviceID, context, queue, blocking[i],
-                                                        buffer_mem_flags[k], img_type[p], array_size[p], img_dims[p]);
-        test_error(err, __FUNCTION__);
-      }
+        return err;
+    }
+    if (!image_support)
+    {
+        log_info("Images are not supported by the device, skipping test...\n");
+        return 0;
     }
 
-  return err;
+    cl_mem_object_type img_type[5] = {
+        CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE3D,
+        CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY
+    };
+
+    size_t img_dims[5][3] = { { 200, 1, 1 },
+                              { 200, 80, 1 },
+                              { 200, 80, 5 },
+                              { 200, 1, 1 },
+                              { 200, 80, 1 } }; // in elements
+
+    size_t array_size[5] = { 1, 10, 1, 10, 1 };
+
+    cl_bool blocking[2] = { CL_TRUE, CL_FALSE };
+    for (int k = 0; k < 2; k++)
+        for (int i = 0; i < 2; i++) // blocking
+        {
+            for (int p = 0; p < 3; p++)
+            {
+                err = test_MEM_HOST_WRITE_ONLY_Image_RW(
+                    deviceID, context, queue, blocking[i], buffer_mem_flags[k],
+                    img_type[p], array_size[p], img_dims[p]);
+                test_error(err, __FUNCTION__);
+
+                err = test_MEM_HOST_WRITE_ONLY_Image_RW_Mapping(
+                    deviceID, context, queue, blocking[i], buffer_mem_flags[k],
+                    img_type[p], array_size[p], img_dims[p]);
+                test_error(err, __FUNCTION__);
+            }
+        }
+
+    return err;
 }
 
 //--------
 
-static cl_int test_mem_host_no_access_Image_RW(cl_device_id deviceID, cl_context context,
-                                               cl_command_queue queue, cl_bool blocking,
-                                               cl_mem_flags buffer_mem_flag,
-                                               cl_mem_object_type image_type_in,
-                                               size_t array_size, size_t *img_dim)
-{
-  log_info("%s  ... \n", __FUNCTION__);
-  cl_int err = CL_SUCCESS;
-
-  cImage_check_mem_host_no_access< int > checker(deviceID, context, queue);
-
-  checker.m_blocking = blocking;
-  checker.buffer_mem_flag = buffer_mem_flag;
-
-  checker.m_cl_Image_desc.image_type = image_type_in;
-  checker.m_cl_Image_desc.image_width = img_dim[0];
-  checker.m_cl_Image_desc.image_height = img_dim[1];
-  checker.m_cl_Image_desc.image_depth = img_dim[2];
-  checker.m_cl_Image_desc.image_array_size = array_size;
-  checker.m_cl_Image_desc.image_row_pitch = 0;
-  checker.m_cl_Image_desc.image_slice_pitch = 0;
-  checker.m_cl_Image_desc.num_mip_levels = 0;
-  checker.m_cl_Image_desc.num_samples = 0;
-
-  checker.SetupImage();
-  checker.Init_rect();
-  checker.Setup_Test_Environment();
-  err = checker.verify_RW_Image();
-  test_error(err, __FUNCTION__);
-  clFinish(queue);
-  return err;
-}
-
-static cl_int test_mem_host_no_access_Image_RW_Mapping(cl_device_id deviceID, cl_context context,
-                                                       cl_command_queue queue, cl_bool blocking,
-                                                       cl_mem_flags buffer_mem_flag,
-                                                       cl_mem_object_type image_type_in,
-                                                       size_t array_size, size_t *img_dim)
+static cl_int test_mem_host_no_access_Image_RW(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    cl_bool blocking, cl_mem_flags buffer_mem_flag,
+    cl_mem_object_type image_type_in, size_t array_size, size_t *img_dim)
 {
-  log_info("%s  ... \n ", __FUNCTION__);
-  cl_int err =CL_SUCCESS;
-
-  cImage_check_mem_host_no_access< int > checker(deviceID, context, queue);
-
-  checker.m_blocking = blocking;
-  checker.buffer_mem_flag = buffer_mem_flag;
-
-  checker.m_cl_Image_desc.image_type = image_type_in;
-  checker.m_cl_Image_desc.image_width = img_dim[0];
-  checker.m_cl_Image_desc.image_height = img_dim[1];
-  checker.m_cl_Image_desc.image_depth = img_dim[2];
-  checker.m_cl_Image_desc.image_array_size = array_size;
-  checker.m_cl_Image_desc.image_row_pitch = 0;
-  checker.m_cl_Image_desc.image_slice_pitch = 0;
-  checker.m_cl_Image_desc.num_mip_levels = 0;
-  checker.m_cl_Image_desc.num_samples = 0;
-
-  checker.SetupImage();
-  checker.Init_rect();
-  checker.Setup_Test_Environment();
-  err = checker.verify_RW_Image_Mapping();
-  test_error(err, __FUNCTION__);
-  clFinish(queue);
-  return err;
+    log_info("%s  ... \n", __FUNCTION__);
+    cl_int err = CL_SUCCESS;
+
+    cImage_check_mem_host_no_access<int> checker(deviceID, context, queue);
+
+    checker.m_blocking = blocking;
+    checker.buffer_mem_flag = buffer_mem_flag;
+
+    checker.m_cl_Image_desc.image_type = image_type_in;
+    checker.m_cl_Image_desc.image_width = img_dim[0];
+    checker.m_cl_Image_desc.image_height = img_dim[1];
+    checker.m_cl_Image_desc.image_depth = img_dim[2];
+    checker.m_cl_Image_desc.image_array_size = array_size;
+    checker.m_cl_Image_desc.image_row_pitch = 0;
+    checker.m_cl_Image_desc.image_slice_pitch = 0;
+    checker.m_cl_Image_desc.num_mip_levels = 0;
+    checker.m_cl_Image_desc.num_samples = 0;
+
+    checker.SetupImage();
+    checker.Init_rect();
+    checker.Setup_Test_Environment();
+    err = checker.verify_RW_Image();
+    test_error(err, __FUNCTION__);
+    clFinish(queue);
+    return err;
 }
 
-int  test_mem_host_no_access_image(cl_device_id deviceID, cl_context context,
-                                   cl_command_queue queue, int num_elements)
+static cl_int test_mem_host_no_access_Image_RW_Mapping(
+    cl_device_id deviceID, cl_context context, cl_command_queue queue,
+    cl_bool blocking, cl_mem_flags buffer_mem_flag,
+    cl_mem_object_type image_type_in, size_t array_size, size_t *img_dim)
 {
-  cl_mem_flags buffer_mem_flags[2] = {CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS,
-    CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS };
-
-  cl_int err = CL_SUCCESS;
-
-  cl_bool image_support;
-  err = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof image_support, &image_support, NULL);
-  if (err) {
+    log_info("%s  ... \n ", __FUNCTION__);
+    cl_int err = CL_SUCCESS;
+
+    cImage_check_mem_host_no_access<int> checker(deviceID, context, queue);
+
+    checker.m_blocking = blocking;
+    checker.buffer_mem_flag = buffer_mem_flag;
+
+    checker.m_cl_Image_desc.image_type = image_type_in;
+    checker.m_cl_Image_desc.image_width = img_dim[0];
+    checker.m_cl_Image_desc.image_height = img_dim[1];
+    checker.m_cl_Image_desc.image_depth = img_dim[2];
+    checker.m_cl_Image_desc.image_array_size = array_size;
+    checker.m_cl_Image_desc.image_row_pitch = 0;
+    checker.m_cl_Image_desc.image_slice_pitch = 0;
+    checker.m_cl_Image_desc.num_mip_levels = 0;
+    checker.m_cl_Image_desc.num_samples = 0;
+
+    checker.SetupImage();
+    checker.Init_rect();
+    checker.Setup_Test_Environment();
+    err = checker.verify_RW_Image_Mapping();
     test_error(err, __FUNCTION__);
+    clFinish(queue);
     return err;
-  }
-  if (!image_support) {
-    log_info("Images are not supported by the device, skipping test...\n");
-    return 0;
-  }
-
-  cl_mem_object_type img_type[5] = {CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE3D,
-    CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY};
+}
 
-  size_t img_dims[5][3]= {{200, 1, 1}, {200, 80, 1}, {100, 80, 5}, {200, 1, 1}, {200, 80, 1}}; // in elements
+int test_mem_host_no_access_image(cl_device_id deviceID, cl_context context,
+                                  cl_command_queue queue, int num_elements)
+{
+    cl_mem_flags buffer_mem_flags[2] = {
+        CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_NO_ACCESS,
+        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS
+    };
 
-  size_t array_size [5] = {1, 1, 1, 10, 10};
+    cl_int err = CL_SUCCESS;
 
-  cl_bool blocking[2] = { CL_TRUE, CL_FALSE};
-  for (int k=0; k<2; k++)
-    for (int i=0; i<2; i++) // blocking
+    cl_bool image_support;
+    err = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_SUPPORT,
+                          sizeof image_support, &image_support, NULL);
+    if (err)
+    {
+        test_error(err, __FUNCTION__);
+        return err;
+    }
+    if (!image_support)
     {
-      for (int p =0; p<3; p++)
-      {
-        err += test_mem_host_no_access_Image_RW (deviceID, context, queue, blocking[i],
-                                                buffer_mem_flags[k], img_type[p], array_size[p], img_dims[p]);
-
-        err +=  test_mem_host_no_access_Image_RW_Mapping(deviceID, context, queue, blocking[i],
-                                                       buffer_mem_flags[k], img_type[p], array_size[p], img_dims[p]);
-      }
+        log_info("Images are not supported by the device, skipping test...\n");
+        return 0;
     }
 
-  return  err;
+    cl_mem_object_type img_type[5] = {
+        CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE3D,
+        CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY
+    };
+
+    size_t img_dims[5][3] = { { 200, 1, 1 },
+                              { 200, 80, 1 },
+                              { 100, 80, 5 },
+                              { 200, 1, 1 },
+                              { 200, 80, 1 } }; // in elements
+
+    size_t array_size[5] = { 1, 1, 1, 10, 10 };
+
+    cl_bool blocking[2] = { CL_TRUE, CL_FALSE };
+    for (int k = 0; k < 2; k++)
+        for (int i = 0; i < 2; i++) // blocking
+        {
+            for (int p = 0; p < 3; p++)
+            {
+                err += test_mem_host_no_access_Image_RW(
+                    deviceID, context, queue, blocking[i], buffer_mem_flags[k],
+                    img_type[p], array_size[p], img_dims[p]);
+
+                err += test_mem_host_no_access_Image_RW_Mapping(
+                    deviceID, context, queue, blocking[i], buffer_mem_flags[k],
+                    img_type[p], array_size[p], img_dims[p]);
+            }
+        }
+
+    return err;
 }
diff --git a/test_conformance/mem_host_flags/procs.h b/test_conformance/mem_host_flags/procs.h
index 98f81491..87d48922 100644
--- a/test_conformance/mem_host_flags/procs.h
+++ b/test_conformance/mem_host_flags/procs.h
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -20,26 +20,44 @@
 
 #define NUM_FLAGS 4
 
-extern int test_mem_host_read_only_buffer(cl_device_id deviceID, cl_context context,
-                                          cl_command_queue queue, int num_elements);
-extern int test_mem_host_read_only_subbuffer(cl_device_id deviceID, cl_context context,
-                                             cl_command_queue queue, int num_elements);
+extern int test_mem_host_read_only_buffer(cl_device_id deviceID,
+                                          cl_context context,
+                                          cl_command_queue queue,
+                                          int num_elements);
+extern int test_mem_host_read_only_subbuffer(cl_device_id deviceID,
+                                             cl_context context,
+                                             cl_command_queue queue,
+                                             int num_elements);
 
-extern int test_mem_host_write_only_buffer(cl_device_id deviceID, cl_context context,
-                                           cl_command_queue queue, int num_elements);
-extern int test_mem_host_write_only_subbuffer(cl_device_id deviceID, cl_context context,
-                                              cl_command_queue queue, int num_elements);
+extern int test_mem_host_write_only_buffer(cl_device_id deviceID,
+                                           cl_context context,
+                                           cl_command_queue queue,
+                                           int num_elements);
+extern int test_mem_host_write_only_subbuffer(cl_device_id deviceID,
+                                              cl_context context,
+                                              cl_command_queue queue,
+                                              int num_elements);
 
-extern int test_mem_host_no_access_buffer(cl_device_id deviceID, cl_context context,
-                                          cl_command_queue queue, int num_elements);
-extern int test_mem_host_no_access_subbuffer(cl_device_id deviceID, cl_context context,
-                                             cl_command_queue queue, int num_elements);
+extern int test_mem_host_no_access_buffer(cl_device_id deviceID,
+                                          cl_context context,
+                                          cl_command_queue queue,
+                                          int num_elements);
+extern int test_mem_host_no_access_subbuffer(cl_device_id deviceID,
+                                             cl_context context,
+                                             cl_command_queue queue,
+                                             int num_elements);
 
-extern int test_mem_host_read_only_image(cl_device_id deviceID, cl_context context,
-                                         cl_command_queue queue, int num_elements);
-extern int test_mem_host_write_only_image(cl_device_id deviceID, cl_context context,
-                                          cl_command_queue queue, int num_elements);
-extern int test_mem_host_no_access_image(cl_device_id deviceID, cl_context context,
-                                         cl_command_queue queue, int num_elements);
+extern int test_mem_host_read_only_image(cl_device_id deviceID,
+                                         cl_context context,
+                                         cl_command_queue queue,
+                                         int num_elements);
+extern int test_mem_host_write_only_image(cl_device_id deviceID,
+                                          cl_context context,
+                                          cl_command_queue queue,
+                                          int num_elements);
+extern int test_mem_host_no_access_image(cl_device_id deviceID,
+                                         cl_context context,
+                                         cl_command_queue queue,
+                                         int num_elements);
 
 #endif // #ifndef __PROCS_H__
diff --git a/test_conformance/mem_host_flags/testBase.h b/test_conformance/mem_host_flags/testBase.h
index 30dbf749..bd6b30bd 100644
--- a/test_conformance/mem_host_flags/testBase.h
+++ b/test_conformance/mem_host_flags/testBase.h
@@ -1,6 +1,6 @@
 //
 // Copyright (c) 2017 The Khronos Group Inc.
-// 
+//
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
@@ -23,16 +23,15 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 
-#if !defined (__APPLE__)
+#if !defined(__APPLE__)
 #include <CL/cl.h>
 #else
-//#include <OpenCL/cl.h>
+// #include <OpenCL/cl.h>
 #endif
 
 #include "harness/imageHelpers.h"
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
-#include "harness/threadTesting.h"
 #include "harness/typeWrappers.h"
 #include "harness/conversions.h"
 #include "harness/mt19937.h"
diff --git a/test_conformance/multiple_device_context/test_multiple_devices.cpp b/test_conformance/multiple_device_context/test_multiple_devices.cpp
index 4f187b9c..7a2a3492 100644
--- a/test_conformance/multiple_device_context/test_multiple_devices.cpp
+++ b/test_conformance/multiple_device_context/test_multiple_devices.cpp
@@ -42,7 +42,7 @@ int test_device_set(size_t deviceCount, size_t queueCount, cl_device_id *devices
     clProgramWrapper program;
     clKernelWrapper kernels[2];
     clMemWrapper      stream;
-    clCommandQueueWrapper queues[MAX_QUEUES];
+    clCommandQueueWrapper queues[MAX_QUEUES] = {};
     size_t    threads[1], localThreads[1];
     cl_uint data[TEST_SIZE];
     cl_uint outputData[TEST_SIZE];
@@ -50,8 +50,6 @@ int test_device_set(size_t deviceCount, size_t queueCount, cl_device_id *devices
     cl_uint expectedResultsOneDevice[MAX_DEVICES][TEST_SIZE];
     size_t i;
 
-  memset(queues, 0, sizeof(queues));
-
     RandomSeed seed( gRandomSeed );
 
     if (deviceCount > MAX_DEVICES) {
diff --git a/test_conformance/non_uniform_work_group/CMakeLists.txt b/test_conformance/non_uniform_work_group/CMakeLists.txt
index 30c3a846..f78dd195 100644
--- a/test_conformance/non_uniform_work_group/CMakeLists.txt
+++ b/test_conformance/non_uniform_work_group/CMakeLists.txt
@@ -10,6 +10,8 @@ set(${MODULE_NAME}_SOURCES
     tools.cpp
 )
 
+set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
+
 include(../CMakeCommon.txt)
 
 # end of file #
diff --git a/test_conformance/printf/CMakeLists.txt b/test_conformance/printf/CMakeLists.txt
index 8f03dca9..39a520c8 100644
--- a/test_conformance/printf/CMakeLists.txt
+++ b/test_conformance/printf/CMakeLists.txt
@@ -1,7 +1,5 @@
 set(MODULE_NAME PRINTF)
 
-set(CMAKE_CXX_STANDARD 11)
-
 set(${MODULE_NAME}_SOURCES
          test_printf.cpp
          util_printf.cpp
diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp
index d638cd46..e43e302f 100644
--- a/test_conformance/printf/test_printf.cpp
+++ b/test_conformance/printf/test_printf.cpp
@@ -13,7 +13,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/compat.h"
+#include "harness/os_helpers.h"
 
 #include <string.h>
 #include <errno.h>
@@ -40,7 +40,6 @@
 #include "harness/testHarness.h"
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
-#include "harness/mt19937.h"
 #include "harness/parseParameters.h"
 
 #include <CL/cl_ext.h>
@@ -116,29 +115,6 @@ static char gFileName[256];
 //-----------------------------------------
 
 //-----------------------------------------
-// getTempFileName
-//-----------------------------------------
-static int getTempFileName()
-{
-    // Create a unique temporary file to allow parallel executed tests.
-#if (defined(__linux__) || defined(__APPLE__)) && (!defined( __ANDROID__ ))
-    sprintf(gFileName, "/tmp/tmpfile.XXXXXX");
-    int fd = mkstemp(gFileName);
-    if (fd == -1)
-        return -1;
-    close(fd);
-#elif defined(_WIN32)
-    UINT ret = GetTempFileName(".", "tmp", 0, gFileName);
-    if (ret == 0)
-        return -1;
-#else
-    MTdata d = init_genrand((cl_uint)time(NULL));
-    sprintf(gFileName, "tmpfile.%u", genrand_int32(d));
-#endif
-    return 0;
-}
-
-//-----------------------------------------
 // acquireOutputStream
 //-----------------------------------------
 static int acquireOutputStream(int* error)
@@ -237,10 +213,13 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont
     char testname[256] = {0};
     char addrSpaceArgument[256] = {0};
     char addrSpacePAddArgument[256] = {0};
+    char extension[128] = { 0 };
 
     //Program Source code for int,float,octal,hexadecimal,char,string
-    const char *sourceGen[] = {
-        "__kernel void ", testname,
+    const char* sourceGen[] = {
+        extension,
+        "__kernel void ",
+        testname,
         "(void)\n",
         "{\n"
         "   printf(\"",
@@ -251,8 +230,10 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont
         "}\n"
     };
     //Program Source code for vector
-    const char *sourceVec[] = {
-        "__kernel void ", testname,
+    const char* sourceVec[] = {
+        extension,
+        "__kernel void ",
+        testname,
         "(void)\n",
         "{\n",
         allTestCase[testId]->_genParameters[testNum].dataType,
@@ -287,23 +268,39 @@ static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context cont
     };
 
     //Update testname
-    sprintf(testname,"%s%d","test",testId);
+    std::snprintf(testname, sizeof(testname), "%s%d", "test", testId);
+
+    if (allTestCase[testId]->_type == TYPE_HALF
+        || allTestCase[testId]->_type == TYPE_HALF_LIMITS)
+        strcpy(extension, "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n");
+
 
     //Update addrSpaceArgument and addrSpacePAddArgument types, based on FULL_PROFILE/EMBEDDED_PROFILE
     if(allTestCase[testId]->_type == TYPE_ADDRESS_SPACE)
     {
-        sprintf(addrSpaceArgument, "%s",allTestCase[testId]->_genParameters[testNum].addrSpaceArgumentTypeQualifier);
-
-        sprintf(addrSpacePAddArgument, "%s", allTestCase[testId]->_genParameters[testNum].addrSpacePAdd);
+        std::snprintf(addrSpaceArgument, sizeof(addrSpaceArgument), "%s",
+                      allTestCase[testId]
+                          ->_genParameters[testNum]
+                          .addrSpaceArgumentTypeQualifier);
+
+        std::snprintf(
+            addrSpacePAddArgument, sizeof(addrSpacePAddArgument), "%s",
+            allTestCase[testId]->_genParameters[testNum].addrSpacePAdd);
     }
 
     if (strlen(addrSpaceArgument) == 0)
-        sprintf(addrSpaceArgument,"void");
+        std::snprintf(addrSpaceArgument, sizeof(addrSpaceArgument), "void");
 
     // create program based on its type
 
     if(allTestCase[testId]->_type == TYPE_VECTOR)
     {
+        if (strcmp(allTestCase[testId]->_genParameters[testNum].dataType,
+                   "half")
+            == 0)
+            strcpy(extension,
+                   "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n");
+
         err = create_single_kernel_helper(
             context, &program, kernel_ptr,
             sizeof(sourceVec) / sizeof(sourceVec[0]), sourceVec, testname);
@@ -404,8 +401,27 @@ static bool is64bAddressSpace(cl_device_id  device_id)
 //-----------------------------------------
 static int doTest(cl_command_queue queue, cl_context context, const unsigned int testId, const unsigned int testNum, cl_device_id device)
 {
+    if ((allTestCase[testId]->_type == TYPE_HALF
+         || allTestCase[testId]->_type == TYPE_HALF_LIMITS)
+        && !is_extension_available(device, "cl_khr_fp16"))
+    {
+        log_info(
+            "Skipping half because cl_khr_fp16 extension is not supported.\n");
+        return TEST_SKIPPED_ITSELF;
+    }
+
     if(allTestCase[testId]->_type == TYPE_VECTOR)
     {
+        if ((strcmp(allTestCase[testId]->_genParameters[testNum].dataType,
+                    "half")
+             == 0)
+            && !is_extension_available(device, "cl_khr_fp16"))
+        {
+            log_info("Skipping half because cl_khr_fp16 extension is not "
+                     "supported.\n");
+            return TEST_SKIPPED_ITSELF;
+        }
+
         log_info("%d)testing printf(\"%sv%s%s\",%s)\n",testNum,allTestCase[testId]->_genParameters[testNum].vectorFormatFlag,allTestCase[testId]->_genParameters[testNum].vectorSize,
                  allTestCase[testId]->_genParameters[testNum].vectorFormatSpecifier,allTestCase[testId]->_genParameters[testNum].dataRepresentation);
     }
@@ -614,6 +630,75 @@ int test_int_8(cl_device_id deviceID, cl_context context, cl_command_queue queue
 }
 
 
+int test_half_0(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 0, deviceID);
+}
+int test_half_1(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 1, deviceID);
+}
+int test_half_2(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 2, deviceID);
+}
+int test_half_3(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 3, deviceID);
+}
+int test_half_4(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 4, deviceID);
+}
+int test_half_5(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 5, deviceID);
+}
+int test_half_6(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 6, deviceID);
+}
+int test_half_7(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 7, deviceID);
+}
+int test_half_8(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 8, deviceID);
+}
+int test_half_9(cl_device_id deviceID, cl_context context,
+                cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF, 9, deviceID);
+}
+
+
+int test_half_limits_0(cl_device_id deviceID, cl_context context,
+                       cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF_LIMITS, 0, deviceID);
+}
+int test_half_limits_1(cl_device_id deviceID, cl_context context,
+                       cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF_LIMITS, 1, deviceID);
+}
+int test_half_limits_2(cl_device_id deviceID, cl_context context,
+                       cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_HALF_LIMITS, 2, deviceID);
+}
+
+
 int test_float_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
 {
     return doTest(gQueue, gContext, TYPE_FLOAT, 0, deviceID);
@@ -800,6 +885,11 @@ int test_vector_4(cl_device_id deviceID, cl_context context, cl_command_queue qu
 {
     return doTest(gQueue, gContext, TYPE_VECTOR, 4, deviceID);
 }
+int test_vector_5(cl_device_id deviceID, cl_context context,
+                  cl_command_queue queue, int num_elements)
+{
+    return doTest(gQueue, gContext, TYPE_VECTOR, 5, deviceID);
+}
 
 
 int test_address_space_0(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
@@ -855,6 +945,15 @@ test_definition test_list[] = {
     ADD_TEST(int_6),           ADD_TEST(int_7),
     ADD_TEST(int_8),
 
+    ADD_TEST(half_0),          ADD_TEST(half_1),
+    ADD_TEST(half_2),          ADD_TEST(half_3),
+    ADD_TEST(half_4),          ADD_TEST(half_5),
+    ADD_TEST(half_6),          ADD_TEST(half_7),
+    ADD_TEST(half_8),          ADD_TEST(half_9),
+
+    ADD_TEST(half_limits_0),   ADD_TEST(half_limits_1),
+    ADD_TEST(half_limits_2),
+
     ADD_TEST(float_0),         ADD_TEST(float_1),
     ADD_TEST(float_2),         ADD_TEST(float_3),
     ADD_TEST(float_4),         ADD_TEST(float_5),
@@ -885,7 +984,7 @@ test_definition test_list[] = {
 
     ADD_TEST(vector_0),        ADD_TEST(vector_1),
     ADD_TEST(vector_2),        ADD_TEST(vector_3),
-    ADD_TEST(vector_4),
+    ADD_TEST(vector_4),        ADD_TEST(vector_5),
 
     ADD_TEST(address_space_0), ADD_TEST(address_space_1),
     ADD_TEST(address_space_2), ADD_TEST(address_space_3),
@@ -946,9 +1045,17 @@ int main(int argc, const char* argv[])
         }
     }
 
-    if (getTempFileName() == -1)
+    char* pcTempFname = get_temp_filename();
+    if (pcTempFname != nullptr)
+    {
+        strncpy(gFileName, pcTempFname, sizeof(gFileName));
+    }
+
+    free(pcTempFname);
+
+    if (strlen(gFileName) == 0)
     {
-        log_error("getTempFileName failed\n");
+        log_error("get_temp_filename failed\n");
         return -1;
     }
 
@@ -1056,6 +1163,24 @@ test_status InitCL( cl_device_id device )
 
     releaseOutputStream(gFd);
 
+    if (is_extension_available(device, "cl_khr_fp16"))
+    {
+        const cl_device_fp_config fpConfigHalf =
+            get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG);
+        if (fpConfigHalf == CL_FP_ROUND_TO_NEAREST)
+        {
+            half_rounding_mode = CL_HALF_RTE;
+        }
+        else if (fpConfigHalf == CL_FP_ROUND_TO_ZERO)
+        {
+            half_rounding_mode = CL_HALF_RTZ;
+        }
+        else
+        {
+            log_error("Error while acquiring half rounding mode");
+        }
+    }
+
     // Generate reference results
     generateRef(device);
 
diff --git a/test_conformance/printf/test_printf.h b/test_conformance/printf/test_printf.h
index 038a7b9c..8eb2a032 100644
--- a/test_conformance/printf/test_printf.h
+++ b/test_conformance/printf/test_printf.h
@@ -32,6 +32,8 @@
 #include <CL/cl_platform.h>
 #endif
 
+#include <CL/cl_half.h>
+
 #define ANALYSIS_BUFFER_SIZE 256
 
 //-----------------------------------------
@@ -42,18 +44,20 @@
 // Types
 //-----------------------------------------
 enum PrintfTestType
- {
-     TYPE_INT,
-     TYPE_FLOAT,
-     TYPE_FLOAT_LIMITS,
-     TYPE_OCTAL,
-     TYPE_UNSIGNED,
-     TYPE_HEXADEC,
-     TYPE_CHAR,
-     TYPE_STRING,
-     TYPE_VECTOR,
-     TYPE_ADDRESS_SPACE,
-     TYPE_COUNT
+{
+    TYPE_INT,
+    TYPE_HALF,
+    TYPE_HALF_LIMITS,
+    TYPE_FLOAT,
+    TYPE_FLOAT_LIMITS,
+    TYPE_OCTAL,
+    TYPE_UNSIGNED,
+    TYPE_HEXADEC,
+    TYPE_CHAR,
+    TYPE_STRING,
+    TYPE_VECTOR,
+    TYPE_ADDRESS_SPACE,
+    TYPE_COUNT
 };
 
 struct printDataGenParameters
@@ -72,6 +76,7 @@ struct printDataGenParameters
 
 // Reference results - filled out at run-time
 static std::vector<std::string> correctBufferInt;
+static std::vector<std::string> correctBufferHalf;
 static std::vector<std::string> correctBufferFloat;
 static std::vector<std::string> correctBufferOctal;
 static std::vector<std::string> correctBufferUnsigned;
@@ -103,6 +108,9 @@ struct testCase
 
 extern const char* strType[];
 extern std::vector<testCase*> allTestCase;
+extern cl_half_rounding_mode half_rounding_mode;
+
+//-----------------------------------------
 
 size_t verifyOutputBuffer(char *analysisBuffer,testCase* pTestCase,size_t testId,cl_ulong pAddr = 0);
 
diff --git a/test_conformance/printf/util_printf.cpp b/test_conformance/printf/util_printf.cpp
index d45e1d43..6b310a99 100644
--- a/test_conformance/printf/util_printf.cpp
+++ b/test_conformance/printf/util_printf.cpp
@@ -13,15 +13,18 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
-#include "harness/compat.h"
 #include "harness/rounding_mode.h"
 #include "harness/kernelHelpers.h"
 
 #include "test_printf.h"
 #include <assert.h>
+#include <CL/cl_half.h>
+
 
 // Helpers for generating runtime reference results
 static void intRefBuilder(printDataGenParameters&, char*, const size_t);
+static void halfRefBuilder(printDataGenParameters&, char* rResult,
+                           const size_t);
 static void floatRefBuilder(printDataGenParameters&, char* rResult, const size_t);
 static void octalRefBuilder(printDataGenParameters&, char*, const size_t);
 static void unsignedRefBuilder(printDataGenParameters&, char*, const size_t);
@@ -100,7 +103,150 @@ testCase testCaseInt = {
 };
 
 
+//==============================================
+
+// half
+
+//==============================================
+
+//--------------------------------------------------------
+
+// [string] format |  [string] float-data representation |
+
+//--------------------------------------------------------
+
+std::vector<printDataGenParameters> printHalfGenParameters = {
+
+    // Default(right)-justified
+
+    { "%f", "1.234h" },
+
+    // One position after the decimal,default(right)-justified
+
+    { "%4.2f", "1.2345h" },
+
+    // Zero positions after the
+    // decimal([floor]rounding),default(right)-justified
+
+    { "%.0f", "0.1h" },
+
+    // Zero positions after the decimal([ceil]rounding),default(right)-justified
+
+    { "%.0f", "0.6h" },
+
+    // Zero-filled,default positions number after the
+    // decimal,default(right)-justified
+
+    { "%0f", "0.6h" },
+
+    // Double argument representing floating-point,used by f
+    // style,default(right)-justified
+
+    { "%4g", "5.678h" },
+
+    // Double argument representing floating-point,used by e
+    // style,default(right)-justified
+
+    { "%4.2g", "5.678h" },
+
+    // Double argument representing floating-point,used by e
+    // style,default(right)-justified
+
+    { "%4G", "0.000062h" },
+
+    // Double argument representing floating-point,with
+    // exponent,left-justified,default(right)-justified
+
+    { "%-#20.15e", "65504.0h" },
+
+    // Double argument representing floating-point,with
+    // exponent,left-justified,with sign,capital E,default(right)-justified
+
+    { "%+#21.15E", "-65504.0h" },
+};
+
+//---------------------------------------------------------
+
+// Test case for float                                     |
+
+//---------------------------------------------------------
+
+testCase testCaseHalf = {
+
+    TYPE_HALF,
+
+    correctBufferHalf,
+
+    printHalfGenParameters,
+
+    halfRefBuilder,
+
+    kfloat
+
+};
+
+
+//==============================================
+
+// half limits
+
+//==============================================
+
+
+//--------------------------------------------------------
+
+// [string] format |  [string] float-data representation |
+
+//--------------------------------------------------------
+
+
+std::vector<printDataGenParameters> printHalfLimitsGenParameters = {
+
+    // Infinity (1.0/0.0)
+
+    { "%f", "1.0h/0.0h" },
+
+    // NaN
+
+    { "%f", "sqrt(-1.0h)" },
+
+    // NaN
+    { "%f", "acospi(2.0h)" }
+
+};
+//--------------------------------------------------------
+
+//  Lookup table - [string]float-correct buffer             |
+
+//--------------------------------------------------------
+
+std::vector<std::string> correctBufferHalfLimits = {
 
+    "inf",
+
+    "-nan",
+
+    "nan"
+
+};
+
+//---------------------------------------------------------
+
+// Test case for float                                     |
+
+//---------------------------------------------------------
+
+testCase testCaseHalfLimits = {
+
+    TYPE_HALF_LIMITS,
+
+    correctBufferHalfLimits,
+
+    printHalfLimitsGenParameters,
+
+    NULL
+
+};
 
 
 //==============================================
@@ -229,17 +375,18 @@ testCase testCaseFloat = {
 
 std::vector<printDataGenParameters> printFloatLimitsGenParameters = {
 
-    //Infinity (1.0/0.0)
+    // Infinity (1.0/0.0)
 
-    {"%f","1.0f/0.0f"},
+    { "%f", "1.0f/0.0f" },
 
-    //NaN
+    // NaN
 
-    {"%f","sqrt(-1.0f)"},
+    { "%f", "sqrt(-1.0f)" },
 
-    //NaN
-    {"%f","acospi(2.0f)"}
-    };
+    // NaN
+    { "%f", "acospi(2.0f)" }
+
+};
 //--------------------------------------------------------
 
 //  Lookup table - [string]float-correct buffer             |
@@ -253,6 +400,7 @@ std::vector<std::string> correctBufferFloatLimits = {
     "-nan",
 
     "nan"
+
 };
 
 //---------------------------------------------------------
@@ -593,24 +741,27 @@ std::vector<printDataGenParameters> printVectorGenParameters = {
 
     //(Minimum)Two-wide,two positions after decimal
 
-    {NULL,"(1.0f,2.0f,3.0f,4.0f)","%2.2","hlf","float","4"},
+    { NULL, "(1.0f,2.0f,3.0f,4.0f)", "%2.2", "hlf", "float", "4" },
 
-    //Alternative form,uchar argument
+    // Alternative form,uchar argument
 
-    {NULL,"(0xFA,0xFB)","%#","hhx","uchar","2"},
+    { NULL, "(0xFA,0xFB)", "%#", "hhx", "uchar", "2" },
 
-    //Alternative form,ushort argument
+    // Alternative form,ushort argument
 
-    {NULL,"(0x1234,0x8765)","%#","hx","ushort","2"},
+    { NULL, "(0x1234,0x8765)", "%#", "hx", "ushort", "2" },
 
-  //Alternative form,uint argument
+    // Alternative form,uint argument
 
-    {NULL,"(0x12345678,0x87654321)","%#","hlx","uint","2"},
+    { NULL, "(0x12345678,0x87654321)", "%#", "hlx", "uint", "2" },
 
-    //Alternative form,long argument
+    // Alternative form,long argument
 
-    {NULL,"(12345678,98765432)","%","ld","long","2"}
+    { NULL, "(12345678,98765432)", "%", "ld", "long", "2" },
 
+    //(Minimum)Two-wide,two positions after decimal
+
+    { NULL, "(1.0h,2.0h,3.0h,4.0h)", "%2.2", "hf", "half", "4" }
 };
 
 //------------------------------------------------------------
@@ -627,9 +778,11 @@ std::vector<std::string> correctBufferVector = {
 
     "0x1234,0x8765",
 
-  "0x12345678,0x87654321",
+    "0x12345678,0x87654321",
+
+    "12345678,98765432",
 
-    "12345678,98765432"
+    "1.00,2.00,3.00,4.00"
 
 };
 
@@ -731,8 +884,16 @@ testCase testCaseAddrSpace = {
 
 //-------------------------------------------------------------------------------
 
-std::vector<testCase*> allTestCase = {&testCaseInt,&testCaseFloat,&testCaseFloatLimits,&testCaseOctal,&testCaseUnsigned,&testCaseHexadecimal,&testCaseChar,&testCaseString,&testCaseVector,&testCaseAddrSpace};
+std::vector<testCase*> allTestCase = {
+    &testCaseInt,      &testCaseHalf,        &testCaseHalfLimits,
+    &testCaseFloat,    &testCaseFloatLimits, &testCaseOctal,
+    &testCaseUnsigned, &testCaseHexadecimal, &testCaseChar,
+    &testCaseString,   &testCaseVector,      &testCaseAddrSpace
+};
 
+//-----------------------------------------
+
+cl_half_rounding_mode half_rounding_mode = CL_HALF_RTE;
 
 //-----------------------------------------
 
@@ -807,6 +968,14 @@ static void intRefBuilder(printDataGenParameters& params, char* refResult, const
     snprintf(refResult, refSize, params.genericFormat, atoi(params.dataRepresentation));
 }
 
+static void halfRefBuilder(printDataGenParameters& params, char* refResult,
+                           const size_t refSize)
+{
+    cl_half val = cl_half_from_float(strtof(params.dataRepresentation, NULL),
+                                     half_rounding_mode);
+    snprintf(refResult, refSize, params.genericFormat, cl_half_to_float(val));
+}
+
 static void floatRefBuilder(printDataGenParameters& params, char* refResult, const size_t refSize)
 {
     snprintf(refResult, refSize, params.genericFormat, strtof(params.dataRepresentation, NULL));
@@ -842,24 +1011,30 @@ static void hexRefBuilder(printDataGenParameters& params, char* refResult, const
 */
 void generateRef(const cl_device_id device)
 {
-    const cl_device_fp_config fpConfig = get_default_rounding_mode(device);
+    const cl_device_fp_config fpConfigSingle =
+        get_default_rounding_mode(device);
+    const cl_device_fp_config fpConfigHalf = (half_rounding_mode == CL_HALF_RTE)
+        ? CL_FP_ROUND_TO_NEAREST
+        : CL_FP_ROUND_TO_ZERO;
     const RoundingMode hostRound = get_round();
-    RoundingMode deviceRound;
 
     // Map device rounding to CTS rounding type
     // get_default_rounding_mode supports RNE and RTZ
-    if (fpConfig == CL_FP_ROUND_TO_NEAREST)
-    {
-        deviceRound = kRoundToNearestEven;
-    }
-    else if (fpConfig == CL_FP_ROUND_TO_ZERO)
-    {
-        deviceRound = kRoundTowardZero;
-    }
-    else
-    {
-        assert(false && "Unreachable");
-    }
+    auto get_rounding = [](const cl_device_fp_config& fpConfig) {
+        if (fpConfig == CL_FP_ROUND_TO_NEAREST)
+        {
+            return kRoundToNearestEven;
+        }
+        else if (fpConfig == CL_FP_ROUND_TO_ZERO)
+        {
+            return kRoundTowardZero;
+        }
+        else
+        {
+            assert(false && "Unreachable");
+        }
+        return kDefaultRoundingMode;
+    };
 
     // Loop through all test cases
     for (auto &caseToTest: allTestCase)
@@ -875,6 +1050,12 @@ void generateRef(const cl_device_id device)
         // Make sure the reference result is empty
         assert(caseToTest->_correctBuffer.size() == 0);
 
+        const cl_device_fp_config* fpConfig = &fpConfigSingle;
+        if (caseToTest->_type == TYPE_HALF
+            || caseToTest->_type == TYPE_HALF_LIMITS)
+            fpConfig = &fpConfigHalf;
+        RoundingMode deviceRound = get_rounding(*fpConfig);
+
         // Loop through each input
         for (auto &params: caseToTest->_genParameters)
         {
diff --git a/test_conformance/relationals/CMakeLists.txt b/test_conformance/relationals/CMakeLists.txt
index ecaa056c..aa5dd6a1 100644
--- a/test_conformance/relationals/CMakeLists.txt
+++ b/test_conformance/relationals/CMakeLists.txt
@@ -3,8 +3,7 @@ set(MODULE_NAME RELATIONALS)
 set(${MODULE_NAME}_SOURCES
     main.cpp
     test_relationals.cpp
-    test_comparisons_float.cpp
-    test_comparisons_double.cpp
+    test_comparisons_fp.cpp
     test_shuffles.cpp
 )
 
diff --git a/test_conformance/relationals/procs.h b/test_conformance/relationals/procs.h
index 80000ef4..25e1ab32 100644
--- a/test_conformance/relationals/procs.h
+++ b/test_conformance/relationals/procs.h
@@ -15,7 +15,6 @@
 //
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
-#include "harness/threadTesting.h"
 #include "harness/typeWrappers.h"
 #include "harness/conversions.h"
 #include "harness/mt19937.h"
diff --git a/test_conformance/relationals/test_comparisons_double.cpp b/test_conformance/relationals/test_comparisons_double.cpp
deleted file mode 100644
index 3fe1124c..00000000
--- a/test_conformance/relationals/test_comparisons_double.cpp
+++ /dev/null
@@ -1,363 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-
-#define TEST_SIZE 512
-
-const char *equivTestKernelPattern_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, __global long%s *destValues, __global long%s *destValuesB)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
-"    destValuesB[tid] = sourceA[tid] %s sourceB[tid];\n"
-"\n"
-"}\n";
-
-const char *equivTestKernelPatternLessGreater_double =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, __global long%s *destValues, __global long%s *destValuesB)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
-"    destValuesB[tid] = (sourceA[tid] < sourceB[tid]) | (sourceA[tid] > sourceB[tid]);\n"
-"\n"
-"}\n";
-
-
-const char *equivTestKernelPattern_double3 =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, __global long%s *destValues, __global long%s *destValuesB)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    double3 sampA = vload3(tid, (__global double *)sourceA);\n"
-"    double3 sampB = vload3(tid, (__global double *)sourceB);\n"
-"    vstore3(%s( sampA, sampB ), tid, (__global long *)destValues);\n"
-"    vstore3(( sampA %s sampB ), tid, (__global long *)destValuesB);\n"
-"\n"
-"}\n";
-
-const char *equivTestKernelPatternLessGreater_double3 =
-"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"
-"__kernel void sample_test(__global double%s *sourceA, __global double%s *sourceB, __global long%s *destValues, __global long%s *destValuesB)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    double3 sampA = vload3(tid, (__global double *)sourceA);\n"
-"    double3 sampB = vload3(tid, (__global double *)sourceB);\n"
-"    vstore3(%s( sampA, sampB ), tid, (__global long *)destValues);\n"
-"    vstore3(( sampA < sampB ) | (sampA > sampB), tid, (__global long *)destValuesB);\n"
-"\n"
-"}\n";
-
-
-typedef bool (*equivVerifyFn)( double inDataA, double inDataB );
-
-void verify_equiv_values_double( unsigned int vecSize, double *inDataA, double *inDataB, cl_long *outData, equivVerifyFn verifyFn )
-{
-    unsigned int i;
-    cl_long trueResult;
-    bool result;
-
-    trueResult = ( vecSize == 1 ) ? 1 : -1;
-    for( i = 0; i < vecSize; i++ )
-    {
-        result = verifyFn( inDataA[ i ], inDataB[ i ] );
-        outData[ i ] = result ? trueResult : 0;
-    }
-}
-
-void generate_equiv_test_data_double( double *outData, unsigned int vecSize, bool alpha, MTdata d )
-{
-    unsigned int i;
-
-    generate_random_data( kDouble, vecSize * TEST_SIZE, d, outData );
-
-    // Fill the first few vectors with NAN in each vector element (or the second set if we're alpha, so we can test either case)
-    if( alpha )
-        outData += vecSize * vecSize;
-    for( i = 0; i < vecSize; i++ )
-    {
-        outData[ 0 ] = NAN;
-        outData += vecSize + 1;
-    }
-    // Make sure the third set is filled regardless, to test the case where both have NANs
-    if( !alpha )
-        outData += vecSize * vecSize;
-    for( i = 0; i < vecSize; i++ )
-    {
-        outData[ 0 ] = NAN;
-        outData += vecSize + 1;
-    }
-}
-
-int test_equiv_kernel_double(cl_context context, cl_command_queue queue, const char *fnName, const char *opName,
-                             unsigned int vecSize, equivVerifyFn verifyFn, MTdata d )
-{
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper streams[4];
-    double inDataA[TEST_SIZE * 16], inDataB[ TEST_SIZE * 16 ];
-    cl_long outData[TEST_SIZE * 16], expected[16];
-    int error, i, j;
-    size_t threads[1], localThreads[1];
-    char kernelSource[10240];
-    char *programPtr;
-    char sizeName[4];
-
-
-    /* Create the source */
-    if( vecSize == 1 )
-        sizeName[ 0 ] = 0;
-    else
-        sprintf( sizeName, "%d", vecSize );
-
-    if(DENSE_PACK_VECS && vecSize == 3) {
-        if (strcmp(fnName, "islessgreater")) {
-            sprintf( kernelSource, equivTestKernelPattern_double3, sizeName, sizeName, sizeName, sizeName, fnName, opName );
-        } else {
-            sprintf( kernelSource, equivTestKernelPatternLessGreater_double3, sizeName, sizeName, sizeName, sizeName, fnName );
-        }
-    } else {
-        if (strcmp(fnName, "islessgreater")) {
-            sprintf( kernelSource, equivTestKernelPattern_double, sizeName, sizeName, sizeName, sizeName, fnName, opName );
-        } else {
-            sprintf( kernelSource, equivTestKernelPatternLessGreater_double, sizeName, sizeName, sizeName, sizeName, fnName );
-        }
-    }
-
-    /* Create kernels */
-    programPtr = kernelSource;
-    if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) )
-    {
-        return -1;
-    }
-
-    /* Generate some streams */
-    generate_equiv_test_data_double( inDataA, vecSize, true, d );
-    generate_equiv_test_data_double( inDataB, vecSize, false, d );
-
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(cl_double) * vecSize * TEST_SIZE,
-                                &inDataA, &error);
-    if( streams[0] == NULL )
-    {
-        print_error( error, "Creating input array A failed!\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(cl_double) * vecSize * TEST_SIZE,
-                                &inDataB, &error);
-    if( streams[1] == NULL )
-    {
-        print_error( error, "Creating input array A failed!\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( cl_long ) * vecSize * TEST_SIZE, NULL, &error);
-    if( streams[2] == NULL )
-    {
-        print_error( error, "Creating output array failed!\n");
-        return -1;
-    }
-    streams[3] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( cl_long ) * vecSize * TEST_SIZE, NULL, &error);
-    if( streams[3] == NULL )
-    {
-        print_error( error, "Creating output array failed!\n");
-        return -1;
-    }
-
-
-    /* Assign streams and execute */
-    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg( kernel, 3, sizeof( streams[3] ), &streams[3] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-
-
-    /* Run the kernel */
-    threads[0] = TEST_SIZE;
-
-    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
-    test_error( error, "Unable to get work group size to use" );
-
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-    test_error( error, "Unable to execute test kernel" );
-
-    /* Now get the results */
-    error = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof( cl_long ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL );
-    test_error( error, "Unable to read output array!" );
-
-    /* And verify! */
-    for( i = 0; i < TEST_SIZE; i++ )
-    {
-        verify_equiv_values_double( vecSize, &inDataA[ i * vecSize ], &inDataB[ i * vecSize ], expected, verifyFn);
-
-        for( j = 0; j < (int)vecSize; j++ )
-        {
-            if( expected[ j ] != outData[ i * vecSize + j ] )
-            {
-                log_error( "ERROR: Data sample %d:%d at size %d does not validate! Expected %lld, got %lld, source %f,%f\n",
-                          i, j, vecSize, expected[ j ], outData[ i * vecSize + j ], inDataA[i*vecSize + j], inDataB[i*vecSize + j] );
-                return -1;
-            }
-        }
-    }
-
-    /* Now get the results */
-    error = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof( cl_long ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL );
-    test_error( error, "Unable to read output array!" );
-
-    /* And verify! */
-    for( i = 0; i < TEST_SIZE; i++ )
-    {
-        verify_equiv_values_double( vecSize, &inDataA[ i * vecSize ], &inDataB[ i * vecSize ], expected, verifyFn);
-
-        for( j = 0; j < (int)vecSize; j++ )
-        {
-            if( expected[ j ] != outData[ i * vecSize + j ] )
-            {
-                log_error( "ERROR: Data sample %d:%d at size %d does not validate! Expected %lld, got %lld, source %f,%f\n",
-                          i, j, vecSize, expected[ j ], outData[ i * vecSize + j ], inDataA[i*vecSize + j], inDataB[i*vecSize + j] );
-                return -1;
-            }
-        }
-    }
-
-    return 0;
-}
-
-int test_equiv_kernel_set_double(cl_device_id device, cl_context context, cl_command_queue queue, const char *fnName, const char *opName, equivVerifyFn verifyFn, MTdata d )
-{
-    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
-    unsigned int index;
-    int retVal = 0;
-
-    if (!is_extension_available(device, "cl_khr_fp64")) {
-        log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
-        return 0;
-    }
-    log_info("Testing doubles.\n");
-
-    for( index = 0; vecSizes[ index ] != 0; index++ )
-    {
-        // Test!
-        if( test_equiv_kernel_double(context, queue, fnName, opName, vecSizes[ index ], verifyFn, d ) != 0 )
-        {
-            log_error( "   Vector double%d FAILED\n", vecSizes[ index ] );
-            retVal = -1;
-        }
-    }
-
-    return retVal;
-}
-
-bool isequal_verify_fn_double( double valueA, double valueB )
-{
-    if( isnan( valueA ) || isnan( valueB ) )
-        return false;
-    return valueA == valueB;
-}
-
-int test_relational_isequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed(gRandomSeed);
-    return test_equiv_kernel_set_double( device, context, queue, "isequal", "==", isequal_verify_fn_double, seed );
-}
-
-bool isnotequal_verify_fn_double( double valueA, double valueB )
-{
-    if( isnan( valueA ) || isnan( valueB ) )
-        return true;
-    return valueA != valueB;
-}
-
-int test_relational_isnotequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed(gRandomSeed);
-    return test_equiv_kernel_set_double( device, context, queue, "isnotequal", "!=", isnotequal_verify_fn_double, seed );
-}
-
-bool isgreater_verify_fn_double( double valueA, double valueB )
-{
-    if( isnan( valueA ) || isnan( valueB ) )
-        return false;
-    return valueA > valueB;
-}
-
-int test_relational_isgreater_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed(gRandomSeed);
-    return test_equiv_kernel_set_double( device, context, queue, "isgreater", ">", isgreater_verify_fn_double, seed );
-}
-
-bool isgreaterequal_verify_fn_double( double valueA, double valueB )
-{
-    if( isnan( valueA ) || isnan( valueB ) )
-        return false;
-    return valueA >= valueB;
-}
-
-int test_relational_isgreaterequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed(gRandomSeed);
-    return test_equiv_kernel_set_double( device, context, queue, "isgreaterequal", ">=", isgreaterequal_verify_fn_double, seed );
-}
-
-bool isless_verify_fn_double( double valueA, double valueB )
-{
-    if( isnan( valueA ) || isnan( valueB ) )
-        return false;
-    return valueA < valueB;
-}
-
-int test_relational_isless_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed(gRandomSeed);
-    return test_equiv_kernel_set_double( device, context, queue, "isless", "<", isless_verify_fn_double, seed );
-}
-
-bool islessequal_verify_fn_double( double valueA, double valueB )
-{
-    if( isnan( valueA ) || isnan( valueB ) )
-        return false;
-    return valueA <= valueB;
-}
-
-int test_relational_islessequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed(gRandomSeed);
-    return test_equiv_kernel_set_double( device, context, queue, "islessequal", "<=", islessequal_verify_fn_double, seed );
-}
-
-bool islessgreater_verify_fn_double( double valueA, double valueB )
-{
-    if( isnan( valueA ) || isnan( valueB ) )
-        return false;
-    return ( valueA < valueB ) || ( valueA > valueB );
-}
-
-int test_relational_islessgreater_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed(gRandomSeed);
-    return test_equiv_kernel_set_double( device, context, queue, "islessgreater", "<>", islessgreater_verify_fn_double, seed );
-}
-
-
diff --git a/test_conformance/relationals/test_comparisons_float.cpp b/test_conformance/relationals/test_comparisons_float.cpp
deleted file mode 100644
index 989c70c7..00000000
--- a/test_conformance/relationals/test_comparisons_float.cpp
+++ /dev/null
@@ -1,363 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-// 
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "testBase.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
-
-#define TEST_SIZE 512
-
-const char *equivTestKernelPattern_float =
-"__kernel void sample_test(__global float%s *sourceA, __global float%s *sourceB, __global int%s *destValues, __global int%s *destValuesB)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
-"    destValuesB[tid] = sourceA[tid] %s sourceB[tid];\n"
-"\n"
-"}\n";
-
-const char *equivTestKernelPatternLessGreater_float =
-"__kernel void sample_test(__global float%s *sourceA, __global float%s *sourceB, __global int%s *destValues, __global int%s *destValuesB)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
-"    destValuesB[tid] = (sourceA[tid] < sourceB[tid]) | (sourceA[tid] > sourceB[tid]);\n"
-"\n"
-"}\n";
-
-
-const char *equivTestKernelPattern_float3 =
-"__kernel void sample_test(__global float%s *sourceA, __global float%s *sourceB, __global int%s *destValues, __global int%s *destValuesB)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    float3 sampA = vload3(tid, (__global float *)sourceA);\n"
-"    float3 sampB = vload3(tid, (__global float *)sourceB);\n"
-"    vstore3(%s( sampA, sampB ), tid, (__global int *)destValues);\n"
-"    vstore3(( sampA %s sampB ), tid, (__global int *)destValuesB);\n"
-"\n"
-"}\n";
-
-const char *equivTestKernelPatternLessGreater_float3 =
-"__kernel void sample_test(__global float%s *sourceA, __global float%s *sourceB, __global int%s *destValues, __global int%s *destValuesB)\n"
-"{\n"
-"    int  tid = get_global_id(0);\n"
-"    float3 sampA = vload3(tid, (__global float *)sourceA);\n"
-"    float3 sampB = vload3(tid, (__global float *)sourceB);\n"
-"    vstore3(%s( sampA, sampB ), tid, (__global int *)destValues);\n"
-"    vstore3(( sampA < sampB ) | (sampA > sampB), tid, (__global int *)destValuesB);\n"
-"\n"
-"}\n";
-
-typedef bool (*equivVerifyFn)( float inDataA, float inDataB );
-extern int gInfNanSupport;
-
-int IsFloatInfinity(float x)
-{
-    return isinf(x);
-}
-
-int IsFloatNaN(float x)
-{
-    return isnan(x);
-}
-
-void verify_equiv_values_float( unsigned int vecSize, float *inDataA, float *inDataB, int *outData, equivVerifyFn verifyFn )
-{
-    unsigned int i;
-    int trueResult;
-    bool result;
-
-    trueResult = ( vecSize == 1 ) ? 1 : -1;
-    for( i = 0; i < vecSize; i++ )
-    {
-        result = verifyFn( inDataA[ i ], inDataB[ i ] );
-        outData[ i ] = result ? trueResult : 0;
-    }
-}
-
-void generate_equiv_test_data_float( float *outData, unsigned int vecSize, bool alpha, MTdata d )
-{
-    unsigned int i;
-
-    generate_random_data( kFloat, vecSize * TEST_SIZE, d, outData );
-
-    // Fill the first few vectors with NAN in each vector element (or the second set if we're alpha, so we can test either case)
-    if( alpha )
-        outData += vecSize * vecSize;
-    for( i = 0; i < vecSize; i++ )
-    {
-        outData[ 0 ] = NAN;
-        outData += vecSize + 1;
-    }
-    // Make sure the third set is filled regardless, to test the case where both have NANs
-    if( !alpha )
-        outData += vecSize * vecSize;
-    for( i = 0; i < vecSize; i++ )
-    {
-        outData[ 0 ] = NAN;
-        outData += vecSize + 1;
-    }
-}
-
-int test_equiv_kernel_float(cl_context context, cl_command_queue queue, const char *fnName, const char *opName,
-                       unsigned int vecSize, equivVerifyFn verifyFn, MTdata d )
-{
-    clProgramWrapper program;
-    clKernelWrapper kernel;
-    clMemWrapper streams[4];
-    float inDataA[TEST_SIZE * 16], inDataB[ TEST_SIZE * 16 ];
-    int outData[TEST_SIZE * 16], expected[16];
-    int error, i, j;
-    size_t threads[1], localThreads[1];
-    char kernelSource[10240];
-    char *programPtr;
-    char sizeName[4];
-
-
-    /* Create the source */
-    if( vecSize == 1 )
-        sizeName[ 0 ] = 0;
-    else
-        sprintf( sizeName, "%d", vecSize );
-
-
-    if(DENSE_PACK_VECS && vecSize == 3) {
-  if (strcmp(fnName, "islessgreater")) {
-            sprintf( kernelSource, equivTestKernelPattern_float3, sizeName, sizeName, sizeName, sizeName, fnName, opName );
-        } else {
-            sprintf( kernelSource, equivTestKernelPatternLessGreater_float3, sizeName, sizeName, sizeName, sizeName, fnName );
-        }
-    } else {
-        if (strcmp(fnName, "islessgreater")) {
-          sprintf( kernelSource, equivTestKernelPattern_float, sizeName, sizeName, sizeName, sizeName, fnName, opName );
-  } else {
-    sprintf( kernelSource, equivTestKernelPatternLessGreater_float, sizeName, sizeName, sizeName, sizeName, fnName );
-  }
-    }
-
-    /* Create kernels */
-    programPtr = kernelSource;
-    if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) )
-    {
-        return -1;
-    }
-
-    /* Generate some streams */
-    generate_equiv_test_data_float( inDataA, vecSize, true, d );
-    generate_equiv_test_data_float( inDataB, vecSize, false, d );
-
-    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(cl_float) * vecSize * TEST_SIZE,
-                                &inDataA, &error);
-    if( streams[0] == NULL )
-    {
-        print_error( error, "Creating input array A failed!\n");
-        return -1;
-    }
-    streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
-                                sizeof(cl_float) * vecSize * TEST_SIZE,
-                                &inDataB, &error);
-    if( streams[1] == NULL )
-    {
-        print_error( error, "Creating input array A failed!\n");
-        return -1;
-    }
-    streams[2] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( cl_int ) * vecSize * TEST_SIZE, NULL, &error);
-    if( streams[2] == NULL )
-    {
-        print_error( error, "Creating output array failed!\n");
-        return -1;
-    }
-  streams[3] = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof( cl_int ) * vecSize * TEST_SIZE, NULL, &error);
-    if( streams[3] == NULL )
-    {
-        print_error( error, "Creating output array failed!\n");
-        return -1;
-    }
-
-
-    /* Assign streams and execute */
-    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-    error = clSetKernelArg( kernel, 2, sizeof( streams[2] ), &streams[2] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-  error = clSetKernelArg( kernel, 3, sizeof( streams[3] ), &streams[3] );
-    test_error( error, "Unable to set indexed kernel arguments" );
-
-
-    /* Run the kernel */
-    threads[0] = TEST_SIZE;
-
-    error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
-    test_error( error, "Unable to get work group size to use" );
-
-    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
-    test_error( error, "Unable to execute test kernel" );
-
-  /* Now get the results */
-    error = clEnqueueReadBuffer( queue, streams[2], true, 0, sizeof( int ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL );
-    test_error( error, "Unable to read output array!" );
-
-  /* And verify! */
-  for( i = 0; i < TEST_SIZE; i++ )
-  {
-        verify_equiv_values_float( vecSize, &inDataA[ i * vecSize ], &inDataB[ i * vecSize ], expected, verifyFn);
-
-        for( j = 0; j < (int)vecSize; j++ )
-        {
-            if( expected[ j ] != outData[ i * vecSize + j ] )
-            {
-                log_error( "ERROR: Data sample %d:%d at size %d does not validate! Expected %d, got %d, source %f,%f\n",
-                  i, j, vecSize, expected[ j ], outData[ i * vecSize + j ], inDataA[i*vecSize + j], inDataB[i*vecSize + j] );
-                return -1;
-            }
-        }
-  }
-
-  /* Now get the results */
-    error = clEnqueueReadBuffer( queue, streams[3], true, 0, sizeof( int ) * TEST_SIZE * vecSize, outData, 0, NULL, NULL );
-    test_error( error, "Unable to read output array!" );
-
-  /* And verify! */
-    int fail = 0;
-    for( i = 0; i < TEST_SIZE; i++ )
-    {
-        verify_equiv_values_float( vecSize, &inDataA[ i * vecSize ], &inDataB[ i * vecSize ], expected, verifyFn);
-
-        for( j = 0; j < (int)vecSize; j++ )
-        {
-            if( expected[ j ] != outData[ i * vecSize + j ] )
-            {
-                if (gInfNanSupport == 0)
-                {
-                    if (IsFloatNaN(inDataA[i*vecSize + j]) || IsFloatNaN (inDataB[i*vecSize + j]))
-                    {
-                        fail = 0;
-                    }
-                    else
-                        fail = 1;
-                }
-                if (fail)
-                {
-                    log_error( "ERROR: Data sample %d:%d at size %d does not validate! Expected %d, got %d, source %f,%f\n",
-                      i, j, vecSize, expected[ j ], outData[ i * vecSize + j ], inDataA[i*vecSize + j], inDataB[i*vecSize + j] );
-                    return -1;
-                }
-            }
-        }
-  }
-
-  return 0;
-}
-
-int test_equiv_kernel_set_float(cl_context context, cl_command_queue queue, const char *fnName, const char *opName, equivVerifyFn verifyFn, MTdata d )
-{
-    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
-    unsigned int index;
-    int retVal = 0;
-
-    for( index = 0; vecSizes[ index ] != 0; index++ )
-    {
-        // Test!
-        if( test_equiv_kernel_float(context, queue, fnName, opName, vecSizes[ index ], verifyFn, d ) != 0 )
-        {
-            log_error( "   Vector float%d FAILED\n", vecSizes[ index ] );
-            retVal = -1;
-        }
-    }
-
-    return retVal;
-}
-
-bool isequal_verify_fn_float( float valueA, float valueB )
-{
-    return valueA == valueB;
-}
-
-int test_relational_isequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed( gRandomSeed );
-    return test_equiv_kernel_set_float( context, queue, "isequal", "==", isequal_verify_fn_float, seed );
-}
-
-bool isnotequal_verify_fn_float( float valueA, float valueB )
-{
-    return valueA != valueB;
-}
-
-int test_relational_isnotequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed( gRandomSeed );
-    return test_equiv_kernel_set_float( context, queue, "isnotequal", "!=", isnotequal_verify_fn_float, seed );
-}
-
-bool isgreater_verify_fn_float( float valueA, float valueB )
-{
-    return valueA > valueB;
-}
-
-int test_relational_isgreater_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed( gRandomSeed );
-    return test_equiv_kernel_set_float( context, queue, "isgreater", ">", isgreater_verify_fn_float, seed );
-}
-
-bool isgreaterequal_verify_fn_float( float valueA, float valueB )
-{
-    return valueA >= valueB;
-}
-
-int test_relational_isgreaterequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed( gRandomSeed );
-    return test_equiv_kernel_set_float( context, queue, "isgreaterequal", ">=", isgreaterequal_verify_fn_float, seed );
-}
-
-bool isless_verify_fn_float( float valueA, float valueB )
-{
-    return valueA < valueB;
-}
-
-int test_relational_isless_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed( gRandomSeed );
-    return test_equiv_kernel_set_float( context, queue, "isless", "<", isless_verify_fn_float, seed );
-}
-
-bool islessequal_verify_fn_float( float valueA, float valueB )
-{
-    return valueA <= valueB;
-}
-
-int test_relational_islessequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed( gRandomSeed );
-    return test_equiv_kernel_set_float( context, queue, "islessequal", "<=", islessequal_verify_fn_float, seed );
-}
-
-bool islessgreater_verify_fn_float( float valueA, float valueB )
-{
-    return ( valueA < valueB ) || ( valueA > valueB );
-}
-
-int test_relational_islessgreater_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    RandomSeed seed( gRandomSeed );
-    return test_equiv_kernel_set_float( context, queue, "islessgreater", "<>", islessgreater_verify_fn_float, seed );
-}
-
-
diff --git a/test_conformance/relationals/test_comparisons_fp.cpp b/test_conformance/relationals/test_comparisons_fp.cpp
new file mode 100644
index 00000000..580b7422
--- /dev/null
+++ b/test_conformance/relationals/test_comparisons_fp.cpp
@@ -0,0 +1,661 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <iostream>
+#include <map>
+#include <memory>
+#include <stdexcept>
+#include <vector>
+
+#include <CL/cl_half.h>
+
+#include "test_comparisons_fp.h"
+
+#define TEST_SIZE 512
+
+static char ftype[32] = { 0 };
+static char ftype_vec[32] = { 0 };
+static char itype[32] = { 0 };
+static char itype_vec[32] = { 0 };
+static char extension[128] = { 0 };
+
+// clang-format off
+// for readability sake keep this section unformatted
+const char* equivTestKernPat[] = {
+extension,
+"__kernel void sample_test(__global ", ftype_vec, " *sourceA, __global ", ftype_vec,
+" *sourceB, __global ", itype_vec, " *destValues, __global ", itype_vec, " *destValuesB)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"    destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
+"    destValuesB[tid] = sourceA[tid] %s sourceB[tid];\n"
+"}\n"};
+
+const char* equivTestKernPatLessGreater[] = {
+extension,
+"__kernel void sample_test(__global ", ftype_vec, " *sourceA, __global ", ftype_vec,
+" *sourceB, __global ", itype_vec, " *destValues, __global ", itype_vec, " *destValuesB)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"    destValues[tid] = %s( sourceA[tid], sourceB[tid] );\n"
+"    destValuesB[tid] = (sourceA[tid] < sourceB[tid]) | (sourceA[tid] > sourceB[tid]);\n"
+"}\n"};
+
+const char* equivTestKerPat_3[] = {
+extension,
+"__kernel void sample_test(__global ", ftype_vec, " *sourceA, __global ", ftype_vec,
+" *sourceB, __global ", itype_vec, " *destValues, __global ", itype_vec, " *destValuesB)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"    ",ftype_vec," sampA = vload3(tid, (__global ",ftype," *)sourceA);\n"
+"    ",ftype_vec," sampB = vload3(tid, (__global ",ftype," *)sourceB);\n"
+"    vstore3(%s( sampA, sampB ), tid, (__global ",itype," *)destValues);\n"
+"    vstore3(( sampA %s sampB ), tid, (__global ",itype," *)destValuesB);\n"
+"}\n"};
+
+const char* equivTestKerPatLessGreater_3[] = {
+extension,
+"__kernel void sample_test(__global ", ftype_vec, " *sourceA, __global ", ftype_vec,
+" *sourceB, __global ", itype_vec, " *destValues, __global ", itype_vec, " *destValuesB)\n"
+"{\n"
+"    int  tid = get_global_id(0);\n"
+"    ", ftype_vec, " sampA = vload3(tid, (__global ", ftype, " *)sourceA);\n"
+"    ", ftype_vec, " sampB = vload3(tid, (__global ", ftype, " *)sourceB);\n"
+"    vstore3(%s( sampA, sampB ), tid, (__global ", itype, " *)destValues);\n"
+"    vstore3(( sampA < sampB ) | (sampA > sampB), tid, (__global ", itype, " *)destValuesB);\n"
+"}\n"
+};
+// clang-format on
+
+
+std::string concat_kernel(const char* sstr[], int num)
+{
+    std::string res;
+    for (int i = 0; i < num; i++) res += std::string(sstr[i]);
+    return res;
+}
+
+template <typename... Args>
+std::string string_format(const std::string& format, Args... args)
+{
+    int size_s = std::snprintf(nullptr, 0, format.c_str(), args...)
+        + 1; // Extra space for '\0'
+    if (size_s <= 0)
+    {
+        throw std::runtime_error("Error during formatting.");
+    }
+    auto size = static_cast<size_t>(size_s);
+    std::unique_ptr<char[]> buf(new char[size]);
+    std::snprintf(buf.get(), size, format.c_str(), args...);
+    return std::string(buf.get(),
+                       buf.get() + size - 1); // We don't want the '\0' inside
+}
+
+template <typename T, typename F> bool verify(const T& A, const T& B)
+{
+    return F()(A, B);
+}
+
+RelationalsFPTest::RelationalsFPTest(cl_context context, cl_device_id device,
+                                     cl_command_queue queue, const char* fn,
+                                     const char* op)
+    : context(context), device(device), queue(queue), fnName(fn), opName(op),
+      halfFlushDenormsToZero(0)
+{
+    // hardcoded for now, to be changed into typeid().name solution in future
+    // for now C++ spec doesn't guarantee human readable type name
+
+    eqTypeNames = { { kHalf, "short" },
+                    { kFloat, "int" },
+                    { kDouble, "long" } };
+}
+
+template <typename T>
+void RelationalsFPTest::generate_equiv_test_data(T* outData,
+                                                 unsigned int vecSize,
+                                                 bool alpha,
+                                                 const RelTestParams<T>& param,
+                                                 const MTdata& d)
+{
+    unsigned int i;
+
+    generate_random_data(param.dataType, vecSize * TEST_SIZE, d, outData);
+
+    // Fill the first few vectors with NAN in each vector element (or the second
+    // set if we're alpha, so we can test either case)
+    if (alpha) outData += vecSize * vecSize;
+    for (i = 0; i < vecSize; i++)
+    {
+        outData[0] = param.nan;
+        outData += vecSize + 1;
+    }
+    // Make sure the third set is filled regardless, to test the case where both
+    // have NANs
+    if (!alpha) outData += vecSize * vecSize;
+    for (i = 0; i < vecSize; i++)
+    {
+        outData[0] = param.nan;
+        outData += vecSize + 1;
+    }
+}
+
+template <typename T, typename U>
+void RelationalsFPTest::verify_equiv_values(unsigned int vecSize,
+                                            const T* const inDataA,
+                                            const T* const inDataB,
+                                            U* const outData,
+                                            const VerifyFunc<T>& verifyFn)
+{
+    unsigned int i;
+    int trueResult;
+    bool result;
+
+    trueResult = (vecSize == 1) ? 1 : -1;
+    for (i = 0; i < vecSize; i++)
+    {
+        result = verifyFn(inDataA[i], inDataB[i]);
+        outData[i] = result ? trueResult : 0;
+    }
+}
+
+template <typename T>
+int RelationalsFPTest::test_equiv_kernel(unsigned int vecSize,
+                                         const RelTestParams<T>& param,
+                                         const MTdata& d)
+{
+    clProgramWrapper program;
+    clKernelWrapper kernel;
+    clMemWrapper streams[4];
+    T inDataA[TEST_SIZE * 16], inDataB[TEST_SIZE * 16];
+
+    // support half, float, double equivalents - otherwise assert
+    typedef typename std::conditional<
+        (sizeof(T) == sizeof(std::int16_t)), std::int16_t,
+        typename std::conditional<(sizeof(T) == sizeof(std::int32_t)),
+                                  std::int32_t, std::int64_t>::type>::type U;
+
+    U outData[TEST_SIZE * 16], expected[16];
+    int error, i, j;
+    size_t threads[1], localThreads[1];
+    std::string kernelSource;
+    char sizeName[4];
+
+    /* Create the source */
+    if (vecSize == 1)
+        sizeName[0] = 0;
+    else
+        sprintf(sizeName, "%d", vecSize);
+
+    if (eqTypeNames.find(param.dataType) == eqTypeNames.end())
+        log_error(
+            "RelationalsFPTest::test_equiv_kernel: unsupported fp data type");
+
+    sprintf(ftype, "%s", get_explicit_type_name(param.dataType));
+    sprintf(ftype_vec, "%s%s", get_explicit_type_name(param.dataType),
+            sizeName);
+
+    sprintf(itype, "%s", eqTypeNames[param.dataType].c_str());
+    sprintf(itype_vec, "%s%s", eqTypeNames[param.dataType].c_str(), sizeName);
+
+    if (std::is_same<T, double>::value)
+        strcpy(extension, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n");
+    else if (std::is_same<T, cl_half>::value)
+        strcpy(extension, "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n");
+    else
+        extension[0] = '\0';
+
+    if (DENSE_PACK_VECS && vecSize == 3)
+    {
+        if (strcmp(fnName.c_str(), "islessgreater"))
+        {
+            auto str =
+                concat_kernel(equivTestKerPat_3,
+                              sizeof(equivTestKerPat_3) / sizeof(const char*));
+            kernelSource = string_format(str, fnName.c_str(), opName.c_str());
+        }
+        else
+        {
+            auto str = concat_kernel(equivTestKerPatLessGreater_3,
+                                     sizeof(equivTestKerPatLessGreater_3)
+                                         / sizeof(const char*));
+            kernelSource = string_format(str, fnName.c_str());
+        }
+    }
+    else
+    {
+        if (strcmp(fnName.c_str(), "islessgreater"))
+        {
+            auto str =
+                concat_kernel(equivTestKernPat,
+                              sizeof(equivTestKernPat) / sizeof(const char*));
+            kernelSource = string_format(str, fnName.c_str(), opName.c_str());
+        }
+        else
+        {
+            auto str = concat_kernel(equivTestKernPatLessGreater,
+                                     sizeof(equivTestKernPatLessGreater)
+                                         / sizeof(const char*));
+            kernelSource = string_format(str, fnName.c_str());
+        }
+    }
+
+    /* Create kernels */
+    const char* programPtr = kernelSource.c_str();
+    if (create_single_kernel_helper(context, &program, &kernel, 1,
+                                    (const char**)&programPtr, "sample_test"))
+    {
+        return -1;
+    }
+
+    /* Generate some streams */
+    generate_equiv_test_data<T>(inDataA, vecSize, true, param, d);
+    generate_equiv_test_data<T>(inDataB, vecSize, false, param, d);
+
+    streams[0] =
+        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+                       sizeof(T) * vecSize * TEST_SIZE, &inDataA, &error);
+    if (streams[0] == NULL)
+    {
+        print_error(error, "Creating input array A failed!\n");
+        return -1;
+    }
+    streams[1] =
+        clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+                       sizeof(T) * vecSize * TEST_SIZE, &inDataB, &error);
+    if (streams[1] == NULL)
+    {
+        print_error(error, "Creating input array A failed!\n");
+        return -1;
+    }
+    streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                sizeof(U) * vecSize * TEST_SIZE, NULL, &error);
+    if (streams[2] == NULL)
+    {
+        print_error(error, "Creating output array failed!\n");
+        return -1;
+    }
+    streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                sizeof(U) * vecSize * TEST_SIZE, NULL, &error);
+    if (streams[3] == NULL)
+    {
+        print_error(error, "Creating output array failed!\n");
+        return -1;
+    }
+
+    /* Assign streams and execute */
+    error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
+    test_error(error, "Unable to set indexed kernel arguments");
+    error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
+    test_error(error, "Unable to set indexed kernel arguments");
+    error = clSetKernelArg(kernel, 2, sizeof(streams[2]), &streams[2]);
+    test_error(error, "Unable to set indexed kernel arguments");
+    error = clSetKernelArg(kernel, 3, sizeof(streams[3]), &streams[3]);
+    test_error(error, "Unable to set indexed kernel arguments");
+
+    /* Run the kernel */
+    threads[0] = TEST_SIZE;
+
+    error = get_max_common_work_group_size(context, kernel, threads[0],
+                                           &localThreads[0]);
+    test_error(error, "Unable to get work group size to use");
+
+    error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
+                                   localThreads, 0, NULL, NULL);
+    test_error(error, "Unable to execute test kernel");
+
+    /* Now get the results */
+    error = clEnqueueReadBuffer(queue, streams[2], true, 0,
+                                sizeof(U) * TEST_SIZE * vecSize, outData, 0,
+                                NULL, NULL);
+    test_error(error, "Unable to read output array!");
+
+    auto verror_msg = [](const int& i, const int& j, const unsigned& vs,
+                         const U& e, const U& o, const T& iA, const T& iB) {
+        std::stringstream sstr;
+        sstr << "ERROR: Data sample " << i << ":" << j << " at size " << vs
+             << " does not validate! Expected " << e << ", got " << o
+             << ", source " << iA << ":" << iB << std::endl;
+        log_error(sstr.str().c_str());
+    };
+
+    /* And verify! */
+    for (i = 0; i < TEST_SIZE; i++)
+    {
+        verify_equiv_values<T, U>(vecSize, &inDataA[i * vecSize],
+                                  &inDataB[i * vecSize], expected,
+                                  param.verifyFn);
+
+        for (j = 0; j < (int)vecSize; j++)
+        {
+            if (expected[j] != outData[i * vecSize + j])
+            {
+                bool acceptFail = true;
+                if (std::is_same<T, cl_half>::value)
+                {
+                    bool in_denorm = IsHalfSubnormal(inDataA[i * vecSize + j])
+                        || IsHalfSubnormal(inDataB[i * vecSize + j]);
+
+                    if (halfFlushDenormsToZero && in_denorm)
+                    {
+                        acceptFail = false;
+                    }
+                }
+
+                if (acceptFail)
+                {
+                    verror_msg(
+                        i, j, vecSize, expected[j], outData[i * vecSize + j],
+                        inDataA[i * vecSize + j], inDataB[i * vecSize + j]);
+                    return -1;
+                }
+            }
+        }
+    }
+
+    /* Now get the results */
+    error = clEnqueueReadBuffer(queue, streams[3], true, 0,
+                                sizeof(U) * TEST_SIZE * vecSize, outData, 0,
+                                NULL, NULL);
+    test_error(error, "Unable to read output array!");
+
+    /* And verify! */
+    int fail = 0;
+    for (i = 0; i < TEST_SIZE; i++)
+    {
+        verify_equiv_values<T, U>(vecSize, &inDataA[i * vecSize],
+                                  &inDataB[i * vecSize], expected,
+                                  param.verifyFn);
+
+        for (j = 0; j < (int)vecSize; j++)
+        {
+            if (expected[j] != outData[i * vecSize + j])
+            {
+                if (std::is_same<T, float>::value)
+                {
+                    if (gInfNanSupport == 0)
+                    {
+                        if (isnan(inDataA[i * vecSize + j])
+                            || isnan(inDataB[i * vecSize + j]))
+                            fail = 0;
+                        else
+                            fail = 1;
+                    }
+                    if (fail)
+                    {
+                        verror_msg(i, j, vecSize, expected[j],
+                                   outData[i * vecSize + j],
+                                   inDataA[i * vecSize + j],
+                                   inDataB[i * vecSize + j]);
+                        return -1;
+                    }
+                }
+                else if (std::is_same<T, cl_half>::value)
+                {
+                    bool in_denorm = IsHalfSubnormal(inDataA[i * vecSize + j])
+                        || IsHalfSubnormal(inDataB[i * vecSize + j]);
+
+                    if (!(halfFlushDenormsToZero && in_denorm))
+                    {
+                        verror_msg(i, j, vecSize, expected[j],
+                                   outData[i * vecSize + j],
+                                   inDataA[i * vecSize + j],
+                                   inDataB[i * vecSize + j]);
+                        return -1;
+                    }
+                }
+                else
+                {
+                    verror_msg(
+                        i, j, vecSize, expected[j], outData[i * vecSize + j],
+                        inDataA[i * vecSize + j], inDataB[i * vecSize + j]);
+                    return -1;
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+template <typename T>
+int RelationalsFPTest::test_relational(int numElements,
+                                       const RelTestParams<T>& param)
+{
+    RandomSeed seed(gRandomSeed);
+    unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
+    unsigned int index;
+    int retVal = 0;
+
+    for (index = 0; vecSizes[index] != 0; index++)
+    {
+        // Test!
+        if (test_equiv_kernel<T>(vecSizes[index], param, seed) != 0)
+        {
+            log_error("   Vector %s%d FAILED\n", ftype, vecSizes[index]);
+            retVal = -1;
+        }
+    }
+    return retVal;
+}
+
+cl_int RelationalsFPTest::SetUp(int elements)
+{
+    if (is_extension_available(device, "cl_khr_fp16"))
+    {
+        cl_device_fp_config config = 0;
+        cl_int error = clGetDeviceInfo(device, CL_DEVICE_HALF_FP_CONFIG,
+                                       sizeof(config), &config, NULL);
+        test_error(error, "Unable to get device CL_DEVICE_HALF_FP_CONFIG");
+
+        halfFlushDenormsToZero = (0 == (config & CL_FP_DENORM));
+        log_info("Supports half precision denormals: %s\n",
+                 halfFlushDenormsToZero ? "NO" : "YES");
+    }
+
+    return CL_SUCCESS;
+}
+
+cl_int RelationalsFPTest::Run()
+{
+    cl_int error = CL_SUCCESS;
+    for (auto&& param : params)
+    {
+        switch (param->dataType)
+        {
+            case kHalf:
+                error = test_relational<cl_half>(
+                    num_elements, *((RelTestParams<cl_half>*)param.get()));
+                break;
+            case kFloat:
+                error = test_relational<float>(
+                    num_elements, *((RelTestParams<float>*)param.get()));
+                break;
+            case kDouble:
+                error = test_relational<double>(
+                    num_elements, *((RelTestParams<double>*)param.get()));
+                break;
+            default:
+                test_error(-1, "RelationalsFPTest::Run: incorrect fp type");
+                break;
+        }
+        test_error(error, "RelationalsFPTest::Run: test_relational failed");
+    }
+    return CL_SUCCESS;
+}
+
+cl_int IsEqualFPTest::SetUp(int elements)
+{
+    num_elements = elements;
+    if (is_extension_available(device, "cl_khr_fp16"))
+        params.emplace_back(new RelTestParams<cl_half>(
+            &verify<cl_half, half_equals_to>, kHalf, HALF_NAN));
+
+    params.emplace_back(new RelTestParams<float>(
+        &verify<float, std::equal_to<float>>, kFloat, NAN));
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+        params.emplace_back(new RelTestParams<double>(
+            &verify<double, std::equal_to<double>>, kDouble, NAN));
+
+    return RelationalsFPTest::SetUp(elements);
+}
+
+cl_int IsNotEqualFPTest::SetUp(int elements)
+{
+    num_elements = elements;
+    if (is_extension_available(device, "cl_khr_fp16"))
+        params.emplace_back(new RelTestParams<cl_half>(
+            &verify<cl_half, half_not_equals_to>, kHalf, HALF_NAN));
+
+    params.emplace_back(new RelTestParams<float>(
+        &verify<float, std::not_equal_to<float>>, kFloat, NAN));
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+        params.emplace_back(new RelTestParams<double>(
+            &verify<double, std::not_equal_to<double>>, kDouble, NAN));
+
+    return RelationalsFPTest::SetUp(elements);
+}
+
+cl_int IsGreaterFPTest::SetUp(int elements)
+{
+    num_elements = elements;
+    if (is_extension_available(device, "cl_khr_fp16"))
+        params.emplace_back(new RelTestParams<cl_half>(
+            &verify<cl_half, half_greater>, kHalf, HALF_NAN));
+
+    params.emplace_back(new RelTestParams<float>(
+        &verify<float, std::greater<float>>, kFloat, NAN));
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+        params.emplace_back(new RelTestParams<double>(
+            &verify<double, std::greater<double>>, kDouble, NAN));
+
+    return RelationalsFPTest::SetUp(elements);
+}
+
+cl_int IsGreaterEqualFPTest::SetUp(int elements)
+{
+    num_elements = elements;
+    if (is_extension_available(device, "cl_khr_fp16"))
+        params.emplace_back(new RelTestParams<cl_half>(
+            &verify<cl_half, half_greater_equal>, kHalf, HALF_NAN));
+
+    params.emplace_back(new RelTestParams<float>(
+        &verify<float, std::greater_equal<float>>, kFloat, NAN));
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+        params.emplace_back(new RelTestParams<double>(
+            &verify<double, std::greater_equal<double>>, kDouble, NAN));
+
+    return RelationalsFPTest::SetUp(elements);
+}
+
+cl_int IsLessFPTest::SetUp(int elements)
+{
+    num_elements = elements;
+    if (is_extension_available(device, "cl_khr_fp16"))
+        params.emplace_back(new RelTestParams<cl_half>(
+            &verify<cl_half, half_less>, kHalf, HALF_NAN));
+
+    params.emplace_back(new RelTestParams<float>(
+        &verify<float, std::less<float>>, kFloat, NAN));
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+        params.emplace_back(new RelTestParams<double>(
+            &verify<double, std::less<double>>, kDouble, NAN));
+
+    return RelationalsFPTest::SetUp(elements);
+}
+
+cl_int IsLessEqualFPTest::SetUp(int elements)
+{
+    num_elements = elements;
+    if (is_extension_available(device, "cl_khr_fp16"))
+        params.emplace_back(new RelTestParams<cl_half>(
+            &verify<cl_half, half_less_equal>, kHalf, HALF_NAN));
+
+    params.emplace_back(new RelTestParams<float>(
+        &verify<float, std::less_equal<float>>, kFloat, NAN));
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+        params.emplace_back(new RelTestParams<double>(
+            &verify<double, std::less_equal<double>>, kDouble, NAN));
+
+    return RelationalsFPTest::SetUp(elements);
+}
+
+cl_int IsLessGreaterFPTest::SetUp(int elements)
+{
+    num_elements = elements;
+    if (is_extension_available(device, "cl_khr_fp16"))
+        params.emplace_back(new RelTestParams<cl_half>(
+            &verify<cl_half, half_less_greater>, kHalf, HALF_NAN));
+
+    params.emplace_back(new RelTestParams<float>(
+        &verify<float, less_greater<float>>, kFloat, NAN));
+
+    if (is_extension_available(device, "cl_khr_fp64"))
+        params.emplace_back(new RelTestParams<double>(
+            &verify<double, less_greater<double>>, kDouble, NAN));
+
+    return RelationalsFPTest::SetUp(elements);
+}
+
+int test_relational_isequal(cl_device_id device, cl_context context,
+                            cl_command_queue queue, int numElements)
+{
+    return MakeAndRunTest<IsEqualFPTest>(device, context, queue, numElements);
+}
+
+int test_relational_isnotequal(cl_device_id device, cl_context context,
+                               cl_command_queue queue, int numElements)
+{
+    return MakeAndRunTest<IsNotEqualFPTest>(device, context, queue,
+                                            numElements);
+}
+
+int test_relational_isgreater(cl_device_id device, cl_context context,
+                              cl_command_queue queue, int numElements)
+{
+    return MakeAndRunTest<IsGreaterFPTest>(device, context, queue, numElements);
+}
+
+int test_relational_isgreaterequal(cl_device_id device, cl_context context,
+                                   cl_command_queue queue, int numElements)
+{
+    return MakeAndRunTest<IsGreaterEqualFPTest>(device, context, queue,
+                                                numElements);
+}
+
+int test_relational_isless(cl_device_id device, cl_context context,
+                           cl_command_queue queue, int numElements)
+{
+    return MakeAndRunTest<IsLessFPTest>(device, context, queue, numElements);
+}
+
+int test_relational_islessequal(cl_device_id device, cl_context context,
+                                cl_command_queue queue, int numElements)
+{
+    return MakeAndRunTest<IsLessEqualFPTest>(device, context, queue,
+                                             numElements);
+}
+
+int test_relational_islessgreater(cl_device_id device, cl_context context,
+                                  cl_command_queue queue, int numElements)
+{
+    return MakeAndRunTest<IsLessGreaterFPTest>(device, context, queue,
+                                               numElements);
+}
diff --git a/test_conformance/relationals/test_comparisons_fp.h b/test_conformance/relationals/test_comparisons_fp.h
new file mode 100644
index 00000000..7faca1c5
--- /dev/null
+++ b/test_conformance/relationals/test_comparisons_fp.h
@@ -0,0 +1,227 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef _TEST_COMPARISONS_FP_H
+#define _TEST_COMPARISONS_FP_H
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <CL/cl_half.h>
+
+#include "testBase.h"
+
+#define HALF_NAN 0x7e00
+template <typename T> using VerifyFunc = bool (*)(const T &, const T &);
+
+struct RelTestBase
+{
+    explicit RelTestBase(const ExplicitTypes &dt): dataType(dt) {}
+    ExplicitTypes dataType;
+};
+
+template <typename T> struct RelTestParams : public RelTestBase
+{
+    RelTestParams(const VerifyFunc<T> &vfn, const ExplicitTypes &dt,
+                  const T &nan_)
+        : RelTestBase(dt), verifyFn(vfn), nan(nan_)
+    {}
+
+    VerifyFunc<T> verifyFn;
+    T nan;
+};
+
+struct RelationalsFPTest
+{
+    RelationalsFPTest(cl_context context, cl_device_id device,
+                      cl_command_queue queue, const char *fn, const char *op);
+
+    virtual cl_int SetUp(int elements);
+
+    // Test body returning an OpenCL error code
+    virtual cl_int Run();
+
+    template <typename T>
+    void generate_equiv_test_data(T *, unsigned int, bool,
+                                  const RelTestParams<T> &, const MTdata &);
+
+    template <typename T, typename U>
+    void verify_equiv_values(unsigned int, const T *const, const T *const,
+                             U *const, const VerifyFunc<T> &);
+
+    template <typename T>
+    int test_equiv_kernel(unsigned int vecSize, const RelTestParams<T> &param,
+                          const MTdata &d);
+
+    template <typename T>
+    int test_relational(int numElements, const RelTestParams<T> &param);
+
+protected:
+    cl_context context;
+    cl_device_id device;
+    cl_command_queue queue;
+
+    std::string fnName;
+    std::string opName;
+
+    std::vector<std::unique_ptr<RelTestBase>> params;
+    std::map<ExplicitTypes, std::string> eqTypeNames;
+    size_t num_elements;
+
+    int halfFlushDenormsToZero;
+};
+
+struct IsEqualFPTest : public RelationalsFPTest
+{
+    IsEqualFPTest(cl_device_id d, cl_context c, cl_command_queue q)
+        : RelationalsFPTest(c, d, q, "isequal", "==")
+    {}
+    cl_int SetUp(int elements) override;
+
+    // for correct handling nan/inf we need fp value
+    struct half_equals_to
+    {
+        bool operator()(const cl_half &lhs, const cl_half &rhs) const
+        {
+            return cl_half_to_float(lhs) == cl_half_to_float(rhs);
+        }
+    };
+};
+
+struct IsNotEqualFPTest : public RelationalsFPTest
+{
+    IsNotEqualFPTest(cl_device_id d, cl_context c, cl_command_queue q)
+        : RelationalsFPTest(c, d, q, "isnotequal", "!=")
+    {}
+    cl_int SetUp(int elements) override;
+
+    // for correct handling nan/inf we need fp value
+    struct half_not_equals_to
+    {
+        bool operator()(const cl_half &lhs, const cl_half &rhs) const
+        {
+            return cl_half_to_float(lhs) != cl_half_to_float(rhs);
+        }
+    };
+};
+
+struct IsGreaterFPTest : public RelationalsFPTest
+{
+    IsGreaterFPTest(cl_device_id d, cl_context c, cl_command_queue q)
+        : RelationalsFPTest(c, d, q, "isgreater", ">")
+    {}
+    cl_int SetUp(int elements) override;
+
+    struct half_greater
+    {
+        bool operator()(const cl_half &lhs, const cl_half &rhs) const
+        {
+            return cl_half_to_float(lhs) > cl_half_to_float(rhs);
+        }
+    };
+};
+
+struct IsGreaterEqualFPTest : public RelationalsFPTest
+{
+    IsGreaterEqualFPTest(cl_device_id d, cl_context c, cl_command_queue q)
+        : RelationalsFPTest(c, d, q, "isgreaterequal", ">=")
+    {}
+    cl_int SetUp(int elements) override;
+
+    struct half_greater_equal
+    {
+        bool operator()(const cl_half &lhs, const cl_half &rhs) const
+        {
+            return cl_half_to_float(lhs) >= cl_half_to_float(rhs);
+        }
+    };
+};
+
+struct IsLessFPTest : public RelationalsFPTest
+{
+    IsLessFPTest(cl_device_id d, cl_context c, cl_command_queue q)
+        : RelationalsFPTest(c, d, q, "isless", "<")
+    {}
+    cl_int SetUp(int elements) override;
+
+    struct half_less
+    {
+        bool operator()(const cl_half &lhs, const cl_half &rhs) const
+        {
+            return cl_half_to_float(lhs) < cl_half_to_float(rhs);
+        }
+    };
+};
+
+struct IsLessEqualFPTest : public RelationalsFPTest
+{
+    IsLessEqualFPTest(cl_device_id d, cl_context c, cl_command_queue q)
+        : RelationalsFPTest(c, d, q, "islessequal", "<=")
+    {}
+    cl_int SetUp(int elements) override;
+
+    struct half_less_equal
+    {
+        bool operator()(const cl_half &lhs, const cl_half &rhs) const
+        {
+            return cl_half_to_float(lhs) <= cl_half_to_float(rhs);
+        }
+    };
+};
+
+struct IsLessGreaterFPTest : public RelationalsFPTest
+{
+    IsLessGreaterFPTest(cl_device_id d, cl_context c, cl_command_queue q)
+        : RelationalsFPTest(c, d, q, "islessgreater", "<>")
+    {}
+    cl_int SetUp(int elements) override;
+
+    struct half_less_greater
+    {
+        bool operator()(const cl_half &lhs, const cl_half &rhs) const
+        {
+            float flhs = cl_half_to_float(lhs), frhs = cl_half_to_float(rhs);
+            return (flhs < frhs) || (flhs > frhs);
+        }
+    };
+
+    template <typename T> struct less_greater
+    {
+        bool operator()(const T &lhs, const T &rhs) const
+        {
+            return (lhs < rhs) || (lhs > rhs);
+        }
+    };
+};
+
+template <class T>
+int MakeAndRunTest(cl_device_id device, cl_context context,
+                   cl_command_queue queue, int num_elements)
+{
+    auto test_fixture = T(device, context, queue);
+
+    cl_int error = test_fixture.SetUp(num_elements);
+    test_error_ret(error, "Error in test initialization", TEST_FAIL);
+
+    error = test_fixture.Run();
+    test_error_ret(error, "Test Failed", TEST_FAIL);
+
+    return TEST_PASS;
+}
+
+#endif // _TEST_COMPARISONS_FP_H
diff --git a/test_conformance/relationals/test_relationals.cpp b/test_conformance/relationals/test_relationals.cpp
index 5a874af7..d744fb2a 100644
--- a/test_conformance/relationals/test_relationals.cpp
+++ b/test_conformance/relationals/test_relationals.cpp
@@ -18,8 +18,11 @@
 #include "harness/typeWrappers.h"
 #include "harness/testHarness.h"
 
+// clang-format off
+
 const char *anyAllTestKernelPattern =
 "%s\n" // optional pragma
+"%s\n" // optional pragma
 "__kernel void sample_test(__global %s%s *sourceA, __global int *destValues)\n"
 "{\n"
 "    int  tid = get_global_id(0);\n"
@@ -29,6 +32,7 @@ const char *anyAllTestKernelPattern =
 
 const char *anyAllTestKernelPatternVload =
 "%s\n" // optional pragma
+"%s\n" // optional pragma
 "__kernel void sample_test(__global %s%s *sourceA, __global int *destValues)\n"
 "{\n"
 "    int  tid = get_global_id(0);\n"
@@ -36,6 +40,8 @@ const char *anyAllTestKernelPatternVload =
 "\n"
 "}\n";
 
+// clang-format on
+
 #define TEST_SIZE 512
 
 typedef int (*anyAllVerifyFn)( ExplicitType vecType, unsigned int vecSize, void *inData );
@@ -67,14 +73,22 @@ int test_any_all_kernel(cl_context context, cl_command_queue queue,
              get_explicit_type_name( vecType ), sizeName);
     if(DENSE_PACK_VECS && vecSize == 3) {
         // anyAllTestKernelPatternVload
-        sprintf( kernelSource, anyAllTestKernelPatternVload,
-                vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                get_explicit_type_name( vecType ), sizeName, fnName,
-                get_explicit_type_name(vecType));
+        sprintf(
+            kernelSource, anyAllTestKernelPatternVload,
+            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
+                               : "",
+            vecType == kHalf ? "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"
+                             : "",
+            get_explicit_type_name(vecType), sizeName, fnName,
+            get_explicit_type_name(vecType));
     } else {
-        sprintf( kernelSource, anyAllTestKernelPattern,
-                vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                get_explicit_type_name( vecType ), sizeName, fnName );
+        sprintf(
+            kernelSource, anyAllTestKernelPattern,
+            vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
+                               : "",
+            vecType == kHalf ? "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"
+                             : "",
+            get_explicit_type_name(vecType), sizeName, fnName);
     }
     /* Create kernels */
     programPtr = kernelSource;
@@ -282,8 +296,11 @@ int test_relational_all(cl_device_id device, cl_context context, cl_command_queu
     return retVal;
 }
 
+// clang-format off
+
 const char *selectTestKernelPattern =
 "%s\n" // optional pragma
+"%s\n" // optional pragma
 "__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *sourceC, __global %s%s *destValues)\n"
 "{\n"
 "    int  tid = get_global_id(0);\n"
@@ -294,6 +311,7 @@ const char *selectTestKernelPattern =
 
 const char *selectTestKernelPatternVload =
 "%s\n" // optional pragma
+"%s\n" // optional pragma
 "__kernel void sample_test(__global %s%s *sourceA, __global %s%s *sourceB, __global %s%s *sourceC, __global %s%s *destValues)\n"
 "{\n"
 "    int  tid = get_global_id(0);\n"
@@ -302,6 +320,8 @@ const char *selectTestKernelPatternVload =
 "\n"
 "}\n";
 
+// clang-format on
+
 typedef void (*selectVerifyFn)( ExplicitType vecType, ExplicitType testVecType, unsigned int vecSize, void *inDataA, void *inDataB, void *inDataTest, void *outData );
 
 int test_select_kernel(cl_context context, cl_command_queue queue, const char *fnName,
@@ -335,26 +355,34 @@ int test_select_kernel(cl_context context, cl_command_queue queue, const char *f
 
     if(DENSE_PACK_VECS && vecSize == 3) {
         // anyAllTestKernelPatternVload
-        sprintf( kernelSource, selectTestKernelPatternVload,
-                (vecType == kDouble || testVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                get_explicit_type_name( vecType ), sizeName,
-                get_explicit_type_name( vecType ), sizeName,
-                get_explicit_type_name( testVecType ), sizeName,
-                get_explicit_type_name( vecType ), outSizeName,
-                get_explicit_type_name( vecType ), sizeName,
-                fnName,
-                get_explicit_type_name( vecType ),
-                get_explicit_type_name( vecType ),
-                get_explicit_type_name( vecType ),
-                get_explicit_type_name( testVecType ) );
+        sprintf(kernelSource, selectTestKernelPatternVload,
+                (vecType == kDouble || testVecType == kDouble)
+                    ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
+                    : "",
+                (vecType == kHalf || testVecType == kHalf)
+                    ? "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"
+                    : "",
+                get_explicit_type_name(vecType), sizeName,
+                get_explicit_type_name(vecType), sizeName,
+                get_explicit_type_name(testVecType), sizeName,
+                get_explicit_type_name(vecType), outSizeName,
+                get_explicit_type_name(vecType), sizeName, fnName,
+                get_explicit_type_name(vecType),
+                get_explicit_type_name(vecType),
+                get_explicit_type_name(vecType),
+                get_explicit_type_name(testVecType));
     } else {
-        sprintf( kernelSource, selectTestKernelPattern,
-                (vecType == kDouble || testVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
-                get_explicit_type_name( vecType ), sizeName,
-                get_explicit_type_name( vecType ), sizeName,
-                get_explicit_type_name( testVecType ), sizeName,
-                get_explicit_type_name( vecType ), outSizeName,
-                fnName );
+        sprintf(kernelSource, selectTestKernelPattern,
+                (vecType == kDouble || testVecType == kDouble)
+                    ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
+                    : "",
+                (vecType == kHalf || testVecType == kHalf)
+                    ? "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"
+                    : "",
+                get_explicit_type_name(vecType), sizeName,
+                get_explicit_type_name(vecType), sizeName,
+                get_explicit_type_name(testVecType), sizeName,
+                get_explicit_type_name(vecType), outSizeName, fnName);
     }
 
     /* Create kernels */
@@ -500,14 +528,17 @@ void bitselect_verify_fn( ExplicitType vecType, ExplicitType testVecType, unsign
 
 int test_relational_bitselect(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
 {
-    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
+    constexpr ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort,
+                                         kInt,  kUInt,  kLong,  kULong,
+                                         kHalf, kFloat, kDouble };
+    constexpr auto vecTypeSize = sizeof(vecType) / sizeof(ExplicitType);
     unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
     unsigned int index, typeIndex;
     int retVal = 0;
     RandomSeed seed( gRandomSeed );
 
 
-    for( typeIndex = 0; typeIndex < 10; typeIndex++ )
+    for (typeIndex = 0; typeIndex < vecTypeSize; typeIndex++)
     {
         if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) && !gHasLong)
             continue;
@@ -522,6 +553,19 @@ int test_relational_bitselect(cl_device_id device, cl_context context, cl_comman
             else
                 log_info("Testing doubles.\n");
         }
+
+        if (vecType[typeIndex] == kHalf)
+        {
+            if (!is_extension_available(device, "cl_khr_fp16"))
+            {
+                log_info("Extension cl_khr_fp16 not supported; skipping half "
+                         "tests.\n");
+                continue;
+            }
+            else
+                log_info("Testing halfs.\n");
+        }
+
         for( index = 0; vecSizes[ index ] != 0; index++ )
         {
             // Test!
@@ -584,14 +628,18 @@ void select_signed_verify_fn( ExplicitType vecType, ExplicitType testVecType, un
 
 int test_relational_select_signed(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
 {
-    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
+    constexpr ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort,
+                                         kInt,  kUInt,  kLong,  kULong,
+                                         kHalf, kFloat, kDouble };
+    constexpr auto vecTypeSize = sizeof(vecType) / sizeof(ExplicitType);
+
     ExplicitType testVecType[] = { kChar, kShort, kInt, kLong, kNumExplicitTypes };
     unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
     unsigned int index, typeIndex, testTypeIndex;
     int retVal = 0;
     RandomSeed seed( gRandomSeed );
 
-    for( typeIndex = 0; typeIndex < 10; typeIndex++ )
+    for (typeIndex = 0; typeIndex < vecTypeSize; typeIndex++)
     {
         if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) && !gHasLong)
             continue;
@@ -604,6 +652,19 @@ int test_relational_select_signed(cl_device_id device, cl_context context, cl_co
                 log_info("Testing doubles.\n");
             }
         }
+        if (vecType[typeIndex] == kHalf)
+        {
+            if (!is_extension_available(device, "cl_khr_fp16"))
+            {
+                log_info("Extension cl_khr_fp16 not supported; skipping half "
+                         "tests.\n");
+                continue;
+            }
+            else
+            {
+                log_info("Testing halfs.\n");
+            }
+        }
         for( testTypeIndex = 0; testVecType[ testTypeIndex ] != kNumExplicitTypes; testTypeIndex++ )
         {
             if( testVecType[ testTypeIndex ] != vecType[ typeIndex ] )
@@ -673,7 +734,11 @@ void select_unsigned_verify_fn( ExplicitType vecType, ExplicitType testVecType,
 
 int test_relational_select_unsigned(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
 {
-    ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
+    constexpr ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort,
+                                         kInt,  kUInt,  kLong,  kULong,
+                                         kHalf, kFloat, kDouble };
+    constexpr auto vecTypeSize = sizeof(vecType) / sizeof(ExplicitType);
+
     ExplicitType testVecType[] = { kUChar, kUShort, kUInt, kULong, kNumExplicitTypes };
     unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 };
     unsigned int index, typeIndex, testTypeIndex;
@@ -681,7 +746,7 @@ int test_relational_select_unsigned(cl_device_id device, cl_context context, cl_
     RandomSeed seed(gRandomSeed);
 
 
-    for( typeIndex = 0; typeIndex < 10; typeIndex++ )
+    for (typeIndex = 0; typeIndex < vecTypeSize; typeIndex++)
     {
         if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) && !gHasLong)
             continue;
@@ -694,6 +759,19 @@ int test_relational_select_unsigned(cl_device_id device, cl_context context, cl_
                 log_info("Testing doubles.\n");
             }
         }
+        if (vecType[typeIndex] == kHalf)
+        {
+            if (!is_extension_available(device, "cl_khr_fp16"))
+            {
+                log_info("Extension cl_khr_fp16 not supported; skipping half "
+                         "tests.\n");
+                continue;
+            }
+            else
+            {
+                log_info("Testing halfs.\n");
+            }
+        }
         for( testTypeIndex = 0; testVecType[ testTypeIndex ] != kNumExplicitTypes; testTypeIndex++ )
         {
             if( testVecType[ testTypeIndex ] != vecType[ typeIndex ] )
@@ -714,85 +792,3 @@ int test_relational_select_unsigned(cl_device_id device, cl_context context, cl_
 
     return retVal;
 }
-
-
-
-extern int test_relational_isequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isnotequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isgreater_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isgreaterequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isless_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_islessequal_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_islessgreater_float(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isnotequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isgreater_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isgreaterequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_isless_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_islessequal_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-extern int test_relational_islessgreater_double(cl_device_id device, cl_context context, cl_command_queue queue, int numElements );
-
-
-int test_relational_isequal(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    int err = 0;
-    err |= test_relational_isequal_float( device, context, queue, numElements );
-    err |= test_relational_isequal_double( device, context, queue, numElements );
-    return err;
-}
-
-
-int test_relational_isnotequal(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    int err = 0;
-    err |= test_relational_isnotequal_float( device, context, queue, numElements );
-    err |= test_relational_isnotequal_double( device, context, queue, numElements );
-    return err;
-}
-
-
-int test_relational_isgreater(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    int err = 0;
-    err |= test_relational_isgreater_float( device, context, queue, numElements );
-    err |= test_relational_isgreater_double( device, context, queue, numElements );
-    return err;
-}
-
-
-int test_relational_isgreaterequal(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    int err = 0;
-    err |= test_relational_isgreaterequal_float( device, context, queue, numElements );
-    err |= test_relational_isgreaterequal_double( device, context, queue, numElements );
-    return err;
-}
-
-
-int test_relational_isless(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    int err = 0;
-    err |= test_relational_isless_float( device, context, queue, numElements );
-    err |= test_relational_isless_double( device, context, queue, numElements );
-    return err;
-}
-
-
-int test_relational_islessequal(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    int err = 0;
-    err |= test_relational_islessequal_float( device, context, queue, numElements );
-    err |= test_relational_islessequal_double( device, context, queue, numElements );
-    return err;
-}
-
-
-int test_relational_islessgreater(cl_device_id device, cl_context context, cl_command_queue queue, int numElements )
-{
-    int err = 0;
-    err |= test_relational_islessgreater_float( device, context, queue, numElements );
-    err |= test_relational_islessgreater_double( device, context, queue, numElements );
-    return err;
-}
-
-
diff --git a/test_conformance/relationals/test_shuffles.cpp b/test_conformance/relationals/test_shuffles.cpp
index 5fd3b6c5..5193b9d2 100644
--- a/test_conformance/relationals/test_shuffles.cpp
+++ b/test_conformance/relationals/test_shuffles.cpp
@@ -13,6 +13,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 //
+
+#include <iomanip>
+
 #include "testBase.h"
 #include "harness/conversions.h"
 #include "harness/typeWrappers.h"
@@ -201,14 +204,13 @@ const char *get_order_string( ShuffleOrder &order, size_t vecSize, cl_uint lengt
 
     size_t j, idx;
 
-    // Assume we don't have to use numbers
-    byNumber = 0;
-    // Check to see
+    // Assume we don't have to use numbered indices (.s0123...).
+    byNumber = false;
+    // Check if any index is beyond xyzw, which requires to use numbers.
     for( j = 0; j < lengthToUse; j++ )
     {
         if (order[j] > 3) {
-            // An index is > xyzw so we need to use numbers
-            byNumber = 1;
+            byNumber = true;
             break;
         }
     }
@@ -216,14 +218,11 @@ const char *get_order_string( ShuffleOrder &order, size_t vecSize, cl_uint lengt
     if (!byNumber) {
         byNumber = (useNumbersFlip++)%2;
     }
-    // Do not use xyzw for vectors whose length is not 2 or 4 per the spec.
-    if (vecSize != 2 || vecSize != 4 || vecSize != 3)
-        byNumber = 1;
 
-    if( byNumber || vecSize > 4 )
+    if (byNumber)
     {
         idx = 0;
-        // Randomly chose upper and lower case S
+        // Randomly chose upper and lower case S.
         orderString[ idx++ ] = random_in_range(0, 1, d) ? 's' : 'S';
         for( j = 0; j < vecSize && j < lengthToUse; j++ ) {
             // Randomly choose upper and lower case.
@@ -233,8 +232,8 @@ const char *get_order_string( ShuffleOrder &order, size_t vecSize, cl_uint lengt
     }
     else
     {
+        // Use xyzw.
         for( j = 0; j < vecSize && j < lengthToUse; j++ ) {
-            // Randomly choose upper and lower case.
             orderString[ j ] = names2[ (int)order[ j ] ];
         }
         orderString[ j ] = 0;
@@ -251,7 +250,9 @@ char * get_order_name( ExplicitType vecType, size_t inVecSize, size_t outVecSize
     if( inVecSize == 1 )
         inOrderStr[ 0 ] = 0;
     else
-        sprintf( inOrderStr, "%d.%s", (int)inVecSize, get_order_string( inOrder, outVecSize, lengthToUse, inUseNumerics, d ) );
+        sprintf(inOrderStr, "%d.%s", (int)inVecSize,
+                get_order_string(inOrder, inVecSize, lengthToUse, inUseNumerics,
+                                 d));
     if( outVecSize == 1 )
         outOrderStr[ 0 ] = 0;
     else
@@ -262,33 +263,48 @@ char * get_order_name( ExplicitType vecType, size_t inVecSize, size_t outVecSize
     return orderName;
 }
 
-void    print_hex_mem_dump( const unsigned char *inDataPtr, const unsigned char * inDataPtr2, const unsigned char *expected, const unsigned char *outDataPtr, size_t inVecSize, size_t outVecSize, size_t typeSize )
+void print_hex_mem_dump(const unsigned char *inDataPtr,
+                        const unsigned char *inDataPtr2,
+                        const unsigned char *expected,
+                        const unsigned char *outDataPtr, size_t inVecSize,
+                        size_t outVecSize, size_t typeSize)
 {
-    char error [4096] = "";
-    strcat(error, "      Source: ");
-    for( unsigned int j = 0; j < inVecSize * typeSize; j++ )
+    auto byte_to_hex_str = [](unsigned char v) {
+        // Use a new stream to avoid manipulating state of outer stream.
+        std::ostringstream ss;
+        ss << std::setfill('0') << std::setw(2) << std::right << std::hex << +v;
+        return ss.str();
+    };
+
+    std::ostringstream error;
+    error << "      Source: ";
+    for (size_t j = 0; j < inVecSize * typeSize; j++)
     {
-        sprintf(error, "%s%s%02x ",error, ( j % typeSize ) ? "" : " ", (cl_uchar)inDataPtr[ j ] );
+        error << (j % typeSize ? "" : " ") << byte_to_hex_str(inDataPtr[j])
+              << " ";
     }
-    if( inDataPtr2 != NULL )
+    if (inDataPtr2 != NULL)
     {
-        strcat(error, "\n    Source 2: ");
-        for( unsigned int j = 0; j < inVecSize * typeSize; j++ )
+        error << "\n    Source 2: ";
+        for (size_t j = 0; j < inVecSize * typeSize; j++)
         {
-            sprintf(error, "%s%s%02x ",error, ( j % typeSize ) ? "" : " ", (cl_uchar)inDataPtr2[ j ] );
+            error << (j % typeSize ? "" : " ") << byte_to_hex_str(inDataPtr2[j])
+                  << " ";
         }
     }
-    strcat(error, "\n    Expected: " );
-    for( unsigned int j = 0; j < outVecSize * typeSize; j++ )
+    error << "\n    Expected: ";
+    for (size_t j = 0; j < outVecSize * typeSize; j++)
     {
-        sprintf(error, "%s%s%02x ",error, ( j % typeSize ) ? "" : " ", (cl_uchar)expected[ j ] );
+        error << (j % typeSize ? "" : " ") << byte_to_hex_str(expected[j])
+              << " ";
     }
-    strcat(error, "\n      Actual: " );
-    for( unsigned int j = 0; j < outVecSize * typeSize; j++ )
+    error << "\n      Actual: ";
+    for (size_t j = 0; j < outVecSize * typeSize; j++)
     {
-        sprintf(error, "%s%s%02x ",error, ( j % typeSize ) ? "" : " ", (cl_uchar)outDataPtr[ j ] );
+        error << (j % typeSize ? "" : " ") << byte_to_hex_str(outDataPtr[j])
+              << " ";
     }
-    log_info("%s\n", error);
+    log_info("%s\n", error.str().c_str());
 }
 
 void generate_shuffle_mask( char *outMaskString, size_t maskSize, const ShuffleOrder *order )
@@ -321,7 +337,6 @@ static int create_shuffle_kernel( cl_context context, cl_program *outProgram, cl
                                  MTdata d, ShuffleMode shuffleMode = kNormalMode )
 {
     char inOrder[18], shuffledOrder[18];
-    size_t typeSize;
     char kernelSource[MAX_PROGRAM_SIZE], progLine[ 10240 ];
     char *programPtr;
     char inSizeName[4], outSizeName[4], outRealSizeName[4], inSizeArgName[4];
@@ -338,9 +353,6 @@ static int create_shuffle_kernel( cl_context context, cl_program *outProgram, cl
     else
         strcpy( inSizeArgName, inSizeName );
 
-
-    typeSize = get_explicit_type_size( vecType );
-
     *outRealVecSize = outVecSize;
 
     if( outVecSize == 1 ||  (outVecSize == 3))
@@ -398,7 +410,9 @@ static int create_shuffle_kernel( cl_context context, cl_program *outProgram, cl
     for( unsigned int i = 0; i < numOrders; i++ )
     {
         if( inOrders != NULL )
-            strcpy( inOrder, get_order_string( inOrders[ i ], outVecSize, lengthToUse[i], inUseNumerics, d ) );
+            strcpy(inOrder,
+                   get_order_string(inOrders[i], inVecSize, lengthToUse[i],
+                                    inUseNumerics, d));
         strcpy( shuffledOrder, get_order_string( outOrders[ i ], outVecSize, lengthToUse[i], outUseNumerics, d ) );
 
 
diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp
index 972a53c6..7fa3bc08 100644
--- a/test_conformance/select/test_select.cpp
+++ b/test_conformance/select/test_select.cpp
@@ -295,7 +295,7 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c
 {
     int err = CL_SUCCESS;
     int s_test_fail = 0;
-    MTdata    d;
+    MTdataHolder d;
     const size_t element_count[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 };
     cl_mem src1 = NULL;
     cl_mem src2 = NULL;
@@ -368,7 +368,7 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c
         cmp_stride = block_elements * step * (0xffffffffffffffffULL / 0x100000000ULL + 1);
 
     log_info("Testing...");
-    d = init_genrand( gRandomSeed );
+    d = MTdataHolder(gRandomSeed);
     uint64_t i;
     for (i=0; i < blocks; i+=step)
     {
@@ -460,7 +460,6 @@ exit:
     if( ref )   free(ref );
     if( sref )  free(sref );
 
-    free_mtdata(d);
     for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; vecsize++) {
         clReleaseKernel(kernels[vecsize]);
         clReleaseProgram(programs[vecsize]);
diff --git a/test_conformance/spir/datagen.h b/test_conformance/spir/datagen.h
index cf620466..d47d8df9 100644
--- a/test_conformance/spir/datagen.h
+++ b/test_conformance/spir/datagen.h
@@ -1055,8 +1055,6 @@ public:
 
     static cl_channel_order channelOrders[];
     static const char* imageTypes[];
-private:
-    WorkSizeInfo  m_wsInfo;
 };
 
 #endif
diff --git a/test_conformance/spir/kernelargs.h b/test_conformance/spir/kernelargs.h
index 7c5673e8..25acb56e 100644
--- a/test_conformance/spir/kernelargs.h
+++ b/test_conformance/spir/kernelargs.h
@@ -232,7 +232,7 @@ public:
         return &m_samplerObj;
     }
 
-    bool compare( const KernelArg& rhs ) const
+    bool compare(const KernelArg& rhs, float) const
     {
         if (const KernelArgSampler *Rhs = dynamic_cast<const KernelArgSampler*>(&rhs))
         {
diff --git a/test_conformance/spir/main.cpp b/test_conformance/spir/main.cpp
index 06caf33b..b02da734 100644
--- a/test_conformance/spir/main.cpp
+++ b/test_conformance/spir/main.cpp
@@ -124,23 +124,6 @@ void dealloc(T *p)
     if (p) delete p;
 }
 
-static bool is_dir_exits(const char* path)
-{
-    assert(path && "NULL directory");
-#if defined(_WIN32)
-    DWORD ftyp = GetFileAttributesA(path);
-    if (ftyp != INVALID_FILE_ATTRIBUTES && (ftyp & FILE_ATTRIBUTE_DIRECTORY))
-    return true;
-#else // Linux assumed here.
-    if (DIR *pDir = opendir(path))
-    {
-        closedir(pDir);
-        return true;
-    }
-#endif
-    return false;
-}
-
 static void get_spir_version(cl_device_id device,
                              std::vector<Version> &versions)
 {
@@ -205,21 +188,6 @@ static void printError(const std::string& S){
   std::cerr << S << std::endl;
 }
 
-static bool extractKernelAttribute(std::string& kernel_attributes,
-    const std::string& attribute, std::vector<std::string>& attribute_vector) {
-  size_t start = kernel_attributes.find(attribute + "(");
-  if (start == 0) {
-    size_t end = kernel_attributes.find(")", start);
-    if (end != std::string::npos) {
-      size_t length = end-start+1;
-      attribute_vector.push_back(kernel_attributes.substr(start, length));
-      kernel_attributes.erase(start, length);
-      return true;
-    }
-  }
-  return false;
-}
-
 // Extracts suite with the given name, and saves it to disk.
 static void extract_suite(const char *suiteName)
 {
@@ -6253,7 +6221,7 @@ static bool test_image_enumeration(cl_context context, cl_command_queue queue,
                 (FailE)(it.toString(), kernelName);
                 std::cout << "enum_" << it.toString() << " FAILED" << std::endl;
             }
-        } catch(std::exception e)
+        } catch (const std::exception &e)
         {
             (FailE)(it.toString(), kernelName);
             print_error(1, e.what());
@@ -6356,7 +6324,7 @@ static bool test_image_enumeration_3d(cl_context context, cl_command_queue queue
                 (FailE)(it.toString(), kernelName);
                 std::cout << "enum_" << it.toString() << " FAILED" << std::endl;
             }
-        } catch(std::exception e)
+        } catch (const std::exception &e)
         {
             (FailE)(it.toString(), kernelName);
             print_error(1, e.what());
@@ -6454,7 +6422,8 @@ std::vector<std::string> &split(const std::string &s, char delim, std::vector<st
     return elems;
 }
 
-
+// Temporarily disabled, see GitHub #1284
+#if 0
 static bool
 test_kernel_attributes(cl_device_id device, cl_uint width, const char *folder)
 {
@@ -6521,7 +6490,7 @@ test_kernel_attributes(cl_device_id device, cl_uint width, const char *folder)
         }
         (SuccE)(test_name, "");
         log_info("kernel_attributes passed.\n");
-    } catch(std::exception e)
+    } catch (const std::exception &e)
     {
         (FailE)(test_name, "");
         log_info("kernel_attributes FAILED\n");
@@ -6539,6 +6508,7 @@ test_kernel_attributes(cl_device_id device, cl_uint width, const char *folder)
     std::cout << std::endl;
     return success;
 }
+#endif
 
 static bool test_binary_type(cl_device_id device, cl_uint width, const char *folder)
 {
@@ -6587,7 +6557,7 @@ static bool test_binary_type(cl_device_id device, cl_uint width, const char *fol
         }
         (SuccE)(test_name, "");
         log_info("binary_type passed.\n");
-    } catch(std::exception e)
+    } catch (const std::exception &e)
     {
         (FailE)(test_name, "");
         log_info("binary_type FAILED\n");
@@ -6920,7 +6890,7 @@ int main (int argc, const char* argv[])
     cl_uint size_t_width = 0;                            // device address bits (32 or 64).
     cl_int err;
     int failed = 0;
-    int ntests = 0;
+    size_t ntests = 0;
     custom_cout atf_info;
     custom_cerr atf_error;
     override_buff atf_cout(std::cout, atf_info);
diff --git a/test_conformance/spir/run_build_test.cpp b/test_conformance/spir/run_build_test.cpp
index 9264d3a4..46f9d022 100644
--- a/test_conformance/spir/run_build_test.cpp
+++ b/test_conformance/spir/run_build_test.cpp
@@ -80,8 +80,9 @@ void Task::setErrorLog(cl_program prog) {
 //
 // BuildTask
 //
-BuildTask::BuildTask(cl_program prog, cl_device_id dev, const char* options) :
-    m_program(prog), Task(dev, options) {}
+BuildTask::BuildTask(cl_program prog, cl_device_id dev, const char* options)
+    : Task(dev, options), m_program(prog)
+{}
 
 bool BuildTask::execute() {
     cl_int err_code = clBuildProgram(m_program, 0, NULL, m_options.c_str(), NULL, NULL);
@@ -102,8 +103,9 @@ SpirBuildTask::SpirBuildTask(cl_program prog, cl_device_id dev, const char* opti
 // CompileTask
 //
 
-CompileTask::CompileTask(cl_program prog, cl_device_id dev, const char* options) :
-    m_program(prog), Task(dev, options) {}
+CompileTask::CompileTask(cl_program prog, cl_device_id dev, const char* options)
+    : Task(dev, options), m_program(prog)
+{}
 
 void CompileTask::addHeader(const char* hname, cl_program hprog) {
     m_headers.push_back(std::make_pair(hname, hprog));
@@ -162,9 +164,10 @@ SpirCompileTask::SpirCompileTask(cl_program prog, cl_device_id dev, const char*
 // LinkTask
 //
 LinkTask::LinkTask(cl_program* programs, int num_programs, cl_context ctxt,
-                   cl_device_id dev, const char* options) :
-    m_programs(programs), m_numPrograms(num_programs), m_context(ctxt), m_executable(NULL),
-    Task(dev, options) {}
+                   cl_device_id dev, const char* options)
+    : Task(dev, options), m_executable(NULL), m_programs(programs),
+      m_numPrograms(num_programs), m_context(ctxt)
+{}
 
 bool LinkTask::execute() {
     cl_int err_code;
@@ -462,8 +465,7 @@ bool TestRunner::runBuildTest(cl_device_id device, const char *folder,
                 log_info("kernel '%s' failed.\n", kernel_name.c_str());
                 (*m_failureHandler)(test_name, kernel_name);
             }
-        }
-        catch (std::runtime_error err)
+        } catch (const std::runtime_error& err)
         {
             ++failures;
             log_info("kernel '%s' failed: %s\n", kernel_name.c_str(), err.what());
diff --git a/test_conformance/spirv_new/CMakeLists.txt b/test_conformance/spirv_new/CMakeLists.txt
index 7500571d..68720975 100644
--- a/test_conformance/spirv_new/CMakeLists.txt
+++ b/test_conformance/spirv_new/CMakeLists.txt
@@ -12,7 +12,6 @@
 ######################################################################################################
 
 set(MODULE_NAME SPIRV_NEW)
-set(CMAKE_CXX_STANDARD 11)
 
 file(GLOB SPIRV_NEW_SOURCES "*.cpp")
 
diff --git a/test_conformance/spirv_new/procs.h b/test_conformance/spirv_new/procs.h
index 31c65a3b..b293a520 100644
--- a/test_conformance/spirv_new/procs.h
+++ b/test_conformance/spirv_new/procs.h
@@ -39,7 +39,7 @@ Agreement as executed between Khronos and the recipient.
 class baseTestClass {
 public:
     baseTestClass() {}
-    virtual basefn getFunction() = 0;
+    virtual test_function_pointer getFunction() = 0;
 };
 
 class spirvTestsRegistry {
@@ -72,11 +72,11 @@ template <typename T> T *createAndRegister(const char *name, Version version)
                            cl_command_queue queue, int num_elements);          \
     class test_##name##_class : public baseTestClass {                         \
     private:                                                                   \
-        basefn fn;                                                             \
+        test_function_pointer fn;                                              \
                                                                                \
     public:                                                                    \
         test_##name##_class(): fn(test_##name) {}                              \
-        basefn getFunction() { return fn; }                                    \
+        test_function_pointer getFunction() { return fn; }                     \
     };                                                                         \
     test_##name##_class *var_##name =                                          \
         createAndRegister<test_##name##_class>(#name, version);                \
@@ -97,4 +97,4 @@ struct spec_const
 int get_program_with_il(clProgramWrapper &prog, const cl_device_id deviceID,
                         const cl_context context, const char *prog_name,
                         spec_const spec_const_def = spec_const());
-std::vector<unsigned char> readSPIRV(const char *file_name);
-\ No newline at end of file
+std::vector<unsigned char> readSPIRV(const char *file_name);
diff --git a/test_conformance/spirv_new/test_get_program_il.cpp b/test_conformance/spirv_new/test_get_program_il.cpp
index cf349d17..c535eb53 100644
--- a/test_conformance/spirv_new/test_get_program_il.cpp
+++ b/test_conformance/spirv_new/test_get_program_il.cpp
@@ -41,7 +41,7 @@ TEST_SPIRV_FUNC(get_program_il)
 
         std::vector<unsigned char> spirv_binary = readSPIRV(spvName);
 
-        int file_bytes = spirv_binary.size();
+        size_t file_bytes = spirv_binary.size();
         if (file_bytes == 0)
         {
             test_fail("ERROR: SPIRV file %s not found!\n", spvName);
@@ -102,4 +102,4 @@ TEST_SPIRV_FUNC(get_program_il)
     }
 
     return 0;
-}
-\ No newline at end of file
+}
diff --git a/test_conformance/spirv_new/test_op_composite_construct.cpp b/test_conformance/spirv_new/test_op_composite_construct.cpp
index d07ebd3f..e009eadf 100644
--- a/test_conformance/spirv_new/test_op_composite_construct.cpp
+++ b/test_conformance/spirv_new/test_op_composite_construct.cpp
@@ -55,7 +55,7 @@ int test_composite_construct(cl_device_id deviceID, cl_context context,
 
 TEST_SPIRV_FUNC(op_composite_construct_int4)
 {
-    cl_int4 value = {123, 122, 121, 119};
+    cl_int4 value = { { 123, 122, 121, 119 } };
     std::vector<cl_int4> results(256, value);
     return test_composite_construct(deviceID, context, queue, "composite_construct_int4", results);
 }
@@ -66,7 +66,7 @@ TEST_SPIRV_FUNC(op_composite_construct_struct)
     typedef AbstractStruct2<cl_int2, CustomType1> CustomType2;
 
     CustomType1 value1 = {2100483600, 128};
-    cl_int2 intvals = {2100480000, 2100480000};
+    cl_int2 intvals = { { 2100480000, 2100480000 } };
     CustomType2 value2 = {intvals, value1};
 
     std::vector<CustomType2> results(256, value2);
diff --git a/test_conformance/spirv_new/test_op_constant.cpp b/test_conformance/spirv_new/test_op_constant.cpp
index afffdc35..7c3c146c 100644
--- a/test_conformance/spirv_new/test_op_constant.cpp
+++ b/test_conformance/spirv_new/test_op_constant.cpp
@@ -97,14 +97,14 @@ TEST_CONSTANT(double  , cl_double , 3.141592653589793)
 
 TEST_SPIRV_FUNC(op_constant_int4_simple)
 {
-    cl_int4 value = {123, 122, 121, 119};
+    cl_int4 value = { { 123, 122, 121, 119 } };
     std::vector<cl_int4> results(256, value);
     return test_constant(deviceID, context, queue, "constant_int4_simple", results);
 }
 
 TEST_SPIRV_FUNC(op_constant_int3_simple)
 {
-    cl_int3 value = {123, 122, 121, 0};
+    cl_int3 value = { { 123, 122, 121, 0 } };
     std::vector<cl_int3> results(256, value);
     return test_constant(deviceID, context, queue, "constant_int3_simple",
                          results, isVectorNotEqual<cl_int3, 3>);
@@ -130,7 +130,7 @@ TEST_SPIRV_FUNC(op_constant_struct_struct_simple)
     typedef AbstractStruct2<cl_int2, CustomType1> CustomType2;
 
     CustomType1 value1 = {2100483600, 128};
-    cl_int2 intvals = {2100480000, 2100480000};
+    cl_int2 intvals = { { 2100480000, 2100480000 } };
     CustomType2 value2 = {intvals, value1};
 
     std::vector<CustomType2> results(256, value2);
diff --git a/test_conformance/spirv_new/test_op_copy_object.cpp b/test_conformance/spirv_new/test_op_copy_object.cpp
index b2b99955..868300d3 100644
--- a/test_conformance/spirv_new/test_op_copy_object.cpp
+++ b/test_conformance/spirv_new/test_op_copy_object.cpp
@@ -93,14 +93,14 @@ TEST_COPY(double  , cl_double , 3.141592653589793)
 
 TEST_SPIRV_FUNC(op_copy_int4_simple)
 {
-    cl_int4 value = {123, 122, 121, 119};
+    cl_int4 value = { { 123, 122, 121, 119 } };
     std::vector<cl_int4> results(256, value);
     return test_copy(deviceID, context, queue, "copy_int4_simple", results);
 }
 
 TEST_SPIRV_FUNC(op_copy_int3_simple)
 {
-    cl_int3 value = {123, 122, 121, 0};
+    cl_int3 value = { { 123, 122, 121, 0 } };
     std::vector<cl_int3> results(256, value);
     return test_copy(deviceID, context, queue, "copy_int3_simple",
                      results, isVectorNotEqual<cl_int3, 3>);
@@ -126,7 +126,7 @@ TEST_SPIRV_FUNC(op_copy_struct_struct_simple)
     typedef AbstractStruct2<cl_int2, CustomType1> CustomType2;
 
     CustomType1 value1 = {2100483600, 128};
-    cl_int2 intvals = {2100480000, 2100480000};
+    cl_int2 intvals = { { 2100480000, 2100480000 } };
     CustomType2 value2 = {intvals, value1};
 
     std::vector<CustomType2> results(256, value2);
diff --git a/test_conformance/spirv_new/test_op_fmath.cpp b/test_conformance/spirv_new/test_op_fmath.cpp
index 61e2864d..3cf01837 100644
--- a/test_conformance/spirv_new/test_op_fmath.cpp
+++ b/test_conformance/spirv_new/test_op_fmath.cpp
@@ -173,13 +173,15 @@ int test_fmath(cl_device_id deviceID,
                           lhs, rhs);                \
     }
 
-#define TEST_FMATH_MODE(TYPE, MODE)             \
-    TEST_FMATH_FUNC(TYPE, fadd, MODE)           \
-    TEST_FMATH_FUNC(TYPE, fsub, MODE)           \
-    TEST_FMATH_FUNC(TYPE, fmul, MODE)           \
-    TEST_FMATH_FUNC(TYPE, fdiv, MODE)           \
-    TEST_FMATH_FUNC(TYPE, frem, MODE)           \
-    TEST_FMATH_FUNC(TYPE, fmod, MODE)           \
+#define TEST_FMATH_MODE(TYPE, MODE)                                            \
+    TEST_FMATH_FUNC(TYPE, fadd, MODE)                                          \
+    TEST_FMATH_FUNC(TYPE, fsub, MODE)                                          \
+    TEST_FMATH_FUNC(TYPE, fmul, MODE)                                          \
+    TEST_FMATH_FUNC(TYPE, fdiv, MODE)                                          \
+    // disable those tests until we figure out what the precision requirements
+    // are
+    //    TEST_FMATH_FUNC(TYPE, frem, MODE)
+    //    TEST_FMATH_FUNC(TYPE, fmod, MODE)
 
 #define TEST_FMATH_TYPE(TYPE)                   \
     TEST_FMATH_MODE(TYPE, regular)              \
diff --git a/test_conformance/subgroups/CMakeLists.txt b/test_conformance/subgroups/CMakeLists.txt
index 1ff249cf..a9bc496d 100644
--- a/test_conformance/subgroups/CMakeLists.txt
+++ b/test_conformance/subgroups/CMakeLists.txt
@@ -2,6 +2,7 @@ set(MODULE_NAME SUBGROUPS)
 
 set(${MODULE_NAME}_SOURCES
     main.cpp
+    subhelpers.cpp
     test_barrier.cpp
     test_queries.cpp
     test_workitem.cpp
diff --git a/test_conformance/subgroups/procs.h b/test_conformance/subgroups/procs.h
index d4f51bec..af6444c0 100644
--- a/test_conformance/subgroups/procs.h
+++ b/test_conformance/subgroups/procs.h
@@ -20,7 +20,6 @@
 #include "harness/kernelHelpers.h"
 #include "harness/errorHelpers.h"
 #include "harness/conversions.h"
-#include "harness/threadTesting.h"
 #include "harness/typeWrappers.h"
 #include "harness/mt19937.h"
 
diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h
index b2648c30..f779ef37 100644
--- a/test_conformance/subgroups/subgroup_common_templates.h
+++ b/test_conformance/subgroups/subgroup_common_templates.h
@@ -21,39 +21,6 @@
 #include "subhelpers.h"
 #include <set>
 #include <algorithm>
-#include <random>
-
-static cl_uint4 generate_bit_mask(cl_uint subgroup_local_id,
-                                  const std::string &mask_type,
-                                  cl_uint max_sub_group_size)
-{
-    bs128 mask128;
-    cl_uint4 mask;
-    cl_uint pos = subgroup_local_id;
-    if (mask_type == "eq") mask128.set(pos);
-    if (mask_type == "le" || mask_type == "lt")
-    {
-        for (cl_uint i = 0; i <= pos; i++) mask128.set(i);
-        if (mask_type == "lt") mask128.reset(pos);
-    }
-    if (mask_type == "ge" || mask_type == "gt")
-    {
-        for (cl_uint i = pos; i < max_sub_group_size; i++) mask128.set(i);
-        if (mask_type == "gt") mask128.reset(pos);
-    }
-
-    // convert std::bitset<128> to uint4
-    auto const uint_mask = bs128{ static_cast<unsigned long>(-1) };
-    mask.s0 = (mask128 & uint_mask).to_ulong();
-    mask128 >>= 32;
-    mask.s1 = (mask128 & uint_mask).to_ulong();
-    mask128 >>= 32;
-    mask.s2 = (mask128 & uint_mask).to_ulong();
-    mask128 >>= 32;
-    mask.s3 = (mask128 & uint_mask).to_ulong();
-
-    return mask;
-}
 
 // DESCRIPTION :
 // sub_group_broadcast - each work_item registers it's own value.
@@ -280,10 +247,10 @@ template <typename Ty, SubgroupsBroadcastOp operation> struct BC
                         {
                             log_error("ERROR: sub_group_%s(%s) "
                                       "mismatch for local id %d in sub "
-                                      "group %d in group %d - got %lu "
-                                      "expected %lu\n",
+                                      "group %d in group %d - %s\n",
                                       operation_names(operation),
-                                      TypeManager<Ty>::name(), i, j, k, rr, tr);
+                                      TypeManager<Ty>::name(), i, j, k,
+                                      print_expected_obtained(tr, rr).c_str());
                             return TEST_FAIL;
                         }
                     }
@@ -393,33 +360,6 @@ template <typename Ty> bool is_floating_point()
         || std::is_same<Ty, subgroups::cl_half>::value;
 }
 
-// limit possible input values to avoid arithmetic rounding/overflow issues.
-// for each subgroup values defined different values
-// for rest of workitems set 1
-// shuffle values
-static void fill_and_shuffle_safe_values(std::vector<cl_ulong> &safe_values,
-                                         int sb_size)
-{
-    // max product is 720, cl_half has enough precision for it
-    const std::vector<cl_ulong> non_one_values{ 2, 3, 4, 5, 6 };
-
-    if (sb_size <= non_one_values.size())
-    {
-        safe_values.assign(non_one_values.begin(),
-                           non_one_values.begin() + sb_size);
-    }
-    else
-    {
-        safe_values.assign(sb_size, 1);
-        std::copy(non_one_values.begin(), non_one_values.end(),
-                  safe_values.begin());
-    }
-
-    std::mt19937 mersenne_twister_engine(10000);
-    std::shuffle(safe_values.begin(), safe_values.end(),
-                 mersenne_twister_engine);
-};
-
 template <typename Ty, ArithmeticOp operation>
 void generate_inputs(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng)
 {
@@ -703,9 +643,10 @@ template <typename Ty, ArithmeticOp operation> struct SCEX_NU
                             log_error(
                                 "ERROR: %s_%s(%s) "
                                 "mismatch for local id %d in sub group %d in "
-                                "group %d Expected: %d Obtained: %d\n",
+                                "group %d %s\n",
                                 func_name.c_str(), operation_names(operation),
-                                TypeManager<Ty>::name(), i, j, k, tr, rr);
+                                TypeManager<Ty>::name(), i, j, k,
+                                print_expected_obtained(tr, rr).c_str());
                             return TEST_FAIL;
                         }
                         tr = calculate<Ty>(tr, mx[ii + active_work_item],
@@ -820,10 +761,10 @@ template <typename Ty, ArithmeticOp operation> struct SCIN_NU
                                 "ERROR: %s_%s(%s) "
                                 "mismatch for local id %d in sub group %d "
                                 "in "
-                                "group %d Expected: %d Obtained: %d\n",
+                                "group %d %s\n",
                                 func_name.c_str(), operation_names(operation),
                                 TypeManager<Ty>::name(), active_work_item, j, k,
-                                tr, rr);
+                                print_expected_obtained(tr, rr).c_str());
                             return TEST_FAIL;
                         }
                     }
@@ -926,10 +867,10 @@ template <typename Ty, ArithmeticOp operation> struct RED_NU
                     {
                         log_error("ERROR: %s_%s(%s) "
                                   "mismatch for local id %d in sub group %d in "
-                                  "group %d Expected: %d Obtained: %d\n",
+                                  "group %d %s\n",
                                   func_name.c_str(), operation_names(operation),
                                   TypeManager<Ty>::name(), active_work_item, j,
-                                  k, tr, rr);
+                                  k, print_expected_obtained(tr, rr).c_str());
                         return TEST_FAIL;
                     }
                 }
diff --git a/test_conformance/subgroups/subhelpers.cpp b/test_conformance/subgroups/subhelpers.cpp
new file mode 100644
index 00000000..11268f64
--- /dev/null
+++ b/test_conformance/subgroups/subhelpers.cpp
@@ -0,0 +1,229 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "subhelpers.h"
+
+#include <random>
+
+// Define operator<< for cl_ types, accessing the .s member.
+#define OP_OSTREAM(Ty, VecSize)                                                \
+    std::ostream& operator<<(std::ostream& os, const Ty##VecSize& val)         \
+    {                                                                          \
+        os << +val.s[0]; /* unary plus forces char to be printed as number */  \
+        for (unsigned i = 1; i < VecSize; i++)                                 \
+        {                                                                      \
+            os << ", " << +val.s[i];                                           \
+        }                                                                      \
+        return os;                                                             \
+    }
+
+// Define operator<< for subgroups::cl_ types, accessing the .data member and
+// forwarding to operator<< for the cl_ types.
+#define OP_OSTREAM_SUBGROUP(Ty, VecSize)                                       \
+    std::ostream& operator<<(std::ostream& os, const Ty##VecSize& val)         \
+    {                                                                          \
+        return os << val.data;                                                 \
+    }
+
+// Define operator<< for all vector sizes.
+#define OP_OSTREAM_ALL_VEC(Ty)                                                 \
+    OP_OSTREAM(Ty, 2)                                                          \
+    OP_OSTREAM(Ty, 4)                                                          \
+    OP_OSTREAM(Ty, 8)                                                          \
+    OP_OSTREAM(Ty, 16)                                                         \
+    OP_OSTREAM_SUBGROUP(subgroups::Ty, 3)
+
+OP_OSTREAM_ALL_VEC(cl_char)
+OP_OSTREAM_ALL_VEC(cl_uchar)
+OP_OSTREAM_ALL_VEC(cl_short)
+OP_OSTREAM_ALL_VEC(cl_ushort)
+OP_OSTREAM_ALL_VEC(cl_int)
+OP_OSTREAM_ALL_VEC(cl_uint)
+OP_OSTREAM_ALL_VEC(cl_long)
+OP_OSTREAM_ALL_VEC(cl_ulong)
+OP_OSTREAM_ALL_VEC(cl_float)
+OP_OSTREAM_ALL_VEC(cl_double)
+OP_OSTREAM_ALL_VEC(cl_half)
+OP_OSTREAM_SUBGROUP(subgroups::cl_half, )
+OP_OSTREAM_SUBGROUP(subgroups::cl_half, 2)
+OP_OSTREAM_SUBGROUP(subgroups::cl_half, 4)
+OP_OSTREAM_SUBGROUP(subgroups::cl_half, 8)
+OP_OSTREAM_SUBGROUP(subgroups::cl_half, 16)
+
+bs128 cl_uint4_to_bs128(cl_uint4 v)
+{
+    return bs128(v.s0) | (bs128(v.s1) << 32) | (bs128(v.s2) << 64)
+        | (bs128(v.s3) << 96);
+}
+
+cl_uint4 bs128_to_cl_uint4(bs128 v)
+{
+    bs128 bs128_ffffffff = 0xffffffffU;
+
+    cl_uint4 r;
+    r.s0 = ((v >> 0) & bs128_ffffffff).to_ulong();
+    r.s1 = ((v >> 32) & bs128_ffffffff).to_ulong();
+    r.s2 = ((v >> 64) & bs128_ffffffff).to_ulong();
+    r.s3 = ((v >> 96) & bs128_ffffffff).to_ulong();
+
+    return r;
+}
+
+cl_uint4 generate_bit_mask(cl_uint subgroup_local_id,
+                           const std::string &mask_type,
+                           cl_uint max_sub_group_size)
+{
+    bs128 mask128;
+    cl_uint4 mask;
+    cl_uint pos = subgroup_local_id;
+    if (mask_type == "eq") mask128.set(pos);
+    if (mask_type == "le" || mask_type == "lt")
+    {
+        for (cl_uint i = 0; i <= pos; i++) mask128.set(i);
+        if (mask_type == "lt") mask128.reset(pos);
+    }
+    if (mask_type == "ge" || mask_type == "gt")
+    {
+        for (cl_uint i = pos; i < max_sub_group_size; i++) mask128.set(i);
+        if (mask_type == "gt") mask128.reset(pos);
+    }
+
+    // convert std::bitset<128> to uint4
+    auto const uint_mask = bs128{ static_cast<unsigned long>(-1) };
+    mask.s0 = (mask128 & uint_mask).to_ulong();
+    mask128 >>= 32;
+    mask.s1 = (mask128 & uint_mask).to_ulong();
+    mask128 >>= 32;
+    mask.s2 = (mask128 & uint_mask).to_ulong();
+    mask128 >>= 32;
+    mask.s3 = (mask128 & uint_mask).to_ulong();
+
+    return mask;
+}
+
+const char *const operation_names(ArithmeticOp operation)
+{
+    switch (operation)
+    {
+        case ArithmeticOp::add_: return "add";
+        case ArithmeticOp::max_: return "max";
+        case ArithmeticOp::min_: return "min";
+        case ArithmeticOp::mul_: return "mul";
+        case ArithmeticOp::and_: return "and";
+        case ArithmeticOp::or_: return "or";
+        case ArithmeticOp::xor_: return "xor";
+        case ArithmeticOp::logical_and: return "logical_and";
+        case ArithmeticOp::logical_or: return "logical_or";
+        case ArithmeticOp::logical_xor: return "logical_xor";
+        default: log_error("Unknown operation request\n"); break;
+    }
+    return "";
+}
+
+const char *const operation_names(BallotOp operation)
+{
+    switch (operation)
+    {
+        case BallotOp::ballot: return "ballot";
+        case BallotOp::inverse_ballot: return "inverse_ballot";
+        case BallotOp::ballot_bit_extract: return "bit_extract";
+        case BallotOp::ballot_bit_count: return "bit_count";
+        case BallotOp::ballot_inclusive_scan: return "inclusive_scan";
+        case BallotOp::ballot_exclusive_scan: return "exclusive_scan";
+        case BallotOp::ballot_find_lsb: return "find_lsb";
+        case BallotOp::ballot_find_msb: return "find_msb";
+        case BallotOp::eq_mask: return "eq";
+        case BallotOp::ge_mask: return "ge";
+        case BallotOp::gt_mask: return "gt";
+        case BallotOp::le_mask: return "le";
+        case BallotOp::lt_mask: return "lt";
+        default: log_error("Unknown operation request\n"); break;
+    }
+    return "";
+}
+
+const char *const operation_names(ShuffleOp operation)
+{
+    switch (operation)
+    {
+        case ShuffleOp::shuffle: return "shuffle";
+        case ShuffleOp::shuffle_up: return "shuffle_up";
+        case ShuffleOp::shuffle_down: return "shuffle_down";
+        case ShuffleOp::shuffle_xor: return "shuffle_xor";
+        case ShuffleOp::rotate: return "rotate";
+        case ShuffleOp::clustered_rotate: return "clustered_rotate";
+        default: log_error("Unknown operation request\n"); break;
+    }
+    return "";
+}
+
+const char *const operation_names(NonUniformVoteOp operation)
+{
+    switch (operation)
+    {
+        case NonUniformVoteOp::all: return "all";
+        case NonUniformVoteOp::all_equal: return "all_equal";
+        case NonUniformVoteOp::any: return "any";
+        case NonUniformVoteOp::elect: return "elect";
+        default: log_error("Unknown operation request\n"); break;
+    }
+    return "";
+}
+
+const char *const operation_names(SubgroupsBroadcastOp operation)
+{
+    switch (operation)
+    {
+        case SubgroupsBroadcastOp::broadcast: return "broadcast";
+        case SubgroupsBroadcastOp::broadcast_first: return "broadcast_first";
+        case SubgroupsBroadcastOp::non_uniform_broadcast:
+            return "non_uniform_broadcast";
+        default: log_error("Unknown operation request\n"); break;
+    }
+    return "";
+}
+
+void set_last_workgroup_params(int non_uniform_size, int &number_of_subgroups,
+                               int subgroup_size, int &workgroup_size,
+                               int &last_subgroup_size)
+{
+    number_of_subgroups = 1 + non_uniform_size / subgroup_size;
+    last_subgroup_size = non_uniform_size % subgroup_size;
+    workgroup_size = non_uniform_size;
+}
+
+void fill_and_shuffle_safe_values(std::vector<cl_ulong> &safe_values,
+                                  int sb_size)
+{
+    // max product is 720, cl_half has enough precision for it
+    const std::vector<cl_ulong> non_one_values{ 2, 3, 4, 5, 6 };
+
+    if (sb_size <= non_one_values.size())
+    {
+        safe_values.assign(non_one_values.begin(),
+                           non_one_values.begin() + sb_size);
+    }
+    else
+    {
+        safe_values.assign(sb_size, 1);
+        std::copy(non_one_values.begin(), non_one_values.end(),
+                  safe_values.begin());
+    }
+
+    std::mt19937 mersenne_twister_engine(10000);
+    std::shuffle(safe_values.begin(), safe_values.end(),
+                 mersenne_twister_engine);
+}
diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h
index 0a2c3903..bcb523cf 100644
--- a/test_conformance/subgroups/subhelpers.h
+++ b/test_conformance/subgroups/subhelpers.h
@@ -34,24 +34,17 @@ extern MTdata gMTdata;
 typedef std::bitset<128> bs128;
 extern cl_half_rounding_mode g_rounding_mode;
 
-static bs128 cl_uint4_to_bs128(cl_uint4 v)
-{
-    return bs128(v.s0) | (bs128(v.s1) << 32) | (bs128(v.s2) << 64)
-        | (bs128(v.s3) << 96);
-}
-
-static cl_uint4 bs128_to_cl_uint4(bs128 v)
-{
-    bs128 bs128_ffffffff = 0xffffffffU;
-
-    cl_uint4 r;
-    r.s0 = ((v >> 0) & bs128_ffffffff).to_ulong();
-    r.s1 = ((v >> 32) & bs128_ffffffff).to_ulong();
-    r.s2 = ((v >> 64) & bs128_ffffffff).to_ulong();
-    r.s3 = ((v >> 96) & bs128_ffffffff).to_ulong();
-
-    return r;
-}
+bs128 cl_uint4_to_bs128(cl_uint4 v);
+cl_uint4 bs128_to_cl_uint4(bs128 v);
+cl_uint4 generate_bit_mask(cl_uint subgroup_local_id,
+                           const std::string &mask_type,
+                           cl_uint max_sub_group_size);
+
+// limit possible input values to avoid arithmetic rounding/overflow issues.
+// for each subgroup values defined different values
+// for rest of workitems set 1 shuffle values
+void fill_and_shuffle_safe_values(std::vector<cl_ulong> &safe_values,
+                                  int sb_size);
 
 struct WorkGroupParams
 {
@@ -270,87 +263,11 @@ enum class ArithmeticOp
     logical_xor
 };
 
-static const char *const operation_names(ArithmeticOp operation)
-{
-    switch (operation)
-    {
-        case ArithmeticOp::add_: return "add";
-        case ArithmeticOp::max_: return "max";
-        case ArithmeticOp::min_: return "min";
-        case ArithmeticOp::mul_: return "mul";
-        case ArithmeticOp::and_: return "and";
-        case ArithmeticOp::or_: return "or";
-        case ArithmeticOp::xor_: return "xor";
-        case ArithmeticOp::logical_and: return "logical_and";
-        case ArithmeticOp::logical_or: return "logical_or";
-        case ArithmeticOp::logical_xor: return "logical_xor";
-        default: log_error("Unknown operation request\n"); break;
-    }
-    return "";
-}
-
-static const char *const operation_names(BallotOp operation)
-{
-    switch (operation)
-    {
-        case BallotOp::ballot: return "ballot";
-        case BallotOp::inverse_ballot: return "inverse_ballot";
-        case BallotOp::ballot_bit_extract: return "bit_extract";
-        case BallotOp::ballot_bit_count: return "bit_count";
-        case BallotOp::ballot_inclusive_scan: return "inclusive_scan";
-        case BallotOp::ballot_exclusive_scan: return "exclusive_scan";
-        case BallotOp::ballot_find_lsb: return "find_lsb";
-        case BallotOp::ballot_find_msb: return "find_msb";
-        case BallotOp::eq_mask: return "eq";
-        case BallotOp::ge_mask: return "ge";
-        case BallotOp::gt_mask: return "gt";
-        case BallotOp::le_mask: return "le";
-        case BallotOp::lt_mask: return "lt";
-        default: log_error("Unknown operation request\n"); break;
-    }
-    return "";
-}
-
-static const char *const operation_names(ShuffleOp operation)
-{
-    switch (operation)
-    {
-        case ShuffleOp::shuffle: return "shuffle";
-        case ShuffleOp::shuffle_up: return "shuffle_up";
-        case ShuffleOp::shuffle_down: return "shuffle_down";
-        case ShuffleOp::shuffle_xor: return "shuffle_xor";
-        case ShuffleOp::rotate: return "rotate";
-        case ShuffleOp::clustered_rotate: return "clustered_rotate";
-        default: log_error("Unknown operation request\n"); break;
-    }
-    return "";
-}
-
-static const char *const operation_names(NonUniformVoteOp operation)
-{
-    switch (operation)
-    {
-        case NonUniformVoteOp::all: return "all";
-        case NonUniformVoteOp::all_equal: return "all_equal";
-        case NonUniformVoteOp::any: return "any";
-        case NonUniformVoteOp::elect: return "elect";
-        default: log_error("Unknown operation request\n"); break;
-    }
-    return "";
-}
-
-static const char *const operation_names(SubgroupsBroadcastOp operation)
-{
-    switch (operation)
-    {
-        case SubgroupsBroadcastOp::broadcast: return "broadcast";
-        case SubgroupsBroadcastOp::broadcast_first: return "broadcast_first";
-        case SubgroupsBroadcastOp::non_uniform_broadcast:
-            return "non_uniform_broadcast";
-        default: log_error("Unknown operation request\n"); break;
-    }
-    return "";
-}
+const char *const operation_names(ArithmeticOp operation);
+const char *const operation_names(BallotOp operation);
+const char *const operation_names(ShuffleOp operation);
+const char *const operation_names(NonUniformVoteOp operation);
+const char *const operation_names(SubgroupsBroadcastOp operation);
 
 class subgroupsAPI {
 public:
@@ -456,6 +373,52 @@ struct cl_half16
 };
 }
 
+// Declare operator<< for cl_ types, accessing the .s member.
+#define OP_OSTREAM(Ty, VecSize)                                                \
+    std::ostream &operator<<(std::ostream &os, const Ty##VecSize &val);
+
+// Declare operator<< for subgroups::cl_ types, accessing the .data member and
+// forwarding to operator<< for the cl_ types.
+#define OP_OSTREAM_SUBGROUP(Ty, VecSize)                                       \
+    std::ostream &operator<<(std::ostream &os, const Ty##VecSize &val);
+
+// Declare operator<< for all vector sizes.
+#define OP_OSTREAM_ALL_VEC(Ty)                                                 \
+    OP_OSTREAM(Ty, 2)                                                          \
+    OP_OSTREAM(Ty, 4)                                                          \
+    OP_OSTREAM(Ty, 8)                                                          \
+    OP_OSTREAM(Ty, 16)                                                         \
+    OP_OSTREAM_SUBGROUP(subgroups::Ty, 3)
+
+OP_OSTREAM_ALL_VEC(cl_char)
+OP_OSTREAM_ALL_VEC(cl_uchar)
+OP_OSTREAM_ALL_VEC(cl_short)
+OP_OSTREAM_ALL_VEC(cl_ushort)
+OP_OSTREAM_ALL_VEC(cl_int)
+OP_OSTREAM_ALL_VEC(cl_uint)
+OP_OSTREAM_ALL_VEC(cl_long)
+OP_OSTREAM_ALL_VEC(cl_ulong)
+OP_OSTREAM_ALL_VEC(cl_float)
+OP_OSTREAM_ALL_VEC(cl_double)
+OP_OSTREAM_ALL_VEC(cl_half)
+OP_OSTREAM_SUBGROUP(subgroups::cl_half, )
+OP_OSTREAM_SUBGROUP(subgroups::cl_half, 2)
+OP_OSTREAM_SUBGROUP(subgroups::cl_half, 4)
+OP_OSTREAM_SUBGROUP(subgroups::cl_half, 8)
+OP_OSTREAM_SUBGROUP(subgroups::cl_half, 16)
+
+#undef OP_OSTREAM
+#undef OP_OSTREAM_SUBGROUP
+#undef OP_OSTREAM_ALL_VEC
+
+template <typename Ty>
+std::string print_expected_obtained(const Ty &expected, const Ty &obtained)
+{
+    std::ostringstream oss;
+    oss << "Expected: " << expected << " Obtained: " << obtained;
+    return oss.str();
+}
+
 static bool int64_ok(cl_device_id device)
 {
     char profile[128];
@@ -1686,15 +1649,9 @@ template <typename Ty, typename Fns, size_t TSIZE = 0> struct test
     }
 };
 
-static void set_last_workgroup_params(int non_uniform_size,
-                                      int &number_of_subgroups,
-                                      int subgroup_size, int &workgroup_size,
-                                      int &last_subgroup_size)
-{
-    number_of_subgroups = 1 + non_uniform_size / subgroup_size;
-    last_subgroup_size = non_uniform_size % subgroup_size;
-    workgroup_size = non_uniform_size;
-}
+void set_last_workgroup_params(int non_uniform_size, int &number_of_subgroups,
+                               int subgroup_size, int &workgroup_size,
+                               int &last_subgroup_size);
 
 template <typename Ty>
 static void set_randomdata_for_subgroup(Ty *workgroup, int wg_offset,
diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp
index 3882311d..6795a411 100644
--- a/test_conformance/subgroups/test_subgroup_ballot.cpp
+++ b/test_conformance/subgroups/test_subgroup_ballot.cpp
@@ -75,7 +75,7 @@ template <typename Ty> struct BALLOT
                     {
                         v = genrand_int32(gMTdata);
                     }
-                    cl_uint4 v4 = { v, 0, 0, 0 };
+                    cl_uint4 v4 = { { v, 0, 0, 0 } };
                     t[wi_id + wg_offset] = v4;
                 }
             }
@@ -307,13 +307,13 @@ template <typename Ty, BallotOp operation> struct BALLOT_BIT_EXTRACT
 
                     if (wi_id & 1)
                     {
-                        bit_value ? expected_result = { 1, 0, 0, 1 }
-                                  : expected_result = { 0, 0, 0, 1 };
+                        bit_value ? expected_result = { { 1, 0, 0, 1 } }
+                                  : expected_result = { { 0, 0, 0, 1 } };
                     }
                     else
                     {
-                        bit_value ? expected_result = { 1, 0, 0, 2 }
-                                  : expected_result = { 0, 0, 0, 2 };
+                        bit_value ? expected_result = { { 1, 0, 0, 2 } }
+                                  : expected_result = { { 0, 0, 0, 2 } };
                     }
 
                     device_result = my[wg_offset + wi_id];
@@ -398,8 +398,8 @@ template <typename Ty, BallotOp operation> struct BALLOT_INVERSE
                 for (wi_id = 0; wi_id < current_sbs; ++wi_id)
                 { // for each subgroup work item
 
-                    wi_id & 1 ? expected_result = { 1, 0, 0, 1 }
-                              : expected_result = { 1, 0, 0, 2 };
+                    wi_id & 1 ? expected_result = { { 1, 0, 0, 1 } }
+                              : expected_result = { { 1, 0, 0, 2 } };
 
                     device_result = my[wg_offset + wi_id];
                     if (!compare(device_result, expected_result))
@@ -691,7 +691,7 @@ template <typename Ty, BallotOp operation> struct SMASK
                 {
                     int midx = 4 * wg_offset + 4 * wi_id;
                     cl_uint max_sub_group_size = m[midx + 2];
-                    cl_uint4 expected_mask = { 0 };
+                    cl_uint4 expected_mask = { { 0 } };
                     expected_mask = generate_bit_mask(
                         wi_id, operation_names(operation), max_sub_group_size);
                     set_value(t[wg_offset + wi_id], expected_mask);
@@ -744,9 +744,12 @@ template <typename Ty, BallotOp operation> struct SMASK
                     {
                         log_error("ERROR:  get_sub_group_%s_mask... mismatch "
                                   "for local id %d in sub group %d in group "
-                                  "%d, obtained %d, expected %d\n",
+                                  "%d, %s\n",
                                   operation_names(operation), wi_id, sb_id,
-                                  wg_id, device_result, expected_result);
+                                  wg_id,
+                                  print_expected_obtained(expected_result,
+                                                          device_result)
+                                      .c_str());
                         return TEST_FAIL;
                     }
                 }
diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp
index 38652d51..4e93e558 100644
--- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp
+++ b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp
@@ -51,7 +51,7 @@ template <typename Ty, ArithmeticOp operation> struct RED_CLU
     static void log_test(const WorkGroupParams &test_params,
                          const char *extra_text)
     {
-        log_info("  sub_group_clustered_reduce_%s(%s, %d bytes) ...%s\n",
+        log_info("  sub_group_clustered_reduce_%s(%s, %zu bytes) ...%s\n",
                  operation_names(operation), TypeManager<Ty>::name(),
                  sizeof(Ty), extra_text);
     }
@@ -90,7 +90,7 @@ template <typename Ty, ArithmeticOp operation> struct RED_CLU
                 if (dts != sizeof(Ty))
                 {
                     log_error("ERROR: sub_group_clustered_reduce_%s(%s) "
-                              "wrong data type size detected, expected: %d, "
+                              "wrong data type size detected, expected: %zu, "
                               "used by device %d, in group %d\n",
                               operation_names(operation),
                               TypeManager<Ty>::name(), sizeof(Ty), dts, k);
diff --git a/test_conformance/vectors/defines.h b/test_conformance/vectors/defines.h
index c96c3dad..0ea0b00d 100644
--- a/test_conformance/vectors/defines.h
+++ b/test_conformance/vectors/defines.h
@@ -15,7 +15,6 @@
 //
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
-#include "harness/threadTesting.h"
 #include "harness/typeWrappers.h"
 #include "harness/conversions.h"
 #include "harness/mt19937.h"
diff --git a/test_conformance/vectors/procs.h b/test_conformance/vectors/procs.h
index db423a6a..7a6dba41 100644
--- a/test_conformance/vectors/procs.h
+++ b/test_conformance/vectors/procs.h
@@ -15,7 +15,6 @@
 //
 #include "harness/errorHelpers.h"
 #include "harness/kernelHelpers.h"
-#include "harness/threadTesting.h"
 #include "harness/typeWrappers.h"
 #include "harness/conversions.h"
 #include "harness/mt19937.h"
diff --git a/test_conformance/vectors/test_step.cpp b/test_conformance/vectors/test_step.cpp
index 089bad2f..c4f673f9 100644
--- a/test_conformance/vectors/test_step.cpp
+++ b/test_conformance/vectors/test_step.cpp
@@ -55,17 +55,6 @@ int test_step_internal(cl_device_id deviceID, cl_context context,
         return -1;
     }
 
-    // detect whether profile of the device is embedded
-    char profile[1024] = "";
-    err = clGetDeviceInfo(deviceID, CL_DEVICE_PROFILE, sizeof(profile), profile,
-                          NULL);
-    if (err)
-    {
-        print_error(err, "clGetDeviceInfo for CL_DEVICE_PROFILE failed\n");
-        return -1;
-    }
-    gIsEmbedded = NULL != strstr(profile, "EMBEDDED_PROFILE");
-
     for (typeIdx = 0; types[typeIdx] != kNumExplicitTypes; ++typeIdx)
     {
         if (types[typeIdx] == kDouble)
diff --git a/test_conformance/vulkan/CMakeLists.txt b/test_conformance/vulkan/CMakeLists.txt
index 4f43172a..9778693b 100644
--- a/test_conformance/vulkan/CMakeLists.txt
+++ b/test_conformance/vulkan/CMakeLists.txt
@@ -1,12 +1,12 @@
 set (MODULE_NAME VULKAN)
 
 if(WIN32)
-    list(APPEND CLConform_LIBRARIES vulkan-1)
+    list(APPEND CLConform_LIBRARIES vulkan-1 vulkan_wrapper)
 else(WIN32)
-    list(APPEND CLConform_LIBRARIES vulkan dl)
+    list(APPEND CLConform_LIBRARIES vulkan dl vulkan_wrapper)
 endif(WIN32)
 set(CMAKE_CXX_FLAGS "-fpermissive")
-if(WIN32) 
+if(WIN32)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DVK_USE_PLATFORM_WIN32_KHR")
 endif(WIN32)
 
@@ -26,11 +26,7 @@ set (${MODULE_NAME}_SOURCES
         test_vulkan_interop_image.cpp
         test_vulkan_api_consistency.cpp
         test_vulkan_platform_device_info.cpp
-        vulkan_interop_common/vulkan_wrapper.cpp
-        vulkan_interop_common/vulkan_interop_common.cpp
-        vulkan_interop_common/opencl_vulkan_wrapper.cpp
-        vulkan_interop_common/vulkan_utility.cpp
-        vulkan_interop_common/vulkan_list_map.cpp
+        vulkan_interop_common.cpp
         ../../test_common/harness/genericThread.cpp
         ../../test_common/harness/errorHelpers.cpp
         ../../test_common/harness/testHarness.cpp
@@ -45,6 +41,6 @@ set (${MODULE_NAME}_SOURCES
 set_source_files_properties(
     ${${MODULE_NAME}_SOURCES}
     PROPERTIES LANGUAGE CXX)
-include_directories("./vulkan_interop_common/")
+include_directories("../common/vulkan_wrapper")
 
 include(../CMakeCommon.txt)
diff --git a/test_conformance/vulkan/main.cpp b/test_conformance/vulkan/main.cpp
index 2eeb0c36..5901420a 100644
--- a/test_conformance/vulkan/main.cpp
+++ b/test_conformance/vulkan/main.cpp
@@ -340,7 +340,11 @@ int main(int argc, const char *argv[])
     // Execute tests.
     // Note: don't use the entire harness, because we have a different way of
     // obtaining the device (via the context)
+    test_harness_config config{};
+    config.forceNoContextCreation = true;
+    config.numElementsToUse = 1024;
+    config.queueProps = 0;
     errNum = parseAndCallCommandLineTests(argCount, argList, devices[device_no],
-                                          test_num, test_list, true, 0, 1024);
+                                          test_num, test_list, config);
     return errNum;
 }
diff --git a/test_conformance/vulkan/test_vulkan_interop_image.cpp b/test_conformance/vulkan/test_vulkan_interop_image.cpp
index 7577de09..47a31665 100644
--- a/test_conformance/vulkan/test_vulkan_interop_image.cpp
+++ b/test_conformance/vulkan/test_vulkan_interop_image.cpp
@@ -14,10 +14,10 @@
 // limitations under the License.
 //
 
-#define NOMINMAX
 #include <vulkan_interop_common.hpp>
 #include <string>
 #include "harness/errorHelpers.h"
+#include <algorithm>
 
 #define MAX_2D_IMAGES 5
 #define MAX_2D_IMAGE_WIDTH 1024
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.cpp b/test_conformance/vulkan/vulkan_interop_common.cpp
index db9d168f..db9d168f 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.cpp
+++ b/test_conformance/vulkan/vulkan_interop_common.cpp
diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.hpp b/test_conformance/vulkan/vulkan_interop_common.hpp
index 18d84f09..18d84f09 100644
--- a/test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.hpp
+++ b/test_conformance/vulkan/vulkan_interop_common.hpp
diff --git a/test_conformance/workgroups/test_wg_all.cpp b/test_conformance/workgroups/test_wg_all.cpp
index ccf17b6e..41abd124 100644
--- a/test_conformance/workgroups/test_wg_all.cpp
+++ b/test_conformance/workgroups/test_wg_all.cpp
@@ -71,7 +71,6 @@ test_work_group_all(cl_device_id device, cl_context context, cl_command_queue qu
     cl_int       *output_ptr;
     cl_program   program;
     cl_kernel    kernel;
-    void         *values[2];
     size_t       threads[1];
     size_t       wg_size[1];
     size_t       num_elements;
@@ -124,8 +123,6 @@ test_work_group_all(cl_device_id device, cl_context context, cl_command_queue qu
         return -1;
     }
 
-    values[0] = streams[0];
-    values[1] = streams[1];
     err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
     err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
     if (err != CL_SUCCESS)
diff --git a/test_conformance/workgroups/test_wg_any.cpp b/test_conformance/workgroups/test_wg_any.cpp
index 4785ad51..e0242cfb 100644
--- a/test_conformance/workgroups/test_wg_any.cpp
+++ b/test_conformance/workgroups/test_wg_any.cpp
@@ -71,7 +71,6 @@ test_work_group_any(cl_device_id device, cl_context context, cl_command_queue qu
     cl_int       *output_ptr;
     cl_program   program;
     cl_kernel    kernel;
-    void         *values[2];
     size_t       threads[1];
     size_t       wg_size[1];
     size_t       num_elements;
@@ -124,8 +123,6 @@ test_work_group_any(cl_device_id device, cl_context context, cl_command_queue qu
         return -1;
     }
 
-    values[0] = streams[0];
-    values[1] = streams[1];
     err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
     err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
     if (err != CL_SUCCESS)
diff --git a/test_conformance/workgroups/test_wg_broadcast.cpp b/test_conformance/workgroups/test_wg_broadcast.cpp
index 29380211..e24ac7b9 100644
--- a/test_conformance/workgroups/test_wg_broadcast.cpp
+++ b/test_conformance/workgroups/test_wg_broadcast.cpp
@@ -168,7 +168,6 @@ test_work_group_broadcast_1D(cl_device_id device, cl_context context, cl_command
     cl_float     *output_ptr;
     cl_program   program;
     cl_kernel    kernel;
-    void         *values[2];
     size_t       globalsize[1];
     size_t       wg_size[1];
     size_t       num_elements;
@@ -221,8 +220,6 @@ test_work_group_broadcast_1D(cl_device_id device, cl_context context, cl_command
         return -1;
     }
 
-    values[0] = streams[0];
-    values[1] = streams[1];
     err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
     err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
     if (err != CL_SUCCESS)
@@ -275,7 +272,6 @@ test_work_group_broadcast_2D(cl_device_id device, cl_context context, cl_command
     cl_float     *output_ptr;
     cl_program   program;
     cl_kernel    kernel;
-    void         *values[2];
     size_t       globalsize[2];
     size_t       localsize[2];
     size_t       wg_size[1];
@@ -350,8 +346,6 @@ test_work_group_broadcast_2D(cl_device_id device, cl_context context, cl_command
         return -1;
     }
 
-    values[0] = streams[0];
-    values[1] = streams[1];
     err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
     err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
     if (err != CL_SUCCESS)
@@ -402,7 +396,6 @@ test_work_group_broadcast_3D(cl_device_id device, cl_context context, cl_command
     cl_float     *output_ptr;
     cl_program   program;
     cl_kernel    kernel;
-    void         *values[2];
     size_t       globalsize[3];
     size_t       localsize[3];
     size_t       wg_size[1];
@@ -478,8 +471,6 @@ test_work_group_broadcast_3D(cl_device_id device, cl_context context, cl_command
         return -1;
     }
 
-    values[0] = streams[0];
-    values[1] = streams[1];
     err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
     err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
     if (err != CL_SUCCESS)
diff --git a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp
index aa02391c..648e68ce 100644
--- a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp
+++ b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp
@@ -108,12 +108,6 @@ bool is_not_even(size_t a) { return (is_prime(a) || (a % 2 == 1)); }
 bool is_not_odd(size_t a) { return (is_prime(a) || (a % 2 == 0)); }
 
 #define NELEMS(s) (sizeof(s) / sizeof((s)[0]))
-/* The numbers we chose in the value_range are to be used for the second and
-   third dimension of the global work group size. The numbers below cover many
-   different cases: 1024 is a power of 2, 3 is an odd and small prime number, 12
-   is a multiple of 4 but not a power of 2, 1031 is a large odd and prime number
-   and 1 is to test the lack of this dimension if the others are present */
-const size_t value_range[] = { 1024, 3, 12, 1031, 1 };
 /* The value_range_nD contains numbers to be used for the experiments with 2D
    and 3D global work sizes. This is because we need smaller numbers so that the
    resulting number of work items is meaningful and does not become too large.
@@ -271,7 +265,7 @@ int do_test_work_group_suggested_local_size(
         // return error if no number is found due to the skip condition
         err = -1;
         unsigned int j = 0;
-        size_t num_elems = NELEMS(value_range);
+        size_t num_elems = NELEMS(value_range_nD);
         for (size_t i = start; i < end; i += incr)
         {
             if (skip_cond(i)) continue;
author	Sadaf Ebrahimi <sadafebrahimi@google.com>	2023-07-25 22:33:25 +0000
committer	Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>	2023-07-25 22:33:25 +0000
commit	150005d3d187a681ba110128ab0272b18cbcb468 (patch)
tree	e96b89c45428ba1382321718785a473dbf855b53
parent	6b9ff13286194c7a2b38d624eeee38dc35987dc8 (diff)
parent	b8ba8e487f4aa32d8bc42a019e4bc8bc2e330b99 (diff)
download	OpenCL-CTS-150005d3d187a681ba110128ab0272b18cbcb468.tar.gz