diff options
author | Sadaf Ebrahimi <sadafebrahimi@google.com> | 2023-10-17 19:27:15 +0000 |
---|---|---|
committer | Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> | 2023-10-17 19:27:15 +0000 |
commit | fa4ba9dfa1f52ebf659adcb7d23e8294eb866d64 (patch) | |
tree | 4aace48f642ba4929dd631f3bc46285d3c853c3e | |
parent | 150005d3d187a681ba110128ab0272b18cbcb468 (diff) | |
parent | 3cc456717b7499d7e06ac59aa5c419c6e5e5b3b8 (diff) | |
download | OpenCL-CTS-fa4ba9dfa1f52ebf659adcb7d23e8294eb866d64.tar.gz |
Upgrade OpenCL-CTS to v2023-10-10-00 am: 3cc456717b
Original change: https://android-review.googlesource.com/c/platform/external/OpenCL-CTS/+/2793473
Change-Id: I79bd9760da26a23f2e2b494cff88a3e3215e9b63
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
164 files changed, 14017 insertions, 10412 deletions
diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 1ba63abd..635e4a7e 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -13,16 +13,16 @@ jobs: fail-fast: false matrix: mainmatrix: [true] - os: [ubuntu-20.04, macos-latest, windows-latest] + os: [ubuntu-22.04, macos-latest, windows-latest] include: - - os: ubuntu-20.04 + - os: ubuntu-22.04 mainmatrix: true gl: 1 extra: " gl" - - os: ubuntu-20.04 + - os: ubuntu-22.04 mainmatrix: false arch: arm - - os: ubuntu-20.04 + - os: ubuntu-22.04 mainmatrix: false arch: aarch64 debug: 1 @@ -55,10 +55,10 @@ jobs: run: ./presubmit.sh formatcheck: name: Check code format - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Install packages - run: sudo apt install -y clang-format clang-format-9 + run: sudo apt install -y clang-format clang-format-11 - uses: actions/checkout@v3 with: fetch-depth: 0 diff --git a/CMakeLists.txt b/CMakeLists.txt index 4fce58d8..6c9bbf6f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,10 +103,6 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang" if(NOT CMAKE_BUILD_TYPE MATCHES "Release|RelWithDebInfo|MinSizeRel") # Enable more warnings if not doing a release build. add_cxx_flag_if_supported(-Wall) - # Suppress warnings that currently trigger on the code base. - # This list should shrink over time when warnings are fixed. - add_cxx_flag_if_supported(-Wno-sometimes-uninitialized) - add_cxx_flag_if_supported(-Wno-sign-compare) endif() add_cxx_flag_if_supported(-Wno-narrowing) add_cxx_flag_if_supported(-Wno-format) @@ -9,11 +9,11 @@ third_party { type: GIT value: "https://github.com/KhronosGroup/OpenCL-CTS.git" } - version: "v2023-05-16-00" + version: "v2023-10-10-00" license_type: NOTICE last_upgrade_date { year: 2023 - month: 7 - day: 25 + month: 10 + day: 17 } } diff --git a/check-format.sh b/check-format.sh index be8f9d78..b5dc0a72 100755 --- a/check-format.sh +++ b/check-format.sh @@ -2,7 +2,7 @@ # Arg used to specify non-'origin/main' comparison branch ORIGIN_BRANCH=${1:-"origin/main"} -CLANG_BINARY=${2:-"`which clang-format-9`"} +CLANG_BINARY=${2:-"`which clang-format-11`"} # Run git-clang-format to check for violations CLANG_FORMAT_OUTPUT=$(git-clang-format --diff $ORIGIN_BRANCH --extensions c,cpp,h,hpp --binary $CLANG_BINARY) diff --git a/presubmit.sh b/presubmit.sh index 605c10b0..10354abf 100755 --- a/presubmit.sh +++ b/presubmit.sh @@ -77,7 +77,6 @@ cmake .. -G Ninja \ -DBUILD_WSI_XLIB_SUPPORT=OFF \ -DBUILD_WSI_XCB_SUPPORT=OFF \ -DBUILD_WSI_WAYLAND_SUPPORT=OFF \ - -DUSE_GAS=OFF \ -C helper.cmake .. cmake --build . -j2 diff --git a/test_common/gl/helpers.cpp b/test_common/gl/helpers.cpp index b9f95a94..62f63253 100644 --- a/test_common/gl/helpers.cpp +++ b/test_common/gl/helpers.cpp @@ -966,12 +966,13 @@ void reorder_verification_buffer(GLenum glFormat, GLenum glType, char* buffer, s #ifdef GL_VERSION_3_2 -#define check_gl_error() \ -{ \ - GLenum errnom = GL_NO_ERROR;\ - if ((errnom = glGetError()) != GL_NO_ERROR)\ - log_error("GL Error: 0x%04X at %s:%d\n", errnom, __FILE__, __LINE__);\ -} +#define CHECK_GL_ERROR() \ + { \ + GLenum errnom = GL_NO_ERROR; \ + if ((errnom = glGetError()) != GL_NO_ERROR) \ + log_error("GL Error: 0x%04X at %s:%d\n", errnom, __FILE__, \ + __LINE__); \ + } const char *get_gl_vector_type( GLenum internalformat ) { @@ -1045,10 +1046,12 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples // Check if the renderer supports enough samples GLint max_samples = get_gl_max_samples(target, internalFormat); - check_gl_error() + CHECK_GL_ERROR() if (max_samples < (GLint)samples) - log_error("GL error: requested samples (%d) exceeds renderer max samples (%d)\n", samples, max_samples); + log_error("GL error: requested samples (%zu) exceeds renderer max " + "samples (%d)\n", + samples, max_samples); // Setup the GLSL program const GLchar *vertex_source = @@ -1075,36 +1078,36 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples glShaderWrapper vertex_shader = glCreateShader(GL_VERTEX_SHADER); glShaderSource(vertex_shader, 1, &vertex_source, NULL); glCompileShader(vertex_shader); - check_gl_error() + CHECK_GL_ERROR() glShaderWrapper fragment_shader = glCreateShader(GL_FRAGMENT_SHADER); glShaderSource(fragment_shader, 1, &fragment_source, NULL); glCompileShader(fragment_shader); - check_gl_error() + CHECK_GL_ERROR() GLuint prog = glCreateProgram(); glAttachShader(prog, vertex_shader); glAttachShader(prog, fragment_shader); - check_gl_error() + CHECK_GL_ERROR() glBindAttribLocation(prog, 0, "att0"); glLinkProgram(prog); - check_gl_error() + CHECK_GL_ERROR() // Setup the FBO and texture glFramebufferWrapper fbo; glGenFramebuffers(1, &fbo); glBindFramebuffer(GL_FRAMEBUFFER, fbo); - check_gl_error() + CHECK_GL_ERROR() glViewport(0, 0, width, height); - check_gl_error() + CHECK_GL_ERROR() GLuint tex = 0; glGenTextures(1, &tex); glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, tex); glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, samples, internalFormat, width, height, fixedSampleLocations); - check_gl_error() + CHECK_GL_ERROR() GLint attachment; switch (internalFormat) { @@ -1122,7 +1125,7 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples } glFramebufferTexture(GL_FRAMEBUFFER, attachment, tex, 0); - check_gl_error() + CHECK_GL_ERROR() GLint status = glCheckFramebufferStatus(GL_FRAMEBUFFER); if (status == GL_FRAMEBUFFER_UNSUPPORTED) { @@ -1142,22 +1145,24 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples // Check if the framebuffer supports enough samples GLint fbo_samples = 0; glGetIntegerv(GL_SAMPLES, &fbo_samples); - check_gl_error(); + CHECK_GL_ERROR(); if (fbo_samples < (GLint)samples) - log_error("GL Error: requested samples (%d) exceeds FBO capability (%d)\n", samples, fbo_samples); + log_error( + "GL Error: requested samples (%zu) exceeds FBO capability (%d)\n", + samples, fbo_samples); glUseProgram(prog); - check_gl_error() + CHECK_GL_ERROR() if (attachment != GL_DEPTH_ATTACHMENT && attachment != GL_DEPTH_STENCIL_ATTACHMENT) { glDisable(GL_DEPTH_TEST); - check_gl_error() + CHECK_GL_ERROR() } else { glEnable(GL_DEPTH_TEST); glDepthFunc(GL_ALWAYS); - check_gl_error() + CHECK_GL_ERROR() } // Setup the VBO for rendering a quad @@ -1172,14 +1177,14 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples glGenBuffers(1, &vbo); glBindBuffer(GL_ARRAY_BUFFER, vbo); glBufferData(GL_ARRAY_BUFFER, sizeof(quad), quad, GL_STREAM_DRAW); - check_gl_error() + CHECK_GL_ERROR() glVertexArraysWrapper vao; glGenVertexArrays(1, &vao); glBindVertexArray(vao); glEnableVertexAttribArray(0); glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(GLfloat)*2, 0); - check_gl_error() + CHECK_GL_ERROR() //clearing color and depth buffer glClearColor(0, 0, 0, 0); @@ -1223,13 +1228,13 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples color += color_delta; glDrawArrays(GL_TRIANGLE_FAN, 0, 4); - check_gl_error(); + CHECK_GL_ERROR(); glFlush(); } glDisable(GL_SAMPLE_MASK); - check_gl_error(); + CHECK_GL_ERROR(); *outTextureID = tex; @@ -1306,7 +1311,9 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height, GLint max_samples = get_gl_max_samples(target, internalFormat); if (max_samples < (GLint)samples) - log_error("GL error: requested samples (%d) exceeds renderer max samples (%d)\n", samples, max_samples); + log_error("GL error: requested samples (%zu) exceeds renderer max " + "samples (%d)\n", + samples, max_samples); // Setup the GLSL program const GLchar *vertex_source = @@ -1333,36 +1340,36 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height, glShaderWrapper vertex_shader = glCreateShader(GL_VERTEX_SHADER); glShaderSource(vertex_shader, 1, &vertex_source, NULL); glCompileShader(vertex_shader); - check_gl_error() + CHECK_GL_ERROR() glShaderWrapper fragment_shader = glCreateShader(GL_FRAGMENT_SHADER); glShaderSource(fragment_shader, 1, &fragment_source, NULL); glCompileShader(fragment_shader); - check_gl_error() + CHECK_GL_ERROR() glProgramWrapper prog = glCreateProgram(); glAttachShader(prog, vertex_shader); glAttachShader(prog, fragment_shader); - check_gl_error() + CHECK_GL_ERROR() glBindAttribLocation(prog, 0, "att0"); glLinkProgram(prog); - check_gl_error() + CHECK_GL_ERROR() // Setup the FBO and texture glFramebufferWrapper fbo; glGenFramebuffers(1, &fbo); glBindFramebuffer(GL_FRAMEBUFFER, fbo); - check_gl_error() + CHECK_GL_ERROR() glViewport(0, 0, width, height); - check_gl_error() + CHECK_GL_ERROR() GLuint tex = 0; glGenTextures(1, &tex); glBindTexture(GL_TEXTURE_2D_MULTISAMPLE_ARRAY, tex); glTexImage3DMultisample(GL_TEXTURE_2D_MULTISAMPLE_ARRAY, samples, internalFormat, width, height, total_layers, fixedSampleLocations); - check_gl_error() + CHECK_GL_ERROR() GLint attachment; switch (internalFormat) { @@ -1384,12 +1391,12 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height, if (attachment != GL_DEPTH_ATTACHMENT && attachment != GL_DEPTH_STENCIL_ATTACHMENT) { glDisable(GL_DEPTH_TEST); - check_gl_error() + CHECK_GL_ERROR() } else { glEnable(GL_DEPTH_TEST); glDepthFunc(GL_ALWAYS); - check_gl_error() + CHECK_GL_ERROR() } // Setup the VBO for rendering a quad @@ -1404,18 +1411,18 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height, glGenBuffers(1, &vbo); glBindBuffer(GL_ARRAY_BUFFER, vbo); glBufferData(GL_ARRAY_BUFFER, sizeof(quad), quad, GL_STREAM_DRAW); - check_gl_error() + CHECK_GL_ERROR() glVertexArraysWrapper vao; glGenVertexArrays(1, &vao); glBindVertexArray(vao); glEnableVertexAttribArray(0); glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(GLfloat)*2, 0); - check_gl_error() + CHECK_GL_ERROR() for (size_t l=0; l!=total_layers; ++l) { glFramebufferTextureLayer(GL_FRAMEBUFFER, attachment, tex, 0, l); - check_gl_error() + CHECK_GL_ERROR() GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); if (status == GL_FRAMEBUFFER_UNSUPPORTED) { @@ -1435,13 +1442,15 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height, // Check if the framebuffer supports enough samples GLint fbo_samples = 0; glGetIntegerv(GL_SAMPLES, &fbo_samples); - check_gl_error(); + CHECK_GL_ERROR(); if (fbo_samples < (GLint)samples) - log_error("GL Error: requested samples (%d) exceeds FBO capability (%d)\n", samples, fbo_samples); + log_error( + "GL Error: requested samples (%zu) exceeds FBO capability (%d)\n", + samples, fbo_samples); glUseProgram(prog); - check_gl_error() + CHECK_GL_ERROR() //clearing color and depth buffer glClearColor(0, 0, 0, 0); @@ -1482,13 +1491,13 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height, glUniform1f(glGetUniformLocation(prog, "depthVal"), val); glDrawArrays(GL_TRIANGLE_FAN, 0, 4); - check_gl_error(); + CHECK_GL_ERROR(); glFlush(); } glDisable(GL_SAMPLE_MASK); - check_gl_error(); + CHECK_GL_ERROR(); } *outTextureID = tex; @@ -1715,7 +1724,7 @@ void * CreateGLRenderbuffer( GLsizei width, GLsizei height, // Reverse and reorder to validate since in the // kernel the read_imagef() call always returns RGBA cl_uchar *p = (cl_uchar *)buffer; - for( size_t i = 0; i < (size_t)width * height; i++ ) + for (GLsizei i = 0; i < width * height; i++) { cl_uchar uc0 = p[i * 4 + 0]; cl_uchar uc1 = p[i * 4 + 1]; @@ -1733,7 +1742,7 @@ void * CreateGLRenderbuffer( GLsizei width, GLsizei height, // Reverse and reorder to validate since in the // kernel the read_imagef() call always returns RGBA cl_uchar *p = (cl_uchar *)buffer; - for( size_t i = 0; i < width * height; i++ ) + for (GLsizei i = 0; i < width * height; i++) { cl_uchar uc0 = p[i * 4 + 0]; cl_uchar uc1 = p[i * 4 + 1]; diff --git a/test_common/harness/compat.h b/test_common/harness/compat.h index 4053b7ee..a42f2917 100644 --- a/test_common/harness/compat.h +++ b/test_common/harness/compat.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _COMPAT_H_ -#define _COMPAT_H_ +#ifndef COMPAT_H_ +#define COMPAT_H_ #if defined(_WIN32) && defined(_MSC_VER) #include <Windows.h> @@ -398,4 +398,4 @@ EXTERN_C int __builtin_clz(unsigned int pattern); #define sleep(sec) Sleep((sec)*1000) #endif -#endif // _COMPAT_H_ +#endif // COMPAT_H_ diff --git a/test_common/harness/crc32.h b/test_common/harness/crc32.h index 65ca15ee..69587011 100644 --- a/test_common/harness/crc32.h +++ b/test_common/harness/crc32.h @@ -15,8 +15,8 @@ Agreement or Khronos Conformance Test Source License Agreement as executed between Khronos and the recipient. ******************************************************************/ -#ifndef _CRC32_H_ -#define _CRC32_H_ +#ifndef CRC32_H_ +#define CRC32_H_ #include <stdint.h> #include <stddef.h> diff --git a/test_common/harness/mt19937.cpp b/test_common/harness/mt19937.cpp index f5665deb..2d503eb5 100644 --- a/test_common/harness/mt19937.cpp +++ b/test_common/harness/mt19937.cpp @@ -51,6 +51,7 @@ #include "harness/alloc.h" #ifdef __SSE2__ +#include <mutex> #include <emmintrin.h> #endif @@ -107,7 +108,7 @@ cl_uint genrand_int32(MTdata d) /* mag01[x] = x * MATRIX_A for x=0,1 */ static const cl_uint mag01[2] = { 0x0UL, MATRIX_A }; #ifdef __SSE2__ - static volatile int init = 0; + static std::once_flag init_flag; static union { __m128i v; cl_uint s[4]; @@ -123,8 +124,7 @@ cl_uint genrand_int32(MTdata d) int kk; #ifdef __SSE2__ - if (0 == init) - { + auto init_fn = []() { upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] = upper_mask.s[3] = UPPER_MASK; lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] = @@ -134,8 +134,8 @@ cl_uint genrand_int32(MTdata d) MATRIX_A; c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint)0x9d2c5680UL; c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint)0xefc60000UL; - init = 1; - } + }; + std::call_once(init_flag, init_fn); #endif kk = 0; diff --git a/test_common/harness/stringHelpers.h b/test_common/harness/stringHelpers.h new file mode 100644 index 00000000..e1275f10 --- /dev/null +++ b/test_common/harness/stringHelpers.h @@ -0,0 +1,42 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef STRING_HELPERS_H +#define STRING_HELPERS_H + +#include <memory> +#include <stdexcept> +#include <string> + +inline std::string concat_kernel(const char *sstr[], int num) +{ + std::string res; + for (int i = 0; i < num; i++) res += std::string(sstr[i]); + return res; +} + +template <typename... Args> +inline std::string str_sprintf(const std::string &str, Args... args) +{ + int str_size = std::snprintf(nullptr, 0, str.c_str(), args...) + 1; + if (str_size <= 0) throw std::runtime_error("Formatting error."); + size_t s = static_cast<size_t>(str_size); + std::unique_ptr<char[]> buffer(new char[s]); + std::snprintf(buffer.get(), s, str.c_str(), args...); + return std::string(buffer.get(), buffer.get() + s - 1); +} + +#endif // STRING_HELPERS_H diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index 95ea8163..3d743e71 100644 --- a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -835,9 +835,9 @@ void callTestFunctions(test_definition testList[], std::vector<std::thread *> threads; test_harness_state state = { testList, resultTestList, deviceToUse, config }; - for (int i = 0; i < config.numWorkerThreads; i++) + for (unsigned i = 0; i < config.numWorkerThreads; i++) { - log_info("Spawning worker thread %i\n", i); + log_info("Spawning worker thread %u\n", i); threads.push_back(new std::thread(test_function_runner, &state)); } diff --git a/test_common/harness/typeWrappers.h b/test_common/harness/typeWrappers.h index 50c7c938..ad11b480 100644 --- a/test_common/harness/typeWrappers.h +++ b/test_common/harness/typeWrappers.h @@ -145,6 +145,48 @@ using clSamplerWrapper = using clEventWrapper = wrapper_details::Wrapper<cl_event, clRetainEvent, clReleaseEvent>; +class clSVMWrapper { + void *Ptr = nullptr; + cl_context Ctx = nullptr; + +public: + clSVMWrapper() = default; + + clSVMWrapper(cl_context C, size_t Size, + cl_svm_mem_flags F = CL_MEM_READ_WRITE) + : Ctx(C) + { + Ptr = clSVMAlloc(C, F, Size, 0); + } + + clSVMWrapper &operator=(void *other) = delete; + clSVMWrapper(clSVMWrapper const &other) = delete; + clSVMWrapper &operator=(clSVMWrapper const &other) = delete; + clSVMWrapper(clSVMWrapper &&other) + { + Ptr = other.Ptr; + Ctx = other.Ctx; + other.Ptr = nullptr; + other.Ctx = nullptr; + } + clSVMWrapper &operator=(clSVMWrapper &&other) + { + Ptr = other.Ptr; + Ctx = other.Ctx; + other.Ptr = nullptr; + other.Ctx = nullptr; + return *this; + } + + ~clSVMWrapper() + { + if (Ptr) clSVMFree(Ctx, Ptr); + } + + void *operator()() const { return Ptr; } +}; + + class clProtectedImage { public: clProtectedImage() diff --git a/test_conformance/SVM/CMakeLists.txt b/test_conformance/SVM/CMakeLists.txt index 2d01a825..2ad2f821 100644 --- a/test_conformance/SVM/CMakeLists.txt +++ b/test_conformance/SVM/CMakeLists.txt @@ -17,4 +17,6 @@ set(${MODULE_NAME}_SOURCES test_migrate.cpp ) +set_gnulike_module_compile_flags("-Wno-sometimes-uninitialized -Wno-sign-compare") + include(../CMakeCommon.txt) diff --git a/test_conformance/allocations/CMakeLists.txt b/test_conformance/allocations/CMakeLists.txt index a4043806..b6031225 100644 --- a/test_conformance/allocations/CMakeLists.txt +++ b/test_conformance/allocations/CMakeLists.txt @@ -8,4 +8,6 @@ set(${MODULE_NAME}_SOURCES allocation_utils.cpp ) +set_gnulike_module_compile_flags("-Wno-sign-compare") + include(../CMakeCommon.txt) diff --git a/test_conformance/allocations/allocation_execute.cpp b/test_conformance/allocations/allocation_execute.cpp index 9d0e8777..5a77c3a7 100644 --- a/test_conformance/allocations/allocation_execute.cpp +++ b/test_conformance/allocations/allocation_execute.cpp @@ -79,20 +79,30 @@ int check_image(cl_command_queue queue, cl_mem mem) { return -1; } - if (type == CL_MEM_OBJECT_BUFFER) { - log_error("Expected image object, not buffer.\n"); - return -1; - } else if (type == CL_MEM_OBJECT_IMAGE2D) { - error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL); - if (error) { - print_error(error, "clGetMemObjectInfo failed for CL_IMAGE_WIDTH."); - return -1; - } - error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL); - if (error) { - print_error(error, "clGetMemObjectInfo failed for CL_IMAGE_HEIGHT."); + switch (type) + { + case CL_MEM_OBJECT_BUFFER: + log_error("Expected image object, not buffer.\n"); return -1; - } + case CL_MEM_OBJECT_IMAGE2D: + error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, + NULL); + if (error) + { + print_error(error, + "clGetMemObjectInfo failed for CL_IMAGE_WIDTH."); + return -1; + } + error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), + &height, NULL); + if (error) + { + print_error(error, + "clGetMemObjectInfo failed for CL_IMAGE_HEIGHT."); + return -1; + } + break; + default: log_error("unexpected object type"); return -1; } diff --git a/test_conformance/allocations/main.cpp b/test_conformance/allocations/main.cpp index 43e81277..827072fc 100644 --- a/test_conformance/allocations/main.cpp +++ b/test_conformance/allocations/main.cpp @@ -326,6 +326,7 @@ int main(int argc, const char *argv[]) else if ( strcmp( argv[i], "--help" ) == 0 || strcmp( argv[i], "-h" ) == 0 ) { printUsage( argv[0] ); + free(argList); return -1; } diff --git a/test_conformance/api/test_mem_object_info.cpp b/test_conformance/api/test_mem_object_info.cpp index 8dc8f6cf..7eedec85 100644 --- a/test_conformance/api/test_mem_object_info.cpp +++ b/test_conformance/api/test_mem_object_info.cpp @@ -217,6 +217,9 @@ int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_ // Create a buffer object to test against. bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, NULL, &error ); test_error( error, "Unable to create buffer to test with" ); + void *ptr; + TEST_MEM_OBJECT_PARAM(bufferObject, CL_MEM_HOST_PTR, ptr, NULL, + "host pointer", "%p", void *) } // Perform buffer object queries. diff --git a/test_conformance/api/test_null_buffer_arg.cpp b/test_conformance/api/test_null_buffer_arg.cpp index 75bdd479..83fcb636 100644 --- a/test_conformance/api/test_null_buffer_arg.cpp +++ b/test_conformance/api/test_null_buffer_arg.cpp @@ -64,16 +64,21 @@ static int test_setargs_and_execution(cl_command_queue queue, cl_kernel kernel, cl_int status; const char *typestr; - if (type == NON_NULL_PATH) { - status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf); - typestr = "non-NULL"; - } else if (type == ADDROF_NULL_PATH) { - test_buf = NULL; - status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf); - typestr = "&NULL"; - } else if (type == NULL_PATH) { - status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL); - typestr = "NULL"; + switch (type) + { + case NON_NULL_PATH: + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf); + typestr = "non-NULL"; + break; + case ADDROF_NULL_PATH: + test_buf = NULL; + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf); + typestr = "&NULL"; + break; + case NULL_PATH: + status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL); + typestr = "NULL"; + break; } log_info("Testing setKernelArgs with %s buffer.\n", typestr); diff --git a/test_conformance/api/test_queries.cpp b/test_conformance/api/test_queries.cpp index fa5c227f..f0740107 100644 --- a/test_conformance/api/test_queries.cpp +++ b/test_conformance/api/test_queries.cpp @@ -644,6 +644,13 @@ int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_q } log_info( "\tReported device profile: %s \n", profile ); + if (strcmp(profile, "FULL_PROFILE") == 0 && compilerAvail != CL_TRUE) + { + log_error("ERROR: Returned profile of device is FULL , but " + "CL_DEVICE_COMPILER_AVAILABLE is not CL_TRUE as required by " + "OpenCL 1.2!"); + return -1; + } return 0; } @@ -799,8 +806,8 @@ int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, c test_error(error, "clFinish failed"); if (max_dimensions == 2) { - return 0; free(source); + return 0; } local[1]--; local[2]++; diff --git a/test_conformance/api/test_queue_properties.cpp b/test_conformance/api/test_queue_properties.cpp index 62d0a734..768bd5de 100644 --- a/test_conformance/api/test_queue_properties.cpp +++ b/test_conformance/api/test_queue_properties.cpp @@ -107,8 +107,9 @@ int test_queue_properties(cl_device_id deviceID, cl_context context, cl_command_ clProgramWrapper program; clKernelWrapper kernel; - cl_queue_properties_khr device_props = 0; - cl_queue_properties_khr queue_prop_def[] = { CL_QUEUE_PROPERTIES, 0, 0 }; + cl_command_queue_properties device_props = 0; + cl_command_queue_properties queue_prop_def[] = { CL_QUEUE_PROPERTIES, 0, + 0 }; // Query extension if (!is_extension_available(deviceID, "cl_khr_create_command_queue")) diff --git a/test_conformance/atomics/test_indexed_cases.cpp b/test_conformance/atomics/test_indexed_cases.cpp index 7da2dfa7..ce0410bc 100644 --- a/test_conformance/atomics/test_indexed_cases.cpp +++ b/test_conformance/atomics/test_indexed_cases.cpp @@ -13,6 +13,9 @@ // See the License for the specific language governing permissions and // limitations under the License. // + +#include <memory> + #include "testBase.h" #include "harness/conversions.h" @@ -226,13 +229,13 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, (int)global_threads[0], (int)local_threads[0]); // Allocate our storage - cl_mem bin_counters = + clMemWrapper bin_counters = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * number_of_bins, NULL, NULL); - cl_mem bins = clCreateBuffer( + clMemWrapper bins = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof(cl_int) * number_of_bins * max_counts_per_bin, NULL, NULL); - cl_mem bin_assignments = + clMemWrapper bin_assignments = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * number_of_items, NULL, NULL); @@ -253,7 +256,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, } // Initialize our storage - cl_int *l_bin_counts = (cl_int *)malloc(sizeof(cl_int) * number_of_bins); + std::unique_ptr<cl_int[]> l_bin_counts(new cl_int[number_of_bins]); if (!l_bin_counts) { log_error("add_index_bin_test FAILED to allocate initial values for " @@ -263,8 +266,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, int i; for (i = 0; i < number_of_bins; i++) l_bin_counts[i] = 0; err = clEnqueueWriteBuffer(queue, bin_counters, true, 0, - sizeof(cl_int) * number_of_bins, l_bin_counts, 0, - NULL, NULL); + sizeof(cl_int) * number_of_bins, + l_bin_counts.get(), 0, NULL, NULL); if (err) { log_error("add_index_bin_test FAILED to set initial values for " @@ -273,8 +276,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, return -1; } - cl_int *values = - (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin); + std::unique_ptr<cl_int[]> values( + new cl_int[number_of_bins * max_counts_per_bin]); if (!values) { log_error( @@ -285,7 +288,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, err = clEnqueueWriteBuffer(queue, bins, true, 0, sizeof(cl_int) * number_of_bins * max_counts_per_bin, - values, 0, NULL, NULL); + values.get(), 0, NULL, NULL); if (err) { log_error( @@ -293,10 +296,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, err); return -1; } - free(values); - cl_int *l_bin_assignments = - (cl_int *)malloc(sizeof(cl_int) * number_of_items); + std::unique_ptr<cl_int[]> l_bin_assignments(new cl_int[number_of_items]); if (!l_bin_assignments) { log_error("add_index_bin_test FAILED to allocate initial values for " @@ -326,7 +327,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, } err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, sizeof(cl_int) * number_of_items, - l_bin_assignments, 0, NULL, NULL); + l_bin_assignments.get(), 0, NULL, NULL); if (err) { log_error("add_index_bin_test FAILED to set initial values for " @@ -355,8 +356,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, return -1; } - cl_int *final_bin_assignments = - (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin); + std::unique_ptr<cl_int[]> final_bin_assignments( + new cl_int[number_of_bins * max_counts_per_bin]); if (!final_bin_assignments) { log_error("add_index_bin_test FAILED to allocate initial values for " @@ -366,15 +367,14 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, err = clEnqueueReadBuffer(queue, bins, true, 0, sizeof(cl_int) * number_of_bins * max_counts_per_bin, - final_bin_assignments, 0, NULL, NULL); + final_bin_assignments.get(), 0, NULL, NULL); if (err) { log_error("add_index_bin_test FAILED to read back bins: %d\n", err); return -1; } - cl_int *final_bin_counts = - (cl_int *)malloc(sizeof(cl_int) * number_of_bins); + std::unique_ptr<cl_int[]> final_bin_counts(new cl_int[number_of_bins]); if (!final_bin_counts) { log_error("add_index_bin_test FAILED to allocate initial values for " @@ -382,8 +382,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, return -1; } err = clEnqueueReadBuffer(queue, bin_counters, true, 0, - sizeof(cl_int) * number_of_bins, final_bin_counts, - 0, NULL, NULL); + sizeof(cl_int) * number_of_bins, + final_bin_counts.get(), 0, NULL, NULL); if (err) { log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", @@ -460,13 +460,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, errors++; } } - free(l_bin_counts); - free(l_bin_assignments); - free(final_bin_assignments); - free(final_bin_counts); - clReleaseMemObject(bin_counters); - clReleaseMemObject(bins); - clReleaseMemObject(bin_assignments); + if (errors == 0) { log_info("add_index_bin_test passed. Each item was put in the correct " diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt index dde3311d..684a7d1d 100644 --- a/test_conformance/basic/CMakeLists.txt +++ b/test_conformance/basic/CMakeLists.txt @@ -2,7 +2,7 @@ set(MODULE_NAME BASIC) set(${MODULE_NAME}_SOURCES main.cpp - test_fpmath_float.cpp + test_fpmath.cpp test_intmath.cpp test_hiloeo.cpp test_local.cpp test_pointercast.cpp test_if.cpp test_loop.cpp @@ -11,7 +11,7 @@ set(${MODULE_NAME}_SOURCES test_multireadimageonefmt.cpp test_multireadimagemultifmt.cpp test_imagedim.cpp test_vloadstore.cpp - test_int2float.cpp test_float2int.cpp + test_int2fp.cpp test_createkernelsinprogram.cpp test_hostptr.cpp test_explicit_s2v.cpp @@ -52,14 +52,12 @@ set(${MODULE_NAME}_SOURCES test_kernel_call_kernel_function.cpp test_local_kernel_scope.cpp test_progvar.cpp - test_wg_barrier.cpp test_global_linear_id.cpp test_local_linear_id.cpp test_enqueued_local_size.cpp test_simple_image_pitch.cpp test_get_linear_ids.cpp test_rw_image_access_qualifier.cpp - test_wg_barrier.cpp test_enqueued_local_size.cpp test_global_linear_id.cpp test_local_linear_id.cpp @@ -70,6 +68,6 @@ if(APPLE) list(APPEND ${MODULE_NAME}_SOURCES test_queue_priority.cpp) endif(APPLE) -set_gnulike_module_compile_flags("-Wno-unused-but-set-variable") +set_gnulike_module_compile_flags("-Wno-sign-compare") include(../CMakeCommon.txt) diff --git a/test_conformance/basic/main.cpp b/test_conformance/basic/main.cpp index 86c3cec3..d1901f95 100644 --- a/test_conformance/basic/main.cpp +++ b/test_conformance/basic/main.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2023 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -22,14 +22,15 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> + +#include <CL/cl_half.h> + #include "harness/testHarness.h" #include "procs.h" test_definition test_list[] = { ADD_TEST(hostptr), - ADD_TEST(fpmath_float), - ADD_TEST(fpmath_float2), - ADD_TEST(fpmath_float4), + ADD_TEST(fpmath), ADD_TEST(intmath_int), ADD_TEST(intmath_int2), ADD_TEST(intmath_int4), @@ -58,8 +59,8 @@ test_definition test_list[] = { ADD_TEST(image_r8), ADD_TEST(barrier), ADD_TEST_VERSION(wg_barrier, Version(2, 0)), - ADD_TEST(int2float), - ADD_TEST(float2int), + ADD_TEST(int2fp), + ADD_TEST(fp2int), ADD_TEST(imagereadwrite), ADD_TEST(imagereadwrite3d), ADD_TEST(readimage3d), @@ -155,7 +156,7 @@ test_definition test_list[] = { ADD_TEST(simple_read_image_pitch), ADD_TEST(simple_write_image_pitch), -#if defined( __APPLE__ ) +#if defined(__APPLE__) ADD_TEST(queue_priority), #endif @@ -164,9 +165,35 @@ test_definition test_list[] = { }; const int test_num = ARRAY_SIZE( test_list ); +cl_half_rounding_mode halfRoundingMode = CL_HALF_RTE; + +test_status InitCL(cl_device_id device) +{ + if (is_extension_available(device, "cl_khr_fp16")) + { + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + halfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + halfRoundingMode = CL_HALF_RTZ; + } + else + { + log_error("Error while acquiring half rounding mode"); + return TEST_FAIL; + } + } + + return TEST_PASS; +} int main(int argc, const char *argv[]) { - return runTestHarness(argc, argv, test_num, test_list, false, 0); + return runTestHarnessWithCheck(argc, argv, test_num, test_list, false, 0, + InitCL); } diff --git a/test_conformance/basic/procs.h b/test_conformance/basic/procs.h index c14340de..b685ecd5 100644 --- a/test_conformance/basic/procs.h +++ b/test_conformance/basic/procs.h @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,6 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. // + #include "harness/kernelHelpers.h" #include "harness/testHarness.h" #include "harness/errorHelpers.h" @@ -21,9 +22,8 @@ #include "harness/rounding_mode.h" extern int test_hostptr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_fpmath_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_fpmath_float2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_fpmath_float4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_fpmath(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); extern int test_intmath_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_intmath_int2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_intmath_int4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); @@ -52,8 +52,10 @@ extern int test_image_r8(cl_device_id deviceID, cl_context context, cl_comm extern int test_simplebarrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_wg_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_int2float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_float2int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_int2fp(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_fp2int(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); extern int test_imagearraycopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_imagearraycopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_imagereadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); diff --git a/test_conformance/basic/test_astype.cpp b/test_conformance/basic/test_astype.cpp index 7281f904..45669a7c 100644 --- a/test_conformance/basic/test_astype.cpp +++ b/test_conformance/basic/test_astype.cpp @@ -14,62 +14,39 @@ // limitations under the License. // #include "harness/compat.h" +#include "harness/conversions.h" +#include "harness/stringHelpers.h" +#include "harness/typeWrappers.h" +#include <limits.h> #include <stdio.h> #include <string.h> -#include <limits.h> #include <sys/types.h> #include <sys/stat.h> - +#include <vector> #include "procs.h" -#include "harness/conversions.h" -#include "harness/typeWrappers.h" +// clang-format off -static const char *astype_kernel_pattern = -"%s\n" -"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n" -"{\n" -" int tid = get_global_id( 0 );\n" -" %s%s tmp = as_%s%s( src[ tid ] );\n" -" dst[ tid ] = tmp;\n" -"}\n"; - -static const char *astype_kernel_pattern_V3srcV3dst = -"%s\n" -"__kernel void test_fn( __global %s *src, __global %s *dst )\n" -"{\n" -" int tid = get_global_id( 0 );\n" -" %s%s tmp = as_%s%s( vload3(tid,src) );\n" -" vstore3(tmp,tid,dst);\n" -"}\n"; -// in the printf, remove the third and fifth argument, each of which -// should be a "3", when copying from the printf for astype_kernel_pattern - -static const char *astype_kernel_pattern_V3dst = -"%s\n" -"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n" -"{\n" -" int tid = get_global_id( 0 );\n" -" %s3 tmp = as_%s3( src[ tid ] );\n" -" vstore3(tmp,tid,dst);\n" -"}\n"; -// in the printf, remove the fifth argument, which -// should be a "3", when copying from the printf for astype_kernel_pattern +static char extension[128] = { 0 }; +static char strLoad[128] = { 0 }; +static char strStore[128] = { 0 }; +static const char *regLoad = "as_%s%s(src[tid]);\n"; +static const char *v3Load = "as_%s%s(vload3(tid,(__global %s*)src));\n"; +static const char *regStore = "dst[tid] = tmp;\n"; +static const char *v3Store = "vstore3(tmp, tid, (__global %s*)dst);\n"; - -static const char *astype_kernel_pattern_V3src = -"%s\n" -"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n" +static const char* astype_kernel_pattern[] = { +extension, +"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n" "{\n" -" int tid = get_global_id( 0 );\n" -" %s%s tmp = as_%s%s( vload3(tid,src) );\n" -" dst[ tid ] = tmp;\n" -"}\n"; -// in the printf, remove the third argument, which -// should be a "3", when copying from the printf for astype_kernel_pattern +" int tid = get_global_id( 0 );\n", +" %s%s tmp = ", strLoad, +" ", strStore, +"}\n"}; +// clang-format on int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType, unsigned int vecSize, unsigned int outVecSize, @@ -81,68 +58,60 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q clKernelWrapper kernel; clMemWrapper streams[ 2 ]; - char programSrc[ 10240 ]; size_t threads[ 1 ], localThreads[ 1 ]; size_t typeSize = get_explicit_type_size( inVecType ); size_t outTypeSize = get_explicit_type_size(outVecType); char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; - MTdata d; - - - - // Create program - if(outVecSize == 3 && vecSize == 3) { - // astype_kernel_pattern_V3srcV3dst - sprintf( programSrc, astype_kernel_pattern_V3srcV3dst, - (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name( inVecType ), // sizeNames[ vecSize ], - get_explicit_type_name( outVecType ), // sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ] ); - } else if(outVecSize == 3) { - // astype_kernel_pattern_V3dst - sprintf( programSrc, astype_kernel_pattern_V3dst, - (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name( inVecType ), sizeNames[ vecSize ], - get_explicit_type_name( outVecType ), - get_explicit_type_name( outVecType ), - get_explicit_type_name( outVecType )); - - } else if(vecSize == 3) { - // astype_kernel_pattern_V3src - sprintf( programSrc, astype_kernel_pattern_V3src, - (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name( inVecType ),// sizeNames[ vecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]); - } else { - sprintf( programSrc, astype_kernel_pattern, - (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name( inVecType ), sizeNames[ vecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ], - get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]); - } - - const char *ptr = programSrc; + MTdataHolder d(gRandomSeed); + + std::ostringstream sstr; + if (outVecType == kDouble || inVecType == kDouble) + sstr << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + + if (outVecType == kHalf || inVecType == kHalf) + sstr << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + + strcpy(extension, sstr.str().c_str()); + + if (vecSize == 3) + std::snprintf(strLoad, sizeof(strLoad), v3Load, + get_explicit_type_name(outVecType), sizeNames[outVecSize], + get_explicit_type_name(inVecType)); + else + std::snprintf(strLoad, sizeof(strLoad), regLoad, + get_explicit_type_name(outVecType), + sizeNames[outVecSize]); + + if (outVecSize == 3) + std::snprintf(strStore, sizeof(strStore), v3Store, + get_explicit_type_name(outVecType)); + else + std::snprintf(strStore, sizeof(strStore), "%s", regStore); + + auto str = + concat_kernel(astype_kernel_pattern, + sizeof(astype_kernel_pattern) / sizeof(const char *)); + std::string kernelSource = + str_sprintf(str, get_explicit_type_name(inVecType), sizeNames[vecSize], + get_explicit_type_name(outVecType), sizeNames[outVecSize], + get_explicit_type_name(outVecType), sizeNames[outVecSize]); + + const char *ptr = kernelSource.c_str(); error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); test_error( error, "Unable to create testing kernel" ); - // Create some input values size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize; - char *inBuffer = (char*)malloc( inBufferSize ); + std::vector<char> inBuffer(inBufferSize); size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize; - char *outBuffer = (char*)malloc( outBufferSize ); + std::vector<char> outBuffer(outBufferSize); - d = init_genrand( gRandomSeed ); - generate_random_data( inVecType, numElements * vecSize, - d, inBuffer ); - free_mtdata(d); d = NULL; + generate_random_data(inVecType, numElements * vecSize, d, + &inBuffer.front()); // Create I/O streams and set arguments - streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error ); + streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize, + &inBuffer.front(), &error); test_error( error, "Unable to create I/O stream" ); streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error ); test_error( error, "Unable to create I/O stream" ); @@ -161,15 +130,15 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); test_error( error, "Unable to run kernel" ); - // Get the results and compare // The beauty is that astype is supposed to return the bit pattern as a different type, which means // the output should have the exact same bit pattern as the input. No interpretation necessary! - error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, outBufferSize, outBuffer, 0, NULL, NULL ); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, outBufferSize, + &outBuffer.front(), 0, NULL, NULL); test_error( error, "Unable to read results" ); - char *expected = inBuffer; - char *actual = outBuffer; + char *expected = &inBuffer.front(); + char *actual = &outBuffer.front(); size_t compSize = typeSize*vecSize; if(outTypeSize*outVecSize < compSize) { compSize = outTypeSize*outVecSize; @@ -178,8 +147,6 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q if(outVecSize == 4 && vecSize == 3) { // as_type4(vec3) should compile but produce undefined results?? - free(inBuffer); - free(outBuffer); return 0; } @@ -188,8 +155,6 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q // as_typen(vecm) should compile and run but produce // implementation-defined results for m != n // and n*sizeof(type) = sizeof(vecm) - free(inBuffer); - free(outBuffer); return 0; } @@ -203,17 +168,14 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q GetDataVectorString( expected, typeSize, vecSize, expectedString ), GetDataVectorString( actual, typeSize, vecSize, actualString ) ); log_error("Src is :\n%s\n----\n%d threads %d localthreads\n", - programSrc, (int)threads[0],(int) localThreads[0]); - free(inBuffer); - free(outBuffer); + kernelSource.c_str(), (int)threads[0], + (int)localThreads[0]); return 1; } expected += typeSize * vecSize; actual += outTypeSize * outVecSize; } - free(inBuffer); - free(outBuffer); return 0; } @@ -223,31 +185,39 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, // legal in OpenCL 1.0, the result is dependent on the device it runs on, which means there's no actual way // for us to verify what is "valid". So the only thing we can test are types that match in size independent // of the element count (char -> uchar, etc) - ExplicitType vecTypes[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; - unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + const std::vector<ExplicitType> vecTypes = { kChar, kUChar, kShort, + kUShort, kInt, kUInt, + kLong, kULong, kFloat, + kHalf, kDouble }; + const unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; unsigned int inTypeIdx, outTypeIdx, sizeIdx, outSizeIdx; size_t inTypeSize, outTypeSize; int error = 0; - for( inTypeIdx = 0; vecTypes[ inTypeIdx ] != kNumExplicitTypes; inTypeIdx++ ) + bool fp16Support = is_extension_available(device, "cl_khr_fp16"); + bool fp64Support = is_extension_available(device, "cl_khr_fp64"); + + auto skip_type = [&](ExplicitType et) { + if ((et == kLong || et == kULong) && !gHasLong) + return true; + else if (et == kDouble && !fp64Support) + return true; + else if (et == kHalf && !fp16Support) + return true; + return false; + }; + + for (inTypeIdx = 0; inTypeIdx < vecTypes.size(); inTypeIdx++) { inTypeSize = get_explicit_type_size(vecTypes[inTypeIdx]); - if( vecTypes[ inTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) - continue; - - if (( vecTypes[ inTypeIdx ] == kLong || vecTypes[ inTypeIdx ] == kULong ) && !gHasLong ) - continue; + if (skip_type(vecTypes[inTypeIdx])) continue; - for( outTypeIdx = 0; vecTypes[ outTypeIdx ] != kNumExplicitTypes; outTypeIdx++ ) + for (outTypeIdx = 0; outTypeIdx < vecTypes.size(); outTypeIdx++) { outTypeSize = get_explicit_type_size(vecTypes[outTypeIdx]); - if( vecTypes[ outTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) { - continue; - } - if (( vecTypes[ outTypeIdx ] == kLong || vecTypes[ outTypeIdx ] == kULong ) && !gHasLong ) - continue; + if (skip_type(vecTypes[outTypeIdx])) continue; // change this check if( inTypeIdx == outTypeIdx ) { @@ -259,7 +229,6 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ ) { - for(outSizeIdx = 0; vecSizes[outSizeIdx] != 0; outSizeIdx++) { if(vecSizes[sizeIdx]*inTypeSize != @@ -268,10 +237,7 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue, continue; } error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], vecSizes[ sizeIdx ], vecSizes[outSizeIdx], n_elems ); - - } - } if(get_explicit_type_size(vecTypes[inTypeIdx]) == get_explicit_type_size(vecTypes[outTypeIdx])) { diff --git a/test_conformance/basic/test_async_copy.cpp b/test_conformance/basic/test_async_copy.cpp index a537c8fe..bb529bce 100644 --- a/test_conformance/basic/test_async_copy.cpp +++ b/test_conformance/basic/test_async_copy.cpp @@ -20,8 +20,7 @@ #include <string.h> #include <sys/types.h> #include <sys/stat.h> - - +#include <vector> #include "procs.h" #include "harness/conversions.h" @@ -86,8 +85,7 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, clKernelWrapper kernel; clMemWrapper streams[ 2 ]; size_t threads[ 1 ], localThreads[ 1 ]; - void *inBuffer, *outBuffer; - MTdata d; + MTdataHolder d(gRandomSeed); char vecNameString[64]; vecNameString[0] = 0; if (vecSize == 1) sprintf(vecNameString, "%s", get_explicit_type_name(vecType)); @@ -109,9 +107,15 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, char programSource[4096]; programSource[0]=0; char *programPtr; - sprintf(programSource, kernelCode, - vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString); + std::string extStr = ""; + if (vecType == kDouble) + extStr = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"; + else if (vecType == kHalf) + extStr = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"; + + sprintf(programSource, kernelCode, extStr.c_str(), vecNameString, + vecNameString, vecNameString, vecNameString, + get_explicit_type_name(vecType), vecNameString, vecNameString); //log_info("program: %s\n", programSource); programPtr = programSource; @@ -150,9 +154,10 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize; size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize; - inBuffer = (void*)malloc(globalBufferSize); - outBuffer = (void*)malloc(globalBufferSize); - memset(outBuffer, 0, globalBufferSize); + std::vector<unsigned char> inBuffer(globalBufferSize); + std::vector<unsigned char> outBuffer(globalBufferSize); + + outBuffer.assign(globalBufferSize, 0); cl_int copiesPerWorkItemInt, copiesPerWorkgroup; copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem; @@ -164,13 +169,15 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, threads[0] = globalWorkgroupSize; localThreads[0] = localWorkgroupSize; - d = init_genrand( gRandomSeed ); - generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer ); - free_mtdata(d); d = NULL; + generate_random_data(vecType, + globalBufferSize / get_explicit_type_size(vecType), d, + &inBuffer.front()); - streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error ); + streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize, + &inBuffer.front(), &error); test_error( error, "Unable to create input buffer" ); - streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error ); + streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize, + &outBuffer.front(), &error); test_error( error, "Unable to create output buffer" ); error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); @@ -189,16 +196,18 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, test_error( error, "Unable to queue kernel" ); // Read - error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL ); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, globalBufferSize, + &outBuffer.front(), 0, NULL, NULL); test_error( error, "Unable to read results" ); // Verify int failuresPrinted = 0; - if( memcmp( inBuffer, outBuffer, globalBufferSize ) != 0 ) + if (memcmp(&inBuffer.front(), &outBuffer.front(), globalBufferSize) != 0) { size_t typeSize = get_explicit_type_size(vecType)* vecSize; - unsigned char * inchar = (unsigned char*)inBuffer; - unsigned char * outchar = (unsigned char*)outBuffer; + unsigned char *inchar = static_cast<unsigned char *>(&inBuffer.front()); + unsigned char *outchar = + static_cast<unsigned char *>(&outBuffer.front()); for (int i=0; i< (int)globalBufferSize; i+=(int)elementSize) { if (memcmp( ((char *)inchar)+i, ((char *)outchar)+i, typeSize) != 0 ) { @@ -226,26 +235,29 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, } } - free(inBuffer); - free(outBuffer); - return failuresPrinted ? -1 : 0; } int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; + const std::vector<ExplicitType> vecType = { kChar, kUChar, kShort, kUShort, + kInt, kUInt, kLong, kULong, + kFloat, kHalf, kDouble }; unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; unsigned int size, typeIndex; int errors = 0; - for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) - { - if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) ) - continue; + bool fp16Support = is_extension_available(deviceID, "cl_khr_fp16"); + bool fp64Support = is_extension_available(deviceID, "cl_khr_fp64"); + for (typeIndex = 0; typeIndex < vecType.size(); typeIndex++) + { if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong ) continue; + else if (vecType[typeIndex] == kDouble && !fp64Support) + continue; + else if (vecType[typeIndex] == kHalf && !fp16Support) + continue; for( size = 0; vecSizes[ size ] != 0; size++ ) { @@ -259,9 +271,6 @@ int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_qu return 0; } - - - int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_copy_all_types( deviceID, context, queue, async_global_to_local_kernel ); diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp index bf3f1552..11ef84bd 100644 --- a/test_conformance/basic/test_async_copy2D.cpp +++ b/test_conformance/basic/test_async_copy2D.cpp @@ -27,17 +27,25 @@ static const char *async_global_to_local_kernel2D = R"OpenCLC( #pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable -%s // optional pragma string -__kernel void test_fn(const __global %s *src, __global %s *dst, - __local %s *localBuffer, int numElementsPerLine, +#define STRUCT_SIZE %d +typedef struct __attribute__((packed)) +{ + uchar byte[STRUCT_SIZE]; +} VarSizeStruct __attribute__((aligned(1))); + + +__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst, + __local VarSizeStruct *localBuffer, int numElementsPerLine, int lineCopiesPerWorkgroup, int lineCopiesPerWorkItem, int srcStride, int dstStride) { // Zero the local storage first for (int i = 0; i < lineCopiesPerWorkItem; i++) { for (int j = 0; j < numElementsPerLine; j++) { const int index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j; - localBuffer[index] = (%s)(%s)0; + for (int k = 0; k < STRUCT_SIZE; k++) { + localBuffer[index].byte[k] = 0; + } } } @@ -45,7 +53,7 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, // try the copy barrier( CLK_LOCAL_MEM_FENCE ); event_t event = async_work_group_copy_2D2D(localBuffer, 0, src, - lineCopiesPerWorkgroup * get_group_id(0) * srcStride, sizeof(%s), + lineCopiesPerWorkgroup * get_group_id(0) * srcStride, sizeof(VarSizeStruct), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride, dstStride, 0); // Wait for the copy to complete, then verify by manually copying to the dest @@ -63,16 +71,24 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, static const char *async_local_to_global_kernel2D = R"OpenCLC( #pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable -%s // optional pragma string -__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer, +#define STRUCT_SIZE %d +typedef struct __attribute__((packed)) +{ + uchar byte[STRUCT_SIZE]; +} VarSizeStruct __attribute__((aligned(1))); + + +__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst, __local VarSizeStruct *localBuffer, int numElementsPerLine, int lineCopiesPerWorkgroup, int lineCopiesPerWorkItem, int srcStride, int dstStride) { // Zero the local storage first for (int i = 0; i < lineCopiesPerWorkItem; i++) { for (int j = 0; j < numElementsPerLine; j++) { const int index = (get_local_id(0) * lineCopiesPerWorkItem + i) * srcStride + j; - localBuffer[index] = (%s)(%s)0; + for (int k = 0; k < STRUCT_SIZE; k++) { + localBuffer[index].byte[k] = 0; + } } } @@ -90,36 +106,22 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca // Do this to verify all kernels are done copying to the local buffer before we try the copy barrier(CLK_LOCAL_MEM_FENCE); event_t event = async_work_group_copy_2D2D(dst, lineCopiesPerWorkgroup * get_group_id(0) * dstStride, - localBuffer, 0, sizeof(%s), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride, + localBuffer, 0, sizeof(VarSizeStruct), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride, dstStride, 0 ); wait_group_events(1, &event); }; )OpenCLC"; -int test_copy2D(cl_device_id deviceID, cl_context context, - cl_command_queue queue, const char *kernelCode, - ExplicitType vecType, int vecSize, int srcMargin, int dstMargin, - bool localIsDst) +int test_copy2D(const cl_device_id deviceID, const cl_context context, + const cl_command_queue queue, const char *const kernelCode, + const size_t elementSize, const int srcMargin, + const int dstMargin, const bool localIsDst) { int error; - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper streams[2]; - size_t threads[1], localThreads[1]; - void *inBuffer, *outBuffer, *outBufferCopy; - MTdata d; - char vecNameString[64]; - vecNameString[0] = 0; - if (vecSize == 1) - sprintf(vecNameString, "%s", get_explicit_type_name(vecType)); - else - sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType), - vecSize); - size_t elementSize = get_explicit_type_size(vecType) * vecSize; - log_info("Testing %s with srcMargin = %d, dstMargin = %d\n", vecNameString, - srcMargin, dstMargin); + log_info("Testing %d byte element with srcMargin = %d, dstMargin = %d\n", + elementSize, srcMargin, dstMargin); cl_long max_local_mem_size; error = @@ -139,6 +141,13 @@ int test_copy2D(cl_device_id deviceID, cl_context context, test_error(error, "clGetDeviceInfo for CL_DEVICE_MAX_MEM_ALLOC_SIZE failed."); + cl_long max_work_group_size; + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(max_work_group_size), &max_work_group_size, + NULL); + test_error(error, + "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed."); + if (max_alloc_size > max_global_mem_size / 2) max_alloc_size = max_global_mem_size / 2; @@ -149,20 +158,17 @@ int test_copy2D(cl_device_id deviceID, cl_context context, test_error(error, "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed."); - char programSource[4096]; - programSource[0] = 0; - char *programPtr; + char programSource[4096] = { 0 }; + const char *programPtr = programSource; - sprintf(programSource, kernelCode, - vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" - : "", - vecNameString, vecNameString, vecNameString, vecNameString, - get_explicit_type_name(vecType), vecNameString); + sprintf(programSource, kernelCode, elementSize); // log_info("program: %s\n", programSource); - programPtr = programSource; + + clProgramWrapper program; + clKernelWrapper kernel; error = create_single_kernel_helper(context, &program, &kernel, 1, - (const char **)&programPtr, "test_fn"); + &programPtr, "test_fn"); test_error(error, "Unable to create testing kernel"); size_t max_workgroup_size; @@ -188,9 +194,6 @@ int test_copy2D(cl_device_id deviceID, cl_context context, const cl_int dstStride = numElementsPerLine + dstMargin; const cl_int srcStride = numElementsPerLine + srcMargin; - elementSize = - get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize); - const size_t lineCopiesPerWorkItem = 13; const size_t localStorageSpacePerWorkitem = lineCopiesPerWorkItem * elementSize * (localIsDst ? dstStride : srcStride); @@ -208,7 +211,6 @@ int test_copy2D(cl_device_id deviceID, cl_context context, if (maxLocalWorkgroupSize > max_workgroup_size) localWorkgroupSize = max_workgroup_size; - const size_t maxTotalLinesIn = (max_alloc_size / elementSize + srcMargin) / srcStride; const size_t maxTotalLinesOut = @@ -231,9 +233,17 @@ int test_copy2D(cl_device_id deviceID, cl_context context, const size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize; - inBuffer = (void *)malloc(inBufferSize); - outBuffer = (void *)malloc(outBufferSize); - outBufferCopy = (void *)malloc(outBufferSize); + if ((localBufferSize / 4) > max_work_group_size) + { + log_info("Skipping due to resource requirements local:%db " + "max_work_group_size:%d\n", + localBufferSize, max_work_group_size); + return 0; + } + + void *const inBuffer = (void *)malloc(inBufferSize); + void *const outBuffer = (void *)malloc(outBufferSize); + void *const outBufferCopy = (void *)malloc(outBufferSize); const cl_int lineCopiesPerWorkItemInt = static_cast<cl_int>(lineCopiesPerWorkItem); @@ -250,18 +260,20 @@ int test_copy2D(cl_device_id deviceID, cl_context context, (int)inBufferSize, (int)outBufferSize, lineCopiesPerWorkgroup, lineCopiesPerWorkItemInt); + size_t threads[1], localThreads[1]; + threads[0] = globalWorkgroupSize; localThreads[0] = localWorkgroupSize; - d = init_genrand(gRandomSeed); - generate_random_data( - vecType, inBufferSize / get_explicit_type_size(vecType), d, inBuffer); - generate_random_data( - vecType, outBufferSize / get_explicit_type_size(vecType), d, outBuffer); + MTdata d = init_genrand(gRandomSeed); + generate_random_data(kChar, inBufferSize, d, inBuffer); + generate_random_data(kChar, outBufferSize, d, outBuffer); free_mtdata(d); d = NULL; memcpy(outBufferCopy, outBuffer, outBufferSize); + clMemWrapper streams[2]; + streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error); test_error(error, "Unable to create input buffer"); @@ -301,8 +313,7 @@ int test_copy2D(cl_device_id deviceID, cl_context context, // Verify int failuresPrinted = 0; - // Verify - size_t typeSize = get_explicit_type_size(vecType) * vecSize; + for (int i = 0; i < (int)globalWorkgroupSize * lineCopiesPerWorkItem * elementSize; i += elementSize) @@ -313,13 +324,12 @@ int test_copy2D(cl_device_id deviceID, cl_context context, int inIdx = i * srcStride + j; int outIdx = i * dstStride + j; if (memcmp(((char *)inBuffer) + inIdx, ((char *)outBuffer) + outIdx, - typeSize) + elementSize) != 0) { unsigned char *inchar = (unsigned char *)inBuffer + inIdx; unsigned char *outchar = (unsigned char *)outBuffer + outIdx; - char values[4096]; - values[0] = 0; + char values[4096] = { 0 }; if (failuresPrinted == 0) { @@ -382,16 +392,14 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode, bool localIsDst) { - ExplicitType vecType[] = { - kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, - kULong, kFloat, kDouble, kNumExplicitTypes - }; + const unsigned int elemSizes[] = { 1, 2, 3, 4, 5, 6, 7, + 8, 13, 16, 32, 47, 64 }; // The margins below represent the number of elements between the end of // one line and the start of the next. The strides are equivalent to the // length of the line plus the chosen margin. - unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; - unsigned int smallTypesMarginSizes[] = { 0, 10, 100 }; - unsigned int size, typeIndex, srcMargin, dstMargin; + // These have to be multipliers, because the margin must be a multiple of + // element size. + const unsigned int marginMultipliers[] = { 0, 10, 100 }; int errors = 0; @@ -399,55 +407,27 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context, { log_info( "Device does not support extended async copies. Skipping test.\n"); - return 0; } - - for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++) + else { - if (vecType[typeIndex] == kDouble - && !is_extension_available(deviceID, "cl_khr_fp64")) - continue; - - if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) - && !gHasLong) - continue; - - for (size = 0; vecSizes[size] != 0; size++) + for (const unsigned int elemSize : elemSizes) { - if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size] - <= 2) // small type + for (const unsigned int srcMarginMultiplier : marginMultipliers) { - for (srcMargin = 0; srcMargin < sizeof(smallTypesMarginSizes) - / sizeof(smallTypesMarginSizes[0]); - srcMargin++) + for (const unsigned int dstMarginMultiplier : marginMultipliers) { - for (dstMargin = 0; - dstMargin < sizeof(smallTypesMarginSizes) - / sizeof(smallTypesMarginSizes[0]); - dstMargin++) + if (test_copy2D(deviceID, context, queue, kernelCode, + elemSize, srcMarginMultiplier * elemSize, + dstMarginMultiplier * elemSize, localIsDst)) { - if (test_copy2D(deviceID, context, queue, kernelCode, - vecType[typeIndex], vecSizes[size], - smallTypesMarginSizes[srcMargin], - smallTypesMarginSizes[dstMargin], - localIsDst)) - { - errors++; - } + errors++; } } } - // not a small type, check only zero stride - else if (test_copy2D(deviceID, context, queue, kernelCode, - vecType[typeIndex], vecSizes[size], 0, 0, - localIsDst)) - { - errors++; - } } } - if (errors) return -1; - return 0; + + return errors ? -1 : 0; } int test_async_copy_global_to_local2D(cl_device_id deviceID, cl_context context, diff --git a/test_conformance/basic/test_async_copy3D.cpp b/test_conformance/basic/test_async_copy3D.cpp index 5eb41ebc..aa22f3a2 100644 --- a/test_conformance/basic/test_async_copy3D.cpp +++ b/test_conformance/basic/test_async_copy3D.cpp @@ -27,9 +27,14 @@ static const char *async_global_to_local_kernel3D = R"OpenCLC( #pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable -%s // optional pragma string -__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer, +#define STRUCT_SIZE %d +typedef struct __attribute__((packed)) +{ + uchar byte[STRUCT_SIZE]; +} VarSizeStruct __attribute__((aligned(1))); + +__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst, __local VarSizeStruct *localBuffer, int numElementsPerLine, int numLines, int planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, int dstLineStride, int srcPlaneStride, int dstPlaneStride ) { @@ -38,7 +43,9 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca for (int j = 0; j < numLines; j++) { for (int k = 0; k < numElementsPerLine; k++) { const int index = (get_local_id(0) * planesCopiesPerWorkItem + i) * dstPlaneStride + j * dstLineStride + k; - localBuffer[index] = (%s)(%s)0; + for (int k = 0; k < STRUCT_SIZE; k++) { + localBuffer[index].byte[k] = 0; + } } } } @@ -48,7 +55,7 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca event_t event = async_work_group_copy_3D3D(localBuffer, 0, src, planesCopiesPerWorkgroup * get_group_id(0) * srcPlaneStride, - sizeof(%s), (size_t)numElementsPerLine, (size_t)numLines, + sizeof(VarSizeStruct), (size_t)numElementsPerLine, (size_t)numLines, planesCopiesPerWorkgroup, srcLineStride, srcPlaneStride, dstLineStride, dstPlaneStride, 0); @@ -69,9 +76,14 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca static const char *async_local_to_global_kernel3D = R"OpenCLC( #pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable -%s // optional pragma string -__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer, +#define STRUCT_SIZE %d +typedef struct __attribute__((packed)) +{ + uchar byte[STRUCT_SIZE]; +} VarSizeStruct __attribute__((aligned(1))); + +__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst, __local VarSizeStruct *localBuffer, int numElementsPerLine, int numLines, int planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, int dstLineStride, int srcPlaneStride, int dstPlaneStride) { @@ -80,7 +92,9 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca for (int j = 0; j < numLines; j++) { for (int k = 0; k < numElementsPerLine; k++) { const int index = (get_local_id(0) * planesCopiesPerWorkItem + i) * srcPlaneStride + j * srcLineStride + k; - localBuffer[index] = (%s)(%s)0; + for (int k = 0; k < STRUCT_SIZE; k++) { + localBuffer[index].byte[k] = 0; + } } } } @@ -103,39 +117,26 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca event_t event = async_work_group_copy_3D3D(dst, planesCopiesPerWorkgroup * get_group_id(0) * dstPlaneStride, localBuffer, 0, - sizeof(%s), (size_t)numElementsPerLine, (size_t)numLines, planesCopiesPerWorkgroup, + sizeof(VarSizeStruct), (size_t)numElementsPerLine, (size_t)numLines, planesCopiesPerWorkgroup, srcLineStride, srcPlaneStride, dstLineStride, dstPlaneStride, 0); wait_group_events(1, &event); } )OpenCLC"; -int test_copy3D(cl_device_id deviceID, cl_context context, - cl_command_queue queue, const char *kernelCode, - ExplicitType vecType, int vecSize, int srcLineMargin, - int dstLineMargin, int srcPlaneMargin, int dstPlaneMargin, - bool localIsDst) +int test_copy3D(const cl_device_id deviceID, const cl_context context, + const cl_command_queue queue, const char *const kernelCode, + const size_t elementSize, const int srcLineMargin, + const int dstLineMargin, const int srcPlaneMargin, + const int dstPlaneMargin, const bool localIsDst) { int error; - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper streams[2]; - size_t threads[1], localThreads[1]; - void *inBuffer, *outBuffer, *outBufferCopy; - MTdata d; - char vecNameString[64]; - vecNameString[0] = 0; - if (vecSize == 1) - sprintf(vecNameString, "%s", get_explicit_type_name(vecType)); - else - sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType), - vecSize); - size_t elementSize = get_explicit_type_size(vecType) * vecSize; - log_info("Testing %s with srcLineMargin = %d, dstLineMargin = %d, " - "srcPlaneMargin = %d, dstPlaneMargin = %d\n", - vecNameString, srcLineMargin, dstLineMargin, srcPlaneMargin, - dstPlaneMargin); + log_info( + "Testing %d byte element with srcLineMargin = %d, dstLineMargin = %d, " + "srcPlaneMargin = %d, dstPlaneMargin = %d\n", + elementSize, srcLineMargin, dstLineMargin, srcPlaneMargin, + dstPlaneMargin); cl_long max_local_mem_size; error = @@ -165,20 +166,16 @@ int test_copy3D(cl_device_id deviceID, cl_context context, test_error(error, "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed."); - char programSource[4096]; - programSource[0] = 0; - char *programPtr; + char programSource[4096] = { 0 }; + const char *programPtr = programSource; - sprintf(programSource, kernelCode, - vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" - : "", - vecNameString, vecNameString, vecNameString, vecNameString, - get_explicit_type_name(vecType), vecNameString, vecNameString); + sprintf(programSource, kernelCode, elementSize); // log_info("program: %s\n", programSource); - programPtr = programSource; + clProgramWrapper program; + clKernelWrapper kernel; error = create_single_kernel_helper(context, &program, &kernel, 1, - (const char **)&programPtr, "test_fn"); + &programPtr, "test_fn"); test_error(error, "Unable to create testing kernel"); size_t max_workgroup_size; @@ -196,6 +193,13 @@ int test_copy3D(cl_device_id deviceID, cl_context context, test_error(error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); + cl_long max_work_group_size; + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(max_work_group_size), &max_work_group_size, + NULL); + test_error(error, + "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed."); + // Pick the minimum of the device and the kernel if (max_workgroup_size > max_local_workgroup_size[0]) max_workgroup_size = max_local_workgroup_size[0]; @@ -208,8 +212,6 @@ int test_copy3D(cl_device_id deviceID, cl_context context, const cl_int dstPlaneStride = (numLines * dstLineStride) + dstPlaneMargin; const cl_int srcPlaneStride = (numLines * srcLineStride) + srcPlaneMargin; - elementSize = - get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize); const size_t planesCopiesPerWorkItem = 2; const size_t localStorageSpacePerWorkitem = elementSize * planesCopiesPerWorkItem @@ -251,9 +253,17 @@ int test_copy3D(cl_device_id deviceID, cl_context context, const size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize; - inBuffer = (void *)malloc(inBufferSize); - outBuffer = (void *)malloc(outBufferSize); - outBufferCopy = (void *)malloc(outBufferSize); + if ((localBufferSize / 4) > max_work_group_size) + { + log_info("Skipping due to resource requirements local:%db " + "max_work_group_size:%d\n", + localBufferSize, max_work_group_size); + return 0; + } + + void *const inBuffer = (void *)malloc(inBufferSize); + void *const outBuffer = (void *)malloc(outBufferSize); + void *const outBufferCopy = (void *)malloc(outBufferSize); const cl_int planesCopiesPerWorkItemInt = static_cast<cl_int>(planesCopiesPerWorkItem); @@ -270,18 +280,20 @@ int test_copy3D(cl_device_id deviceID, cl_context context, (int)localBufferSize, (int)inBufferSize, (int)outBufferSize, planesCopiesPerWorkgroup, planesCopiesPerWorkItemInt); + size_t threads[1], localThreads[1]; + threads[0] = globalWorkgroupSize; localThreads[0] = localWorkgroupSize; - d = init_genrand(gRandomSeed); - generate_random_data( - vecType, inBufferSize / get_explicit_type_size(vecType), d, inBuffer); - generate_random_data( - vecType, outBufferSize / get_explicit_type_size(vecType), d, outBuffer); + MTdata d = init_genrand(gRandomSeed); + generate_random_data(kChar, inBufferSize, d, inBuffer); + generate_random_data(kChar, outBufferSize, d, outBuffer); free_mtdata(d); d = NULL; memcpy(outBufferCopy, outBuffer, outBufferSize); + clMemWrapper streams[2]; + streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error); test_error(error, "Unable to create input buffer"); @@ -327,8 +339,7 @@ int test_copy3D(cl_device_id deviceID, cl_context context, // Verify int failuresPrinted = 0; - // Verify - size_t typeSize = get_explicit_type_size(vecType) * vecSize; + for (int i = 0; i < (int)globalWorkgroupSize * planesCopiesPerWorkItem * elementSize; i += elementSize) @@ -341,14 +352,13 @@ int test_copy3D(cl_device_id deviceID, cl_context context, int inIdx = i * srcPlaneStride + j * srcLineStride + k; int outIdx = i * dstPlaneStride + j * dstLineStride + k; if (memcmp(((char *)inBuffer) + inIdx, - ((char *)outBuffer) + outIdx, typeSize) + ((char *)outBuffer) + outIdx, elementSize) != 0) { unsigned char *inchar = (unsigned char *)inBuffer + inIdx; unsigned char *outchar = (unsigned char *)outBuffer + outIdx; - char values[4096]; - values[0] = 0; + char values[4096] = { 0 }; if (failuresPrinted == 0) { @@ -439,17 +449,14 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode, bool localIsDst) { - ExplicitType vecType[] = { - kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, - kULong, kFloat, kDouble, kNumExplicitTypes - }; + const unsigned int elemSizes[] = { 1, 2, 3, 4, 5, 6, 7, + 8, 13, 16, 32, 47, 64 }; // The margins below represent the number of elements between the end of - // one line or plane and the start of the next. The strides are equivalent - // to the size of the line or plane plus the chosen margin. - unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; - unsigned int smallTypesMarginSizes[] = { 0, 10, 100 }; - unsigned int size, typeIndex, srcLineMargin, dstLineMargin, srcPlaneMargin, - dstPlaneMargin; + // one line and the start of the next. The strides are equivalent to the + // size of the line or plane plus the chosen margin. + // These have to be multipliers, because the margin must be a multiple of + // element size. + const unsigned int marginMultipliers[] = { 0, 10, 100 }; int errors = 0; @@ -457,67 +464,36 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context, { log_info( "Device does not support extended async copies. Skipping test.\n"); - return 0; } - - for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++) + else { - if (vecType[typeIndex] == kDouble - && !is_extension_available(deviceID, "cl_khr_fp64")) - continue; - - if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) - && !gHasLong) - continue; - - for (size = 0; vecSizes[size] != 0; size++) + for (const unsigned int elemSize : elemSizes) { - if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size] - <= 2) // small type + for (const unsigned int srcLineMarginMultiplier : marginMultipliers) { - for (srcLineMargin = 0; - srcLineMargin < sizeof(smallTypesMarginSizes) - / sizeof(smallTypesMarginSizes[0]); - srcLineMargin++) + for (const unsigned int dstLineMarginMultiplier : + marginMultipliers) { - for (dstLineMargin = 0; - dstLineMargin < sizeof(smallTypesMarginSizes) - / sizeof(smallTypesMarginSizes[0]); - dstLineMargin++) + for (const unsigned int srcPlaneMarginMultiplier : + marginMultipliers) { - for (srcPlaneMargin = 0; - srcPlaneMargin < sizeof(smallTypesMarginSizes) - / sizeof(smallTypesMarginSizes[0]); - srcPlaneMargin++) + for (const unsigned int dstPlaneMarginMultiplier : + marginMultipliers) { - for (dstPlaneMargin = 0; - dstPlaneMargin < sizeof(smallTypesMarginSizes) - / sizeof(smallTypesMarginSizes[0]); - dstPlaneMargin++) + if (test_copy3D(deviceID, context, queue, + kernelCode, elemSize, + srcLineMarginMultiplier * elemSize, + dstLineMarginMultiplier * elemSize, + srcPlaneMarginMultiplier * elemSize, + dstPlaneMarginMultiplier * elemSize, + localIsDst)) { - if (test_copy3D( - deviceID, context, queue, kernelCode, - vecType[typeIndex], vecSizes[size], - smallTypesMarginSizes[srcLineMargin], - smallTypesMarginSizes[dstLineMargin], - smallTypesMarginSizes[srcPlaneMargin], - smallTypesMarginSizes[dstPlaneMargin], - localIsDst)) - { - errors++; - } + errors++; } } } } } - // not a small type, check only zero stride - else if (test_copy3D(deviceID, context, queue, kernelCode, - vecType[typeIndex], vecSizes[size], 0, 0, 0, 0, - localIsDst)) - { - errors++; - } } } if (errors) return -1; diff --git a/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/basic/test_async_strided_copy.cpp index c456f38d..932e9b8c 100644 --- a/test_conformance/basic/test_async_strided_copy.cpp +++ b/test_conformance/basic/test_async_strided_copy.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -20,15 +20,16 @@ #include <string.h> #include <sys/types.h> #include <sys/stat.h> - - +#include <vector> #include "procs.h" #include "harness/conversions.h" +// clang-format off + static const char *async_strided_global_to_local_kernel = "%s\n" // optional pragma string -"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n" +"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n" "{\n" " int i;\n" // Zero the local storage first @@ -46,7 +47,7 @@ static const char *async_strided_global_to_local_kernel = static const char *async_strided_local_to_global_kernel = "%s\n" // optional pragma string -"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n" +"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n" "{\n" " int i;\n" // Zero the local storage first @@ -63,6 +64,7 @@ static const char *async_strided_local_to_global_kernel = " wait_group_events( 1, &event );\n" "}\n" ; +// clang-format on int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode, ExplicitType vecType, int vecSize, int stride) { @@ -71,8 +73,7 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu clKernelWrapper kernel; clMemWrapper streams[ 2 ]; size_t threads[ 1 ], localThreads[ 1 ]; - void *inBuffer, *outBuffer; - MTdata d; + MTdataHolder d(gRandomSeed); char vecNameString[64]; vecNameString[0] = 0; if (vecSize == 1) @@ -94,10 +95,15 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu char programSource[4096]; programSource[0]=0; char *programPtr; - sprintf(programSource, kernelCode, - vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - "", - vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString); + std::string extStr = ""; + if (vecType == kDouble) + extStr = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"; + else if (vecType == kHalf) + extStr = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"; + + sprintf(programSource, kernelCode, extStr.c_str(), vecNameString, + vecNameString, vecNameString, vecNameString, + get_explicit_type_name(vecType), vecNameString, vecNameString); //log_info("program: %s\n", programSource); programPtr = programSource; @@ -151,9 +157,9 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize*stride; size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize; - inBuffer = (void*)malloc(globalBufferSize); - outBuffer = (void*)malloc(globalBufferSize); - memset(outBuffer, 0, globalBufferSize); + std::vector<unsigned char> inBuffer(globalBufferSize); + std::vector<unsigned char> outBuffer(globalBufferSize); + memset(outBuffer.data(), 0, globalBufferSize); cl_int copiesPerWorkItemInt, copiesPerWorkgroup; copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem; @@ -165,13 +171,15 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu threads[0] = globalWorkgroupSize; localThreads[0] = localWorkgroupSize; - d = init_genrand( gRandomSeed ); - generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer ); - free_mtdata(d); d = NULL; + generate_random_data(vecType, + globalBufferSize / get_explicit_type_size(vecType), d, + inBuffer.data()); - streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error ); + streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize, + inBuffer.data(), &error); test_error( error, "Unable to create input buffer" ); - streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error ); + streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize, + outBuffer.data(), &error); test_error( error, "Unable to create output buffer" ); error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); @@ -192,17 +200,20 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu test_error( error, "Unable to queue kernel" ); // Read - error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL ); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, globalBufferSize, + outBuffer.data(), 0, NULL, NULL); test_error( error, "Unable to read results" ); // Verify size_t typeSize = get_explicit_type_size(vecType)* vecSize; for (int i=0; i<(int)globalBufferSize; i+=(int)elementSize*(int)stride) { - if (memcmp( ((char *)inBuffer)+i, ((char *)outBuffer)+i, typeSize) != 0 ) + if (memcmp(&inBuffer.at(i), &outBuffer.at(i), typeSize) != 0) { - unsigned char * inchar = (unsigned char*)inBuffer + i; - unsigned char * outchar = (unsigned char*)outBuffer + i; + unsigned char *inchar = + static_cast<unsigned char *>(&inBuffer.at(i)); + unsigned char *outchar = + static_cast<unsigned char *>(&outBuffer.at(i)); char values[4096]; values[0] = 0; @@ -215,34 +226,35 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu sprintf(values + strlen( values), "%2x ", outchar[j]); sprintf(values + strlen(values), "]"); log_error("%s\n", values); - free(inBuffer); - free(outBuffer); return -1; } } - free(inBuffer); - free(outBuffer); - return 0; } int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; - unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; - unsigned int strideSizes[] = { 1, 3, 4, 5, 0 }; + const std::vector<ExplicitType> vecType = { kChar, kUChar, kShort, kUShort, + kInt, kUInt, kLong, kULong, + kFloat, kHalf, kDouble }; + const unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; + const unsigned int strideSizes[] = { 1, 3, 4, 5, 0 }; unsigned int size, typeIndex, stride; int errors = 0; - for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) - { - if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) ) - continue; + bool fp16Support = is_extension_available(deviceID, "cl_khr_fp16"); + bool fp64Support = is_extension_available(deviceID, "cl_khr_fp64"); + for (typeIndex = 0; typeIndex < vecType.size(); typeIndex++) + { if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong ) continue; + else if (vecType[typeIndex] == kDouble && !fp64Support) + continue; + else if (vecType[typeIndex] == kHalf && !fp16Support) + continue; for( size = 0; vecSizes[ size ] != 0; size++ ) { @@ -260,9 +272,6 @@ int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_co return 0; } - - - int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return test_strided_copy_all_types( deviceID, context, queue, async_strided_global_to_local_kernel ); diff --git a/test_conformance/basic/test_barrier.cpp b/test_conformance/basic/test_barrier.cpp index d20af14a..6352b42f 100644 --- a/test_conformance/basic/test_barrier.cpp +++ b/test_conformance/basic/test_barrier.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -21,143 +21,136 @@ #include <sys/types.h> #include <sys/stat.h> +#include <algorithm> +#include <numeric> +#include <vector> #include "procs.h" -const char *barrier_kernel_code = -"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n" -"{\n" -" int tid = get_local_id(0);\n" -" int lsize = get_local_size(0);\n" -" int i;\n" -"\n" -" tmp_sum[tid] = 0;\n" -" for (i=tid; i<n; i+=lsize)\n" -" tmp_sum[tid] += a[i];\n" -" \n" -" // updated to work for any workgroup size \n" -" for (i=hadd(lsize,1); lsize>1; i = hadd(i,1))\n" -" {\n" -" barrier(CLK_GLOBAL_MEM_FENCE);\n" -" if (tid + i < lsize)\n" -" tmp_sum[tid] += tmp_sum[tid + i];\n" -" lsize = i; \n" -" }\n" -"\n" -" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n" -" if (tid == 0)\n" -" *sum = tmp_sum[0];\n" -"}\n"; - - -static int -verify_sum(int *inptr, int *outptr, int n) +namespace { +const char *barrier_kernel_code = R"( +__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, + __global int *sum) { - int r = 0; - int i; + int tid = get_local_id(0); + int lsize = get_local_size(0); + int i; - for (i=0; i<n; i++) - { - r += inptr[i]; - } + tmp_sum[tid] = 0; + for (i = tid; i < n; i += lsize) tmp_sum[tid] += a[i]; - if (r != outptr[0]) + // updated to work for any workgroup size + for (i = hadd(lsize, 1); lsize > 1; i = hadd(i, 1)) { - log_error("BARRIER test failed\n"); - return -1; + BARRIER(CLK_GLOBAL_MEM_FENCE); + if (tid + i < lsize) tmp_sum[tid] += tmp_sum[tid + i]; + lsize = i; } - log_info("BARRIER test passed\n"); - return 0; + // no barrier is required here because last person to write to tmp_sum[0] + // was tid 0 + if (tid == 0) *sum = tmp_sum[0]; } +)"; -int -test_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +void generate_random_inputs(std::vector<cl_int> &v) { - cl_mem streams[3]; - cl_int *input_ptr = NULL, *output_ptr = NULL; - cl_program program; - cl_kernel kernel; - size_t global_threads[3]; - size_t local_threads[3]; - int err; - int i; - size_t max_local_workgroup_size[3]; - size_t max_threadgroup_size = 0; - MTdata d; + RandomSeed seed(gRandomSeed); - err = create_single_kernel_helper(context, &program, &kernel, 1, &barrier_kernel_code, "compute_sum" ); - test_error(err, "Failed to build kernel/program."); + auto random_generator = [&seed]() { + return static_cast<cl_int>( + get_random_float(-0x01000000, 0x01000000, seed)); + }; + + std::generate(v.begin(), v.end(), random_generator); +} + +int test_barrier_common(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + std::string barrier_str) +{ + clMemWrapper streams[3]; + clProgramWrapper program; + clKernelWrapper kernel; - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, - sizeof(max_threadgroup_size), &max_threadgroup_size, NULL); - test_error(err, "clGetKernelWorkgroupInfo failed."); + cl_int output; + int err; - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL); - test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); + size_t max_threadgroup_size = 0; + std::string build_options = std::string("-DBARRIER=") + barrier_str; + err = create_single_kernel_helper(context, &program, &kernel, 1, + &barrier_kernel_code, "compute_sum", + build_options.c_str()); + test_error(err, "Failed to build kernel/program."); - // Pick the minimum of the device and the kernel - if (max_threadgroup_size > max_local_workgroup_size[0]) - max_threadgroup_size = max_local_workgroup_size[0]; + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, + &max_threadgroup_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed."); // work group size must divide evenly into the global size - while( num_elements % max_threadgroup_size ) - max_threadgroup_size--; + while (num_elements % max_threadgroup_size) max_threadgroup_size--; - input_ptr = (int*)malloc(sizeof(int) * num_elements); - output_ptr = (int*)malloc(sizeof(int)); + std::vector<cl_int> input(num_elements); streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, &err); + sizeof(cl_int) * num_elements, nullptr, &err); test_error(err, "clCreateBuffer failed."); - streams[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &err); + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), + nullptr, &err); test_error(err, "clCreateBuffer failed."); streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * max_threadgroup_size, NULL, &err); + sizeof(cl_int) * max_threadgroup_size, nullptr, &err); test_error(err, "clCreateBuffer failed."); - d = init_genrand( gRandomSeed ); - for (i=0; i<num_elements; i++) - input_ptr[i] = (int)get_random_float(-0x01000000, 0x01000000, d); - free_mtdata(d); d = NULL; + generate_random_inputs(input); - err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL); + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, + sizeof(cl_int) * num_elements, input.data(), 0, + nullptr, nullptr); test_error(err, "clEnqueueWriteBuffer failed."); - err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); - err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements); - err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]); - err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]); + err = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof(num_elements), &num_elements); + err |= clSetKernelArg(kernel, 2, sizeof(streams[2]), &streams[2]); + err |= clSetKernelArg(kernel, 3, sizeof(streams[1]), &streams[1]); test_error(err, "clSetKernelArg failed."); - global_threads[0] = max_threadgroup_size; - local_threads[0] = max_threadgroup_size; + size_t global_threads[] = { max_threadgroup_size }; + size_t local_threads[] = { max_threadgroup_size }; - err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL ); + err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, global_threads, + local_threads, 0, nullptr, nullptr); test_error(err, "clEnqueueNDRangeKernel failed."); - err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int), (void *)output_ptr, 0, NULL, NULL ); + err = clEnqueueReadBuffer(queue, streams[1], true, 0, sizeof(cl_int), + &output, 0, nullptr, nullptr); test_error(err, "clEnqueueReadBuffer failed."); - err = verify_sum(input_ptr, output_ptr, num_elements); - - - // cleanup - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseMemObject(streams[2]); - clReleaseKernel(kernel); - clReleaseProgram(program); - free(input_ptr); - free(output_ptr); + if (std::accumulate(input.begin(), input.end(), 0) != output) + { + log_error("%s test failed\n", barrier_str.c_str()); + err = -1; + } + else + { + log_info("%s test passed\n", barrier_str.c_str()); + } return err; } +} +int test_barrier(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_barrier_common(device, context, queue, num_elements, "barrier"); +} - - - +int test_wg_barrier(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return test_barrier_common(device, context, queue, num_elements, + "work_group_barrier"); +} diff --git a/test_conformance/basic/test_constant.cpp b/test_conformance/basic/test_constant.cpp index ed25c6ef..fc2667ee 100644 --- a/test_conformance/basic/test_constant.cpp +++ b/test_conformance/basic/test_constant.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -21,41 +21,44 @@ #include <sys/types.h> #include <sys/stat.h> +#include <algorithm> +#include <vector> #include "procs.h" -const char *constant_kernel_code = -"__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" float ftmp = tmpF[tid]; \n" -" float Itmp = tmpI[tid]; \n" -" out[tid] = ftmp * Itmp; \n" -"}\n"; - -const char *loop_constant_kernel_code = -"kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num)\n" -"{\n" -" int tid = get_global_id(0);\n" -" float sum = 0;\n" -" for (int i = 0; i < num; i++) {\n" -" float pos = i_pos[i*3];\n" -" sum += pos;\n" -" }\n" -" out[tid] = sum;\n" -"}\n"; - - -static int -verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n) +namespace { +const char* constant_kernel_code = R"( +__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI) +{ + int tid = get_global_id(0); + + float ftmp = tmpF[tid]; + float Itmp = tmpI[tid]; + out[tid] = ftmp * Itmp; +} +)"; + +const char* loop_constant_kernel_code = R"( +kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num) { - int i; + int tid = get_global_id(0); + float sum = 0; + for (int i = 0; i < num; i++) { + float pos = i_pos[i*3]; + sum += pos; + } + out[tid] = sum; +} +)"; + - for (i=0; i < n; i++) +int verify(std::vector<cl_float>& tmpF, std::vector<cl_int>& tmpI, + std::vector<cl_float>& out) +{ + for (int i = 0; i < out.size(); i++) { float f = tmpF[i] * tmpI[i]; - if( out[i] != f ) + if (out[i] != f) { log_error("CONSTANT test failed\n"); return -1; @@ -66,214 +69,172 @@ verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n) return 0; } - -static int -verify_loop_constant(const cl_float *tmp, cl_float *out, cl_int l, int n) +int verify_loop_constant(const std::vector<cl_float>& tmp, + std::vector<cl_float>& out, cl_int l) { - int i; - cl_int j; - for (i=0; i < n; i++) - { - float sum = 0; - for (j=0; j < l; ++j) - sum += tmp[j*3]; + float sum = 0; + for (int j = 0; j < l; ++j) sum += tmp[j * 3]; - if( out[i] != sum ) - { - log_error("loop CONSTANT test failed\n"); - return -1; - } + auto predicate = [&sum](cl_float elem) { return sum != elem; }; + + if (std::any_of(out.cbegin(), out.cend(), predicate)) + { + log_error("loop CONSTANT test failed\n"); + return -1; } log_info("loop CONSTANT test passed\n"); return 0; } -int -test_constant(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +template <typename T> void generate_random_inputs(std::vector<T>& v) +{ + RandomSeed seed(gRandomSeed); + + auto random_generator = [&seed]() { + return static_cast<T>(get_random_float(-0x02000000, 0x02000000, seed)); + }; + + std::generate(v.begin(), v.end(), random_generator); +} +} + +int test_constant(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) { - cl_mem streams[3]; - cl_int *tmpI; - cl_float *tmpF, *out; - cl_program program; - cl_kernel kernel; - size_t global_threads[3]; - int err; - unsigned int i; + clMemWrapper streams[3]; + clProgramWrapper program; + clKernelWrapper kernel; + + size_t global_threads[3]; + int err; cl_ulong maxSize, maxGlobalSize, maxAllocSize; size_t num_floats, num_ints, constant_values; - MTdata d; - RoundingMode oldRoundMode; + RoundingMode oldRoundMode; int isRTZ = 0; - /* Verify our test buffer won't be bigger than allowed */ - err = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); - test_error( err, "Unable to get max constant buffer size" ); - - log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", maxSize); - - // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE - err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0); - test_error(err, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE"); - - if (maxSize > maxGlobalSize / 4) - maxSize = maxGlobalSize / 4; - - err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0); - test_error(err, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE "); - - if (maxSize > maxAllocSize) - maxSize = maxAllocSize; - - maxSize/=4; - num_ints = (size_t)maxSize/sizeof(cl_int); - num_floats = (size_t)maxSize/sizeof(cl_float); - if (num_ints >= num_floats) { - constant_values = num_floats; - } else { - constant_values = num_ints; - } - - log_info("Test will attempt to use %lu bytes with one %lu byte constant int buffer and one %lu byte constant float buffer.\n", - constant_values*sizeof(cl_int) + constant_values*sizeof(cl_float), constant_values*sizeof(cl_int), constant_values*sizeof(cl_float)); - - tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values); - tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values); - out = (cl_float*)malloc(sizeof(cl_float) * constant_values); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * constant_values, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * constant_values, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * constant_values, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } + /* Verify our test buffer won't be bigger than allowed */ + err = clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, + sizeof(maxSize), &maxSize, 0); + test_error(err, "Unable to get max constant buffer size"); + log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", + maxSize); - d = init_genrand( gRandomSeed ); - for (i=0; i<constant_values; i++) { - tmpI[i] = (int)get_random_float(-0x02000000, 0x02000000, d); - tmpF[i] = get_random_float(-0x02000000, 0x02000000, d); - } - free_mtdata(d); d = NULL; + // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE + err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, + sizeof(maxGlobalSize), &maxGlobalSize, 0); + test_error(err, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE"); - err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)tmpF, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clWriteArray failed\n"); - return -1; - } - err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_int)*constant_values, (void *)tmpI, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clWriteArray failed\n"); - return -1; - } + maxSize = std::min(maxSize, maxGlobalSize / 4); + + err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, 0); + test_error(err, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE"); + + maxSize = std::min(maxSize, maxAllocSize); + + maxSize /= 4; + num_ints = static_cast<size_t>(maxSize / sizeof(cl_int)); + num_floats = static_cast<size_t>(maxSize / sizeof(cl_float)); + constant_values = std::min(num_floats, num_ints); + + + log_info( + "Test will attempt to use %lu bytes with one %lu byte constant int " + "buffer and one %lu byte constant float buffer.\n", + constant_values * sizeof(cl_int) + constant_values * sizeof(cl_float), + constant_values * sizeof(cl_int), constant_values * sizeof(cl_float)); + + std::vector<cl_int> tmpI(constant_values); + std::vector<cl_float> tmpF(constant_values); + std::vector<cl_float> out(constant_values); + + + streams[0] = + clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(cl_float) * constant_values, nullptr, &err); + test_error(err, "clCreateBuffer failed"); - err = create_single_kernel_helper(context, &program, &kernel, 1, &constant_kernel_code, "constant_kernel" ); - if (err) { - log_error("Failed to create kernel and program: %d\n", err); - return -1; - } + streams[1] = + clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(cl_float) * constant_values, nullptr, &err); + test_error(err, "clCreateBuffer failed"); + + streams[2] = + clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(cl_int) * constant_values, nullptr, &err); + test_error(err, "clCreateBuffer failed"); + + generate_random_inputs(tmpI); + generate_random_inputs(tmpF); + + err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, + sizeof(cl_float) * constant_values, tmpF.data(), + 0, nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer failed"); + err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, + sizeof(cl_int) * constant_values, tmpI.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer faile."); + + err = create_single_kernel_helper(context, &program, &kernel, 1, + &constant_kernel_code, "constant_kernel"); + test_error(err, "Failed to create kernel and program"); err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]); - if (err != CL_SUCCESS) - { - log_error("clSetKernelArgs failed\n"); - return -1; - } + test_error(err, "clSetKernelArgs failed"); global_threads[0] = constant_values; - err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL ); - if (err != CL_SUCCESS) - { - log_error("clEnqueueNDRangeKernel failed: %d\n", err); - return -1; - } - err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL ); - if (err != CL_SUCCESS) - { - log_error("clEnqueueReadBuffer failed\n"); - return -1; - } + err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, global_threads, + nullptr, 0, nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed"); + + err = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0, + sizeof(cl_float) * constant_values, out.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed"); - //If we only support rtz mode - if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded) + // If we only support rtz mode + if (CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded) { oldRoundMode = set_round(kRoundTowardZero, kfloat); isRTZ = 1; } - err = verify(tmpF, tmpI, out, (int)constant_values); + err = verify(tmpF, tmpI, out); - if (isRTZ) - (void)set_round(oldRoundMode, kfloat); + if (isRTZ) (void)set_round(oldRoundMode, kfloat); // Loop constant buffer test - cl_program loop_program; - cl_kernel loop_kernel; + clProgramWrapper loop_program; + clKernelWrapper loop_kernel; cl_int limit = 2; - memset(out, 0, sizeof(cl_float) * constant_values); + memset(out.data(), 0, sizeof(cl_float) * constant_values); err = create_single_kernel_helper(context, &loop_program, &loop_kernel, 1, - &loop_constant_kernel_code, "loop_constant_kernel" ); - if (err) { - log_error("Failed to create loop kernel and program: %d\n", err); - return -1; - } + &loop_constant_kernel_code, + "loop_constant_kernel"); + test_error(err, "Failed to create kernel and program"); err = clSetKernelArg(loop_kernel, 0, sizeof streams[0], &streams[0]); err |= clSetKernelArg(loop_kernel, 1, sizeof streams[1], &streams[1]); err |= clSetKernelArg(loop_kernel, 2, sizeof(limit), &limit); - if (err != CL_SUCCESS) { - log_error("clSetKernelArgs for loop kernel failed\n"); - return -1; - } + test_error(err, "clSetKernelArgs failed"); - err = clEnqueueNDRangeKernel( queue, loop_kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL ); - if (err != CL_SUCCESS) { - log_error("clEnqueueNDRangeKernel failed: %d\n", err); - return -1; - } - err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL ); - if (err != CL_SUCCESS) { - log_error("clEnqueueReadBuffer failed\n"); - return -1; - } + err = clEnqueueNDRangeKernel(queue, loop_kernel, 1, nullptr, global_threads, + nullptr, 0, nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed"); - err = verify_loop_constant(tmpF, out, limit, (int)constant_values); + err = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0, + sizeof(cl_float) * constant_values, out.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed"); + + err = verify_loop_constant(tmpF, out, limit); - // cleanup - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseMemObject(streams[2]); - clReleaseKernel(kernel); - clReleaseProgram(program); - clReleaseKernel(loop_kernel); - clReleaseProgram(loop_program); - free(tmpI); - free(tmpF); - free(out); return err; } - - - - - diff --git a/test_conformance/basic/test_enqueue_map.cpp b/test_conformance/basic/test_enqueue_map.cpp index d28f7e41..c2ea24ef 100644 --- a/test_conformance/basic/test_enqueue_map.cpp +++ b/test_conformance/basic/test_enqueue_map.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -26,6 +26,7 @@ #include "harness/conversions.h" #include "harness/typeWrappers.h" +// clang-format off const cl_mem_flags flag_set[] = { CL_MEM_ALLOC_HOST_PTR, CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, @@ -33,93 +34,105 @@ const cl_mem_flags flag_set[] = { CL_MEM_COPY_HOST_PTR, 0 }; -const char* flag_set_names[] = { + +const char *flag_set_names[] = { "CL_MEM_ALLOC_HOST_PTR", "CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR", "CL_MEM_USE_HOST_PTR", "CL_MEM_COPY_HOST_PTR", "0" }; +// clang-format on -int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; - const size_t bufferSize = 256*256; - MTdataHolder d{gRandomSeed}; + const size_t bufferSize = 256 * 256; + MTdataHolder d{ gRandomSeed }; BufferOwningPtr<cl_char> hostPtrData{ malloc(bufferSize) }; BufferOwningPtr<cl_char> referenceData{ malloc(bufferSize) }; - BufferOwningPtr<cl_char> finalData{malloc(bufferSize)}; + BufferOwningPtr<cl_char> finalData{ malloc(bufferSize) }; - for (int src_flag_id=0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++) + for (size_t src_flag_id = 0; src_flag_id < ARRAY_SIZE(flag_set); + src_flag_id++) { clMemWrapper memObject; - log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]); + log_info("Testing with cl_mem_flags src: %s\n", + flag_set_names[src_flag_id]); generate_random_data(kChar, (unsigned int)bufferSize, d, hostPtrData); memcpy(referenceData, hostPtrData, bufferSize); void *hostPtr = nullptr; cl_mem_flags flags = flag_set[src_flag_id]; - bool hasHostPtr = (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR); + bool hasHostPtr = + (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR); if (hasHostPtr) hostPtr = hostPtrData; - memObject = clCreateBuffer(context, flags, bufferSize, hostPtr, &error); - test_error( error, "Unable to create testing buffer" ); + memObject = clCreateBuffer(context, flags, bufferSize, hostPtr, &error); + test_error(error, "Unable to create testing buffer"); if (!hasHostPtr) { error = - clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize, - hostPtrData, 0, NULL, NULL); - test_error( error, "clEnqueueWriteBuffer failed"); + clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize, + hostPtrData, 0, NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); } - for( int i = 0; i < 128; i++ ) + for (int i = 0; i < 128; i++) { - size_t offset = (size_t)random_in_range( 0, (int)bufferSize - 1, d ); - size_t length = (size_t)random_in_range( 1, (int)( bufferSize - offset ), d ); - - cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, - offset, length, 0, NULL, NULL, &error ); - if( error != CL_SUCCESS ) - { - print_error( error, "clEnqueueMapBuffer call failed" ); - log_error( "\tOffset: %d Length: %d\n", (int)offset, (int)length ); - return -1; - } - - // Write into the region - for( size_t j = 0; j < length; j++ ) - { - cl_char spin = (cl_char)genrand_int32( d ); - - // Test read AND write in one swipe - cl_char value = mappedRegion[ j ]; - value = spin - value; - mappedRegion[ j ] = value; - - // Also update the initial data array - value = referenceData[offset + j]; - value = spin - value; - referenceData[offset + j] = value; - } - - // Unmap - error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL ); - test_error( error, "Unable to unmap buffer" ); + size_t offset = (size_t)random_in_range(0, (int)bufferSize - 1, d); + size_t length = + (size_t)random_in_range(1, (int)(bufferSize - offset), d); + + cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( + queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, offset, + length, 0, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + print_error(error, "clEnqueueMapBuffer call failed"); + log_error("\tOffset: %d Length: %d\n", (int)offset, + (int)length); + return -1; + } + + // Write into the region + for (size_t j = 0; j < length; j++) + { + cl_char spin = (cl_char)genrand_int32(d); + + // Test read AND write in one swipe + cl_char value = mappedRegion[j]; + value = spin - value; + mappedRegion[j] = value; + + // Also update the initial data array + value = referenceData[offset + j]; + value = spin - value; + referenceData[offset + j] = value; + } + + // Unmap + error = clEnqueueUnmapMemObject(queue, memObject, mappedRegion, 0, + NULL, NULL); + test_error(error, "Unable to unmap buffer"); } - // Final validation: read actual values of buffer and compare against our reference - error = clEnqueueReadBuffer( queue, memObject, CL_TRUE, 0, bufferSize, finalData, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); + // Final validation: read actual values of buffer and compare against + // our reference + error = clEnqueueReadBuffer(queue, memObject, CL_TRUE, 0, bufferSize, + finalData, 0, NULL, NULL); + test_error(error, "Unable to read results"); - for( size_t q = 0; q < bufferSize; q++ ) + for (size_t q = 0; q < bufferSize; q++) { if (referenceData[q] != finalData[q]) { log_error( - "ERROR: Sample %d did not validate! Got %d, expected %d\n", - (int)q, (int)finalData[q], (int)referenceData[q]); + "ERROR: Sample %d did not validate! Got %d, expected %d\n", + (int)q, (int)finalData[q], (int)referenceData[q]); return -1; } } @@ -128,112 +141,129 @@ int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_comman return 0; } -int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_enqueue_map_image(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT32 }; const size_t imageSize = 256; const size_t imageDataSize = imageSize * imageSize * 4 * sizeof(cl_uint); - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) BufferOwningPtr<cl_uint> hostPtrData{ malloc(imageDataSize) }; BufferOwningPtr<cl_uint> referenceData{ malloc(imageDataSize) }; - BufferOwningPtr<cl_uint> finalData{malloc(imageDataSize)}; - - MTdataHolder d{gRandomSeed}; - for (int src_flag_id=0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++) { - clMemWrapper memObject; - log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]); - - generate_random_data(kUInt, (unsigned int)(imageSize * imageSize * 4), d, - hostPtrData); - memcpy(referenceData, hostPtrData, imageDataSize); - - cl_mem_flags flags = flag_set[src_flag_id]; - bool hasHostPtr = (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR); - void *hostPtr = nullptr; - if (hasHostPtr) hostPtr = hostPtrData; - memObject = create_image_2d(context, CL_MEM_READ_WRITE | flags, &format, - imageSize, imageSize, 0, hostPtr, &error ); - test_error( error, "Unable to create testing buffer" ); - - if (!hasHostPtr) { - size_t write_origin[3]={0,0,0}, write_region[3]={imageSize, imageSize, 1}; - error = - clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, write_region, - 0, 0, hostPtrData, 0, NULL, NULL); - test_error( error, "Unable to write to testing buffer" ); - } - - for( int i = 0; i < 128; i++ ) + BufferOwningPtr<cl_uint> finalData{ malloc(imageDataSize) }; + + MTdataHolder d{ gRandomSeed }; + for (size_t src_flag_id = 0; src_flag_id < ARRAY_SIZE(flag_set); + src_flag_id++) { + clMemWrapper memObject; + log_info("Testing with cl_mem_flags src: %s\n", + flag_set_names[src_flag_id]); + + generate_random_data(kUInt, (unsigned int)(imageSize * imageSize * 4), + d, hostPtrData); + memcpy(referenceData, hostPtrData, imageDataSize); + + cl_mem_flags flags = flag_set[src_flag_id]; + bool hasHostPtr = + (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR); + void *hostPtr = nullptr; + if (hasHostPtr) hostPtr = hostPtrData; + memObject = create_image_2d(context, CL_MEM_READ_WRITE | flags, &format, + imageSize, imageSize, 0, hostPtr, &error); + test_error(error, "Unable to create testing buffer"); - size_t offset[3], region[3]; - size_t rowPitch; - - offset[ 0 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d ); - region[ 0 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 0 ] - 1), d ); - offset[ 1 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d ); - region[ 1 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 1 ] - 1), d ); - offset[ 2 ] = 0; - region[ 2 ] = 1; - cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, - offset, region, &rowPitch, NULL, 0, NULL, NULL, &error ); - if( error != CL_SUCCESS ) - { - print_error( error, "clEnqueueMapImage call failed" ); - log_error( "\tOffset: %d,%d Region: %d,%d\n", (int)offset[0], (int)offset[1], (int)region[0], (int)region[1] ); - return -1; - } - - // Write into the region - cl_uint *mappedPtr = mappedRegion; - for( size_t y = 0; y < region[ 1 ]; y++ ) - { - for( size_t x = 0; x < region[ 0 ] * 4; x++ ) + if (!hasHostPtr) { - cl_int spin = (cl_int)random_in_range( 16, 1024, d ); - - cl_int value; - // Test read AND write in one swipe - value = mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ]; - value = spin - value; - mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ] = value; - - // Also update the initial data array - value = - referenceData[((offset[1] + y) * imageSize + offset[0]) * 4 + x]; - value = spin - value; - referenceData[((offset[1] + y) * imageSize + offset[0]) * 4 + x] = - value; + size_t write_origin[3] = { 0, 0, 0 }, + write_region[3] = { imageSize, imageSize, 1 }; + error = clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, + write_region, 0, 0, hostPtrData, 0, + NULL, NULL); + test_error(error, "Unable to write to testing buffer"); } - } - // Unmap - error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL ); - test_error( error, "Unable to unmap buffer" ); - } + for (int i = 0; i < 128; i++) + { - // Final validation: read actual values of buffer and compare against our reference - size_t finalOrigin[3] = { 0, 0, 0 }, finalRegion[3] = { imageSize, imageSize, 1 }; - error = clEnqueueReadImage( queue, memObject, CL_TRUE, finalOrigin, finalRegion, 0, 0, finalData, 0, NULL, NULL ); - test_error( error, "Unable to read results" ); + size_t offset[3], region[3]; + size_t rowPitch; + + offset[0] = (size_t)random_in_range(0, (int)imageSize - 1, d); + region[0] = + (size_t)random_in_range(1, (int)(imageSize - offset[0] - 1), d); + offset[1] = (size_t)random_in_range(0, (int)imageSize - 1, d); + region[1] = + (size_t)random_in_range(1, (int)(imageSize - offset[1] - 1), d); + offset[2] = 0; + region[2] = 1; + cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( + queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, offset, + region, &rowPitch, NULL, 0, NULL, NULL, &error); + if (error != CL_SUCCESS) + { + print_error(error, "clEnqueueMapImage call failed"); + log_error("\tOffset: %d,%d Region: %d,%d\n", (int)offset[0], + (int)offset[1], (int)region[0], (int)region[1]); + return -1; + } - for( size_t q = 0; q < imageSize * imageSize * 4; q++ ) - { - if (referenceData[q] != finalData[q]) + // Write into the region + cl_uint *mappedPtr = mappedRegion; + for (size_t y = 0; y < region[1]; y++) + { + for (size_t x = 0; x < region[0] * 4; x++) + { + cl_int spin = (cl_int)random_in_range(16, 1024, d); + + cl_int value; + // Test read AND write in one swipe + value = mappedPtr[(y * rowPitch / sizeof(cl_uint)) + x]; + value = spin - value; + mappedPtr[(y * rowPitch / sizeof(cl_uint)) + x] = value; + + // Also update the initial data array + value = + referenceData[((offset[1] + y) * imageSize + offset[0]) + * 4 + + x]; + value = spin - value; + referenceData[((offset[1] + y) * imageSize + offset[0]) * 4 + + x] = value; + } + } + + // Unmap + error = clEnqueueUnmapMemObject(queue, memObject, mappedRegion, 0, + NULL, NULL); + test_error(error, "Unable to unmap buffer"); + } + + // Final validation: read actual values of buffer and compare against + // our reference + size_t finalOrigin[3] = { 0, 0, 0 }, + finalRegion[3] = { imageSize, imageSize, 1 }; + error = clEnqueueReadImage(queue, memObject, CL_TRUE, finalOrigin, + finalRegion, 0, 0, finalData, 0, NULL, NULL); + test_error(error, "Unable to read results"); + + for (size_t q = 0; q < imageSize * imageSize * 4; q++) { - log_error("ERROR: Sample %d (coord %d,%d) did not validate! Got " - "%d, expected %d\n", - (int)q, (int)((q / 4) % imageSize), - (int)((q / 4) / imageSize), (int)finalData[q], - (int)referenceData[q]); - return -1; + if (referenceData[q] != finalData[q]) + { + log_error( + "ERROR: Sample %d (coord %d,%d) did not validate! Got " + "%d, expected %d\n", + (int)q, (int)((q / 4) % imageSize), + (int)((q / 4) / imageSize), (int)finalData[q], + (int)referenceData[q]); + return -1; + } } - } - } // cl_mem_flags + } // cl_mem_flags return 0; } - diff --git a/test_conformance/basic/test_fpmath.cpp b/test_conformance/basic/test_fpmath.cpp new file mode 100644 index 00000000..9bdb192e --- /dev/null +++ b/test_conformance/basic/test_fpmath.cpp @@ -0,0 +1,386 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "harness/compat.h" +#include "harness/rounding_mode.h" +#include "harness/stringHelpers.h" + +#include <CL/cl_half.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include <algorithm> +#include <functional> +#include <map> +#include <string> +#include <vector> + +#include "procs.h" + +static const char *fp_kernel_code = R"( +%s +__kernel void test_fp(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *dst) +{ + int tid = get_global_id(0); + + dst[tid] = srcA[tid] OP srcB[tid]; +})"; + +extern cl_half_rounding_mode halfRoundingMode; + +#define HFF(num) cl_half_from_float(num, halfRoundingMode) +#define HTF(num) cl_half_to_float(num) + +template <typename T> double toDouble(T val) +{ + if (std::is_same<cl_half, T>::value) + return HTF(val); + else + return val; +} + +bool isHalfNan(cl_half v) +{ + // Extract FP16 exponent and mantissa + uint16_t h_exp = (v >> (CL_HALF_MANT_DIG - 1)) & 0x1F; + uint16_t h_mant = v & 0x3FF; + + // NaN test + return (h_exp == 0x1F && h_mant != 0); +} + +cl_half half_plus(cl_half a, cl_half b) +{ + return HFF(std::plus<float>()(HTF(a), HTF(b))); +} + +cl_half half_minus(cl_half a, cl_half b) +{ + return HFF(std::minus<float>()(HTF(a), HTF(b))); +} + +cl_half half_mult(cl_half a, cl_half b) +{ + return HFF(std::multiplies<float>()(HTF(a), HTF(b))); +} + +template <typename T> struct TestDef +{ + const char op; + std::function<T(T, T)> ref; + std::string type_str; + size_t vec_size; +}; + +template <typename T> +int verify_fp(std::vector<T> (&input)[2], std::vector<T> &output, + const TestDef<T> &test) +{ + auto &inA = input[0]; + auto &inB = input[1]; + for (size_t i = 0; i < output.size(); i++) + { + bool nan_test = false; + + T r = test.ref(inA[i], inB[i]); + + if (std::is_same<T, cl_half>::value) + nan_test = !(isHalfNan(r) && isHalfNan(output[i])); + + if (r != output[i] && nan_test) + { + log_error("FP math test for type: %s, vec size: %zu, failed at " + "index %zu, %a '%c' %a, expected %a, get %a\n", + test.type_str.c_str(), test.vec_size, i, toDouble(inA[i]), + test.op, toDouble(inB[i]), toDouble(r), + toDouble(output[i])); + return -1; + } + } + + return 0; +} + +template <typename T> void generate_random_inputs(std::vector<T> (&input)[2]) +{ + RandomSeed seed(gRandomSeed); + + if (std::is_same<T, float>::value) + { + auto random_generator = [&seed]() { + return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), + MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), seed); + }; + for (auto &v : input) + std::generate(v.begin(), v.end(), random_generator); + } + else if (std::is_same<T, double>::value) + { + auto random_generator = [&seed]() { + return get_random_double(-MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63), + MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63), + seed); + }; + for (auto &v : input) + std::generate(v.begin(), v.end(), random_generator); + } + else + { + auto random_generator = [&seed]() { + return HFF(get_random_float(-MAKE_HEX_FLOAT(0x1.0p8f, 0x1, 8), + MAKE_HEX_FLOAT(0x1.0p8f, 0x1, 8), + seed)); + }; + for (auto &v : input) + std::generate(v.begin(), v.end(), random_generator); + } +} + +struct TypesIterator +{ + using TypeIter = std::tuple<cl_float, cl_half, cl_double>; + + TypesIterator(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elems) + : context(context), queue(queue), fpConfigHalf(0), fpConfigFloat(0), + num_elements(num_elems) + { + // typeid().name one day + type2name[sizeof(cl_half)] = "half"; + type2name[sizeof(cl_float)] = "float"; + type2name[sizeof(cl_double)] = "double"; + + fp16Support = is_extension_available(deviceID, "cl_khr_fp16"); + fp64Support = is_extension_available(deviceID, "cl_khr_fp64"); + + fpConfigFloat = get_default_rounding_mode(deviceID); + + if (fp16Support) + fpConfigHalf = + get_default_rounding_mode(deviceID, CL_DEVICE_HALF_FP_CONFIG); + + for_each_elem(it); + } + + template <typename T> int test_fpmath(TestDef<T> &test) + { + constexpr size_t vecSizes[] = { 1, 2, 4, 8, 16 }; + cl_int err = CL_SUCCESS; + + std::ostringstream sstr; + if (std::is_same<T, double>::value) + sstr << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + + if (std::is_same<T, cl_half>::value) + sstr << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + + std::string program_source = + str_sprintf(std::string(fp_kernel_code), sstr.str().c_str()); + + for (unsigned i = 0; i < ARRAY_SIZE(vecSizes); i++) + { + test.vec_size = vecSizes[i]; + + std::ostringstream vecNameStr; + vecNameStr << test.type_str; + if (test.vec_size != 1) vecNameStr << test.vec_size; + + clMemWrapper streams[3]; + clProgramWrapper program; + clKernelWrapper kernel; + + size_t length = sizeof(T) * num_elements * test.vec_size; + + bool isRTZ = false; + RoundingMode oldMode = kDefaultRoundingMode; + + + // If we only support rtz mode + if (std::is_same<T, cl_half>::value) + { + if (CL_FP_ROUND_TO_ZERO == fpConfigHalf) + { + isRTZ = true; + oldMode = get_round(); + } + } + else if (std::is_same<T, float>::value) + { + if (CL_FP_ROUND_TO_ZERO == fpConfigFloat) + { + isRTZ = true; + oldMode = get_round(); + } + } + + std::vector<T> inputs[]{ + std::vector<T>(test.vec_size * num_elements), + std::vector<T>(test.vec_size * num_elements) + }; + std::vector<T> output = + std::vector<T>(test.vec_size * num_elements); + + generate_random_inputs<T>(inputs); + + for (size_t i = 0; i < ARRAY_SIZE(streams); i++) + { + streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, + NULL, &err); + test_error(err, "clCreateBuffer failed."); + } + for (size_t i = 0; i < ARRAY_SIZE(inputs); i++) + { + err = + clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, + inputs[i].data(), 0, NULL, NULL); + test_error(err, "clEnqueueWriteBuffer failed."); + } + + std::string build_options = "-DTYPE="; + build_options.append(vecNameStr.str()) + .append(" -DOP=") + .append(1, test.op); + + const char *ptr = program_source.c_str(); + err = + create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "test_fp", build_options.c_str()); + + test_error(err, "create_single_kernel_helper failed"); + + for (size_t i = 0; i < ARRAY_SIZE(streams); i++) + { + err = + clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); + test_error(err, "clSetKernelArgs failed."); + } + + size_t threads[] = { static_cast<size_t>(num_elements) }; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, + 0, NULL, NULL); + test_error(err, "clEnqueueNDRangeKernel failed."); + + err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, + output.data(), 0, NULL, NULL); + test_error(err, "clEnqueueReadBuffer failed."); + + if (isRTZ) set_round(kRoundTowardZero, kfloat); + + err = verify_fp(inputs, output, test); + + if (isRTZ) set_round(oldMode, kfloat); + + test_error(err, "test verification failed"); + log_info("FP '%c' '%s' test passed\n", test.op, + vecNameStr.str().c_str()); + } + + return err; + } + + template <typename T> int test_fpmath_common() + { + int err = TEST_PASS; + if (std::is_same<cl_half, T>::value) + { + TestDef<T> tests[] = { { '+', half_plus, type2name[sizeof(T)] }, + { '-', half_minus, type2name[sizeof(T)] }, + { '*', half_mult, type2name[sizeof(T)] } }; + for (auto &test : tests) err |= test_fpmath<T>(test); + } + else + { + TestDef<T> tests[] = { + { '+', std::plus<T>(), type2name[sizeof(T)] }, + { '-', std::minus<T>(), type2name[sizeof(T)] }, + { '*', std::multiplies<T>(), type2name[sizeof(T)] } + }; + for (auto &test : tests) err |= test_fpmath<T>(test); + } + + return err; + } + + template <typename T> bool skip_type() + { + if (std::is_same<double, T>::value && !fp64Support) + return true; + else if (std::is_same<cl_half, T>::value && !fp16Support) + return true; + return false; + } + + template <std::size_t Cnt = 0, typename Type> + void iterate_type(const Type &t) + { + bool doTest = !skip_type<Type>(); + + if (doTest) + { + if (test_fpmath_common<Type>()) + { + throw std::runtime_error("test_fpmath_common failed\n"); + } + } + } + + template <std::size_t Cnt = 0, typename... Tp> + inline typename std::enable_if<Cnt == sizeof...(Tp), void>::type + for_each_elem( + const std::tuple<Tp...> &) // Unused arguments are given no names. + {} + + template <std::size_t Cnt = 0, typename... Tp> + inline typename std::enable_if < Cnt<sizeof...(Tp), void>::type + for_each_elem(const std::tuple<Tp...> &t) + { + iterate_type<Cnt>(std::get<Cnt>(t)); + for_each_elem<Cnt + 1, Tp...>(t); + } + +protected: + TypeIter it; + + cl_context context; + cl_command_queue queue; + + cl_device_fp_config fpConfigHalf; + cl_device_fp_config fpConfigFloat; + + bool fp16Support; + bool fp64Support; + + int num_elements; + std::map<size_t, std::string> type2name; +}; + +int test_fpmath(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements) +{ + try + { + TypesIterator(device, context, queue, num_elements); + } catch (const std::runtime_error &e) + { + log_error("%s", e.what()); + return TEST_FAIL; + } + + return TEST_PASS; +} diff --git a/test_conformance/basic/test_fpmath_float.cpp b/test_conformance/basic/test_fpmath_float.cpp deleted file mode 100644 index fced0f4e..00000000 --- a/test_conformance/basic/test_fpmath_float.cpp +++ /dev/null @@ -1,196 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> -#include "harness/rounding_mode.h" - -#include <algorithm> -#include <functional> -#include <string> -#include <vector> - -#include "procs.h" - -struct TestDef -{ - const char op; - std::function<float(float, float)> ref; -}; - -static const char *fp_kernel_code = R"( -__kernel void test_fp(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *dst) -{ - int tid = get_global_id(0); - - dst[tid] = srcA[tid] OP srcB[tid]; -})"; - -static int verify_fp(std::vector<float> (&input)[2], std::vector<float> &output, - const TestDef &test) -{ - - auto &inA = input[0]; - auto &inB = input[1]; - for (int i = 0; i < output.size(); i++) - { - float r = test.ref(inA[i], inB[i]); - if (r != output[i]) - { - log_error("FP '%c' float test failed\n", test.op); - return -1; - } - } - - log_info("FP '%c' float test passed\n", test.op); - return 0; -} - - -void generate_random_inputs(std::vector<cl_float> (&input)[2]) -{ - RandomSeed seed(gRandomSeed); - - auto random_generator = [&seed]() { - return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), - MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), seed); - }; - - for (auto &v : input) - { - std::generate(v.begin(), v.end(), random_generator); - } -} - -template <size_t N> -int test_fpmath(cl_device_id device, cl_context context, cl_command_queue queue, - int num_elements, const std::string type_str, - const TestDef &test) -{ - clMemWrapper streams[3]; - clProgramWrapper program; - clKernelWrapper kernel; - - int err; - - size_t length = sizeof(cl_float) * num_elements * N; - - int isRTZ = 0; - RoundingMode oldMode = kDefaultRoundingMode; - - // If we only support rtz mode - if (CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device)) - { - isRTZ = 1; - oldMode = get_round(); - } - - - std::vector<cl_float> inputs[]{ std::vector<cl_float>(N * num_elements), - std::vector<cl_float>(N * num_elements) }; - std::vector<cl_float> output = std::vector<cl_float>(N * num_elements); - - generate_random_inputs(inputs); - - for (int i = 0; i < ARRAY_SIZE(streams); i++) - { - streams[i] = - clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err); - test_error(err, "clCreateBuffer failed."); - } - for (int i = 0; i < ARRAY_SIZE(inputs); i++) - { - err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length, - inputs[i].data(), 0, NULL, NULL); - test_error(err, "clEnqueueWriteBuffer failed."); - } - - std::string build_options = "-DTYPE="; - build_options.append(type_str).append(" -DOP=").append(1, test.op); - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &fp_kernel_code, "test_fp", - build_options.c_str()); - - test_error(err, "create_single_kernel_helper failed"); - - for (int i = 0; i < ARRAY_SIZE(streams); i++) - { - err = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); - test_error(err, "clSetKernelArgs failed."); - } - - size_t threads[] = { static_cast<size_t>(num_elements) }; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, - NULL); - test_error(err, "clEnqueueNDRangeKernel failed."); - - err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, - output.data(), 0, NULL, NULL); - test_error(err, "clEnqueueReadBuffer failed."); - - if (isRTZ) set_round(kRoundTowardZero, kfloat); - - err = verify_fp(inputs, output, test); - - if (isRTZ) set_round(oldMode, kfloat); - - return err; -} - - -template <size_t N> -int test_fpmath_common(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, - const std::string type_str) -{ - TestDef tests[] = { { '+', std::plus<float>() }, - { '-', std::minus<float>() }, - { '*', std::multiplies<float>() } }; - int err = TEST_PASS; - - for (const auto &test : tests) - { - err |= test_fpmath<N>(device, context, queue, num_elements, type_str, - test); - } - - return err; -} - -int test_fpmath_float(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_fpmath_common<1>(device, context, queue, num_elements, "float"); -} - -int test_fpmath_float2(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_fpmath_common<2>(device, context, queue, num_elements, - "float2"); -} - -int test_fpmath_float4(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - return test_fpmath_common<4>(device, context, queue, num_elements, - "float4"); -} diff --git a/test_conformance/basic/test_get_linear_ids.cpp b/test_conformance/basic/test_get_linear_ids.cpp index 3496fd0b..ee7dfb2f 100644 --- a/test_conformance/basic/test_get_linear_ids.cpp +++ b/test_conformance/basic/test_get_linear_ids.cpp @@ -104,15 +104,19 @@ test_get_linear_ids(cl_device_id device, cl_context context, cl_command_queue qu switch (dims) { case 1: - log_info(" testing offset=%u global=%u local=%u...\n", gwo[0], gws[0], lws[0]); + log_info(" testing offset=%zu global=%zu local=%zu...\n", gwo[0], + gws[0], lws[0]); break; case 2: - log_info(" testing offset=(%u,%u) global=(%u,%u) local=(%u,%u)...\n", - gwo[0], gwo[1], gws[0], gws[1], lws[0], lws[1]); + log_info(" testing offset=(%zu,%zu) global=(%zu,%zu) " + "local=(%zu,%zu)...\n", + gwo[0], gwo[1], gws[0], gws[1], lws[0], lws[1]); break; case 3: - log_info(" testing offset=(%u,%u,%u) global=(%u,%u,%u) local=(%u,%u,%u)...\n", - gwo[0], gwo[1], gwo[2], gws[0], gws[1], gws[2], lws[0], lws[1], lws[2]); + log_info(" testing offset=(%zu,%zu,%zu) global=(%zu,%zu,%zu) " + "local=(%zu,%zu,%zu)...\n", + gwo[0], gwo[1], gwo[2], gws[0], gws[1], gws[2], lws[0], + lws[1], lws[2]); break; } diff --git a/test_conformance/basic/test_hiloeo.cpp b/test_conformance/basic/test_hiloeo.cpp index 3470ad00..4e921a6e 100644 --- a/test_conformance/basic/test_hiloeo.cpp +++ b/test_conformance/basic/test_hiloeo.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -13,14 +13,13 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" - +#include <iomanip> +#include <limits.h> #include <stdio.h> #include <string.h> -#include <limits.h> #include <sys/types.h> #include <sys/stat.h> - +#include <vector> #include "procs.h" @@ -31,9 +30,10 @@ int odd_offset( int index, int vectorSize ) { return index * 2 + 1; } typedef int (*OffsetFunc)( int index, int vectorSize ); static const OffsetFunc offsetFuncs[4] = { hi_offset, lo_offset, even_offset, odd_offset }; -typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName ); static const char *operatorToUse_names[] = { "hi", "lo", "even", "odd" }; -static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong", "float", "double" }; +static const char *test_str_names[] = { "char", "uchar", "short", "ushort", + "int", "uint", "long", "ulong", + "half", "float", "double" }; static const unsigned int vector_sizes[] = { 1, 2, 3, 4, 8, 16}; static const unsigned int vector_aligns[] = { 1, 2, 4, 4, 8, 16}; @@ -45,43 +45,41 @@ static const unsigned int out_vector_idx[] = { 0, 0, 1, 1, 3, 4}; // strcat(gentype, vector_size_names[out_vector_idx[i]]); static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16"}; -static const size_t kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 }; +static const size_t kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 2, 4, 8 }; static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse ); int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { - cl_int *input_ptr, *output_ptr, *p; int err; - cl_uint i; int hasDouble = is_extension_available( device, "cl_khr_fp64" ); + int hasHalf = is_extension_available(device, "cl_khr_fp16"); cl_uint vectorSize, operatorToUse; cl_uint type; - MTdata d; + MTdataHolder d(gRandomSeed); int expressionMode; int numExpressionModes = 2; size_t length = sizeof(cl_int) * 4 * n_elems; - input_ptr = (cl_int*)malloc(length); - output_ptr = (cl_int*)malloc(length); + std::vector<cl_int> input_ptr(4 * n_elems); + std::vector<cl_int> output_ptr(4 * n_elems); - p = input_ptr; - d = init_genrand( gRandomSeed ); - for (i=0; i<4 * (cl_uint) n_elems; i++) - p[i] = genrand_int32(d); - free_mtdata(d); d = NULL; + for (cl_uint i = 0; i < 4 * (cl_uint)n_elems; i++) + input_ptr[i] = genrand_int32(d); for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ ) { // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes size_t elementCount = length / kSizes[type]; - cl_mem streams[2]; + clMemWrapper streams[2]; // skip double if unavailable if( !hasDouble && ( 0 == strcmp( test_str_names[type], "double" ))) continue; + if (!hasHalf && (0 == strcmp(test_str_names[type], "half"))) continue; + if( !gHasLong && (( 0 == strcmp( test_str_names[type], "long" )) || ( 0 == strcmp( test_str_names[type], "ulong" )))) @@ -104,12 +102,9 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, return -1; } - err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueWriteBuffer failed\n"); - return -1; - } + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, + input_ptr.data(), 0, NULL, NULL); + test_error(err, "clEnqueueWriteBuffer failed\n"); for( operatorToUse = 0; operatorToUse < sizeof( operatorToUse_names ) / sizeof( operatorToUse_names[0] ); operatorToUse++ ) { @@ -118,8 +113,8 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, for( vectorSize = 1; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) { for(expressionMode = 0; expressionMode < numExpressionModes; ++expressionMode) { - cl_program program = NULL; - cl_kernel kernel = NULL; + clProgramWrapper program; + clKernelWrapper kernel; cl_uint outVectorSize = out_vector_idx[vectorSize]; char expression[1024]; @@ -139,92 +134,64 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, "}\n" }; - if(expressionMode == 0) { - sprintf(expression, "srcA[tid]"); - } else if(expressionMode == 1) { - switch(vector_sizes[vectorSize]) { - case 16: - sprintf(expression, - "((%s16)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7, srcA[tid].s8, srcA[tid].s9, srcA[tid].sA, srcA[tid].sB, srcA[tid].sC, srcA[tid].sD, srcA[tid].sE, srcA[tid].sf))", - test_str_names[type] - ); - break; - case 8: - sprintf(expression, - "((%s8)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7))", - test_str_names[type] - ); - break; - case 4: - sprintf(expression, - "((%s4)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3))", - test_str_names[type] - ); - break; - case 3: - sprintf(expression, - "((%s3)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2))", - test_str_names[type] - ); - break; - case 2: - sprintf(expression, - "((%s2)(srcA[tid].s0, srcA[tid].s1))", - test_str_names[type] - ); - break; - default : - sprintf(expression, "srcA[tid]"); - log_info("Default\n"); - } - } else { - sprintf(expression, "srcA[tid]"); + if (expressionMode == 1 && vector_sizes[vectorSize] != 1) + { + std::ostringstream sstr; + const char *index_chars[] = { "0", "1", "2", "3", + "4", "5", "6", "7", + "8", "9", "A", "B", + "C", "D", "E", "f" }; + sstr << "((" << test_str_names[type] + << std::to_string(vector_sizes[vectorSize]) + << ")("; + for (unsigned i = 0; i < vector_sizes[vectorSize]; i++) + sstr << " srcA[tid].s" << index_chars[i] << ","; + sstr.seekp(-1, sstr.cur); + sstr << "))"; + std::snprintf(expression, sizeof(expression), "%s", + sstr.str().c_str()); + } + else + { + std::snprintf(expression, sizeof(expression), + "srcA[tid]"); } if (0 == strcmp( test_str_names[type], "double" )) source[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + if (0 == strcmp(test_str_names[type], "half")) + source[0] = + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + char kernelName[128]; snprintf( kernelName, sizeof( kernelName ), "test_%s_%s%s", operatorToUse_names[ operatorToUse ], test_str_names[type], vector_size_names[vectorSize] ); err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName ); - if (err) - return -1; + test_error(err, "create_single_kernel_helper failed\n"); err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); - if (err != CL_SUCCESS) - { - log_error("clSetKernelArgs failed\n"); - return -1; - } + test_error(err, "clSetKernelArg failed\n"); //Wipe the output buffer clean uint32_t pattern = 0xdeadbeef; - memset_pattern4( output_ptr, &pattern, length ); - err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueWriteBuffer failed\n"); - return -1; - } + memset_pattern4(output_ptr.data(), &pattern, length); + err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, + length, output_ptr.data(), 0, + NULL, NULL); + test_error(err, "clEnqueueWriteBuffer failed\n"); size_t size = elementCount / (vector_aligns[vectorSize]); err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueNDRangeKernel failed\n"); - return -1; - } + test_error(err, "clEnqueueNDRangeKernel failed\n"); - err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueReadBuffer failed\n"); - return -1; - } + err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, + length, output_ptr.data(), 0, + NULL, NULL); + test_error(err, "clEnqueueReadBuffer failed\n"); - char *inP = (char *)input_ptr; - char *outP = (char *)output_ptr; + char *inP = (char *)input_ptr.data(); + char *outP = (char *)output_ptr.data(); outP += kSizes[type] * ( ( vector_sizes[outVectorSize] ) - ( vector_sizes[ out_vector_idx[vectorSize] ] ) ); // was outP += kSizes[type] * ( ( 1 << outVectorSize ) - ( 1 << ( vectorSize - 1 ) ) ); @@ -240,180 +207,88 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, inP += kSizes[type] * ( vector_aligns[vectorSize] ); outP += kSizes[type] * ( vector_aligns[outVectorSize] ); } - - clReleaseKernel( kernel ); - clReleaseProgram( program ); log_info( "." ); fflush( stdout ); } } } - - clReleaseMemObject( streams[0] ); - clReleaseMemObject( streams[1] ); log_info( "done\n" ); } log_info("HiLoEO test passed\n"); - - free(input_ptr); - free(output_ptr); - return err; } -static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse ) +template <typename T> +cl_int verify(void *in, void *out, size_t elementCount, int type, + int vectorSize, int operatorToUse, size_t cmpVectorSize) { - cl_ulong array[8]; + size_t halfVectorSize = vector_sizes[out_vector_idx[vectorSize]]; + size_t elementSize = kSizes[type]; + OffsetFunc f = offsetFuncs[operatorToUse]; + cl_ulong array[8]; void *p = array; - size_t halfVectorSize = vector_sizes[out_vector_idx[vectorSize]]; - size_t cmpVectorSize = vector_sizes[out_vector_idx[vectorSize]]; - // was 1 << (vectorSize-1); - OffsetFunc f = offsetFuncs[ operatorToUse ]; - size_t elementSize = kSizes[type]; - - if(vector_size_names[vectorSize][0] == '3') { - if(operatorToUse_names[operatorToUse][0] == 'h' || - operatorToUse_names[operatorToUse][0] == 'o') // hi or odd - { - cmpVectorSize = 1; // special case for vec3 ignored values - } - } - switch( elementSize ) - { - case 1: - { - char *i = (char*)in; - char *o = (char*)out; - size_t j; - cl_uint k; - OffsetFunc f = offsetFuncs[ operatorToUse ]; - - for( k = 0; k < elementCount; k++ ) - { - char *o2 = (char*)p; - for( j = 0; j < halfVectorSize; j++ ) - o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; - - if( memcmp( o, o2, elementSize * cmpVectorSize ) ) - { - log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); - for( j = 1; j < halfVectorSize * 2; j++ ) - log_info( ", %d", i[j] ); - log_info( " } --> { %d", o[0] ); - for( j = 1; j < halfVectorSize; j++ ) - log_info( ", %d", o[j] ); - log_info( " }\n" ); - return -1; - } - i += 2 * halfVectorSize; - o += halfVectorSize; - } - } - break; + std::ostringstream ss; - case 2: - { - short *i = (short*)in; - short *o = (short*)out; - size_t j; - cl_uint k; - - for( k = 0; k < elementCount; k++ ) - { - short *o2 = (short*)p; - for( j = 0; j < halfVectorSize; j++ ) - o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; - - if( memcmp( o, o2, elementSize * cmpVectorSize ) ) - { - log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); - for( j = 1; j < halfVectorSize * 2; j++ ) - log_info( ", %d", i[j] ); - log_info( " } --> { %d", o[0] ); - for( j = 1; j < halfVectorSize; j++ ) - log_info( ", %d", o[j] ); - log_info( " }\n" ); - return -1; - } - i += 2 * halfVectorSize; - o += halfVectorSize; - } - } - break; + T *i = (T *)in, *o = (T *)out; - case 4: - { - int *i = (int*)in; - int *o = (int*)out; - size_t j; - cl_uint k; - - for( k = 0; k < elementCount; k++ ) - { - int *o2 = (int *)p; - for( j = 0; j < halfVectorSize; j++ ) - o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; - - for( j = 0; j < cmpVectorSize; j++ ) + for (cl_uint k = 0; k < elementCount; k++) + { + T *o2 = (T *)p; + for (size_t j = 0; j < halfVectorSize; j++) + o2[j] = i[f((int)j, (int)halfVectorSize * 2)]; + + if (memcmp(o, o2, elementSize * cmpVectorSize)) { - /* Allow float nans to be binary different */ - if( memcmp( &o[j], &o2[j], elementSize ) && !((strcmp(test_str_names[type], "float") == 0) && isnan(((float *)o)[j]) && isnan(((float *)o2)[j]))) - { - log_info( "\n%d) Failure for %s%s.%s { 0x%8.8x", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); - for( j = 1; j < halfVectorSize * 2; j++ ) - log_info( ", 0x%8.8x", i[j] ); - log_info( " } --> { 0x%8.8x", o[0] ); - for( j = 1; j < halfVectorSize; j++ ) - log_info( ", 0x%8.8x", o[j] ); - log_info( " }\n" ); + ss << "\n" + << k << ") Failure for" << test_str_names[type] + << vector_size_names[vectorSize] << '.' + << operatorToUse_names[operatorToUse] << " { " + << "0x" << std::setfill('0') << std::setw(elementSize * 2) + << std::hex << i[0]; + + for (size_t j = 1; j < halfVectorSize * 2; j++) ss << ", " << i[j]; + ss << " } --> { " << o[0]; + for (size_t j = 1; j < halfVectorSize; j++) ss << ", " << o[j]; + ss << " }\n"; return -1; - } } i += 2 * halfVectorSize; o += halfVectorSize; - } - } - break; - - case 8: - { - cl_ulong *i = (cl_ulong*)in; - cl_ulong *o = (cl_ulong*)out; - size_t j; - cl_uint k; - - for( k = 0; k < elementCount; k++ ) - { - cl_ulong *o2 = (cl_ulong*)p; - for( j = 0; j < halfVectorSize; j++ ) - o2[j] = i[ f((int)j, (int)halfVectorSize*2) ]; - - if( memcmp( o, o2, elementSize * cmpVectorSize ) ) - { - log_info( "\n%d) Failure for %s%s.%s { 0x%16.16llx", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] ); - for( j = 1; j < halfVectorSize * 2; j++ ) - log_info( ", 0x%16.16llx", i[j] ); - log_info( " } --> { 0x%16.16llx", o[0] ); - for( j = 1; j < halfVectorSize; j++ ) - log_info( ", 0x%16.16llx", o[j] ); - log_info( " }\n" ); - return -1; - } - i += 2 * halfVectorSize; - o += halfVectorSize; - } - } - break; - - default: - log_info( "Internal error. Unknown data type\n" ); - return -2; } - return 0; } +static int CheckResults(void *in, void *out, size_t elementCount, int type, + int vectorSize, int operatorToUse) +{ + size_t cmpVectorSize = vector_sizes[out_vector_idx[vectorSize]]; + size_t elementSize = kSizes[type]; + if (vector_size_names[vectorSize][0] == '3') + { + if (operatorToUse_names[operatorToUse][0] == 'h' + || operatorToUse_names[operatorToUse][0] == 'o') // hi or odd + { + cmpVectorSize = 1; // special case for vec3 ignored values + } + } + switch (elementSize) + { + case 1: + return verify<char>(in, out, elementCount, type, vectorSize, + operatorToUse, cmpVectorSize); + case 2: + return verify<short>(in, out, elementCount, type, vectorSize, + operatorToUse, cmpVectorSize); + case 4: + return verify<int>(in, out, elementCount, type, vectorSize, + operatorToUse, cmpVectorSize); + case 8: + return verify<cl_ulong>(in, out, elementCount, type, vectorSize, + operatorToUse, cmpVectorSize); + default: log_info("Internal error. Unknown data type\n"); return -2; + } +} diff --git a/test_conformance/basic/test_image_r8.cpp b/test_conformance/basic/test_image_r8.cpp index b633d6ab..2dca1611 100644 --- a/test_conformance/basic/test_image_r8.cpp +++ b/test_conformance/basic/test_image_r8.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -21,163 +21,111 @@ #include <sys/types.h> #include <sys/stat.h> +#include <algorithm> +#include <vector> #include "procs.h" -static const char *r_uint8_kernel_code = -"__kernel void test_r_uint8(read_only image2d_t srcimg, __global unsigned char *dst, sampler_t sampler)\n" -"{\n" -" int tid_x = get_global_id(0);\n" -" int tid_y = get_global_id(1);\n" -" int indx = tid_y * get_image_width(srcimg) + tid_x;\n" -" uint4 color;\n" -"\n" -" color = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n" -" dst[indx] = (unsigned char)(color.x);\n" -"\n" -"}\n"; - - -static unsigned char * -generate_8bit_image(int w, int h, MTdata d) +namespace { +const char *r_uint8_kernel_code = R"( +__kernel void test_r_uint8(read_only image2d_t srcimg, __global unsigned char *dst, sampler_t sampler) { - unsigned char *ptr = (unsigned char*)malloc(w * h * sizeof(unsigned char)); - int i; + int tid_x = get_global_id(0); + int tid_y = get_global_id(1); + int indx = tid_y * get_image_width(srcimg) + tid_x; + uint4 color; - for (i=0; i<w*h; i++) - ptr[i] = (unsigned char)genrand_int32(d); + color = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y)); + dst[indx] = (unsigned char)(color.x); +})"; - return ptr; -} -static int -verify_8bit_image(unsigned char *image, unsigned char *outptr, int w, int h) +void generate_random_inputs(std::vector<cl_uchar> &v) { - int i; + RandomSeed seed(gRandomSeed); - for (i=0; i<w*h; i++) - { - if (outptr[i] != image[i]) - { - log_error("READ_IMAGE_R_UNSIGNED_INT8 test failed\n"); - return -1; - } - } + auto random_generator = [&seed]() { + return static_cast<cl_uchar>(genrand_int32(seed)); + }; - log_info("READ_IMAGE_R_UNSIGNED_INT8 test passed\n"); - return 0; + std::generate(v.begin(), v.end(), random_generator); } -int -test_image_r8(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +} +int test_image_r8(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) { - cl_mem streams[2]; - cl_image_format img_format; - cl_uchar *input_ptr, *output_ptr; - cl_program program; - cl_kernel kernel; - size_t threads[3]; - int img_width = 512; - int img_height = 512; - int err; - MTdata d; - - PASSIVE_REQUIRE_IMAGE_SUPPORT( device ) - - img_format.image_channel_order = CL_R; - img_format.image_channel_data_type = CL_UNSIGNED_INT8; + clMemWrapper streams[2]; + clProgramWrapper program; + clKernelWrapper kernel; + const size_t img_width = 512; + const size_t img_height = 512; + const size_t length = img_width * img_height; + int err; + + PASSIVE_REQUIRE_IMAGE_SUPPORT(device) + + const cl_image_format img_format = { CL_R, CL_UNSIGNED_INT8 }; // early out if this image type is not supported if (!is_image_format_supported(context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &img_format)) { log_info("WARNING: Image type not supported; skipping test.\n"); - return 0; + return TEST_SKIPPED_ITSELF; } - d = init_genrand( gRandomSeed ); - input_ptr = generate_8bit_image(img_width, img_height, d); - free_mtdata(d); d = NULL; + std::vector<cl_uchar> input(length); + std::vector<cl_uchar> output(length); + + generate_random_inputs(input); - output_ptr = (cl_uchar*)malloc(sizeof(cl_uchar) * img_width * img_height); streams[0] = create_image_2d(context, CL_MEM_READ_ONLY, &img_format, - img_width, img_height, 0, NULL, NULL); - if (!streams[0]) - { - log_error("create_image_2d failed\n"); - return -1; - } + img_width, img_height, 0, nullptr, &err); + test_error(err, "create_image_2d failed."); streams[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_uchar) * img_width * img_height, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } + clCreateBuffer(context, CL_MEM_READ_WRITE, length, nullptr, &err); + test_error(err, "clCreateBuffer failed."); - size_t origin[3] = {0,0,0}, region[3]={img_width, img_height, 1}; - err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, - origin, region, 0, 0, - input_ptr, - 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clWriteImage failed: %d\n", err); - return -1; - } + const size_t origin[3] = { 0, 0, 0 }, + region[3] = { img_width, img_height, 1 }; + err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0, + input.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueWriteImage failed."); - err = create_single_kernel_helper(context, &program, &kernel, 1, &r_uint8_kernel_code, "test_r_uint8" ); - if (err) { - log_error("Failed to create kernel and program: %d\n", err); - return -1; - } + err = create_single_kernel_helper(context, &program, &kernel, 1, + &r_uint8_kernel_code, "test_r_uint8"); + test_error(err, "create_single_kernel_helper failed."); - cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err); - test_error(err, "clCreateSampler failed"); + clSamplerWrapper sampler = clCreateSampler( + context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err); + test_error(err, "clCreateSampler failed"); + + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler); + test_error(err, "clSetKernelArgs failed\n"); + + size_t threads[] = { img_width, img_height }; + err = clEnqueueNDRangeKernel(queue, kernel, 2, nullptr, threads, nullptr, 0, + nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed\n"); - err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); - err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); - err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler); - if (err != CL_SUCCESS) - { - log_error("clSetKernelArgs failed: %d\n", err); - return -1; - } - threads[0] = (size_t)img_width; - threads[1] = (size_t)img_height; - err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL ); - if (err != CL_SUCCESS) + err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, + output.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed\n"); + + if (0 != memcmp(input.data(), output.data(), length)) { - log_error("clEnqueueNDRangeKernel failed\n"); - return -1; + log_error("READ_IMAGE_R_UNSIGNED_INT8 test failed\n"); + err = -1; } - - err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_uchar)*img_width*img_height, (void *)output_ptr, 0, NULL, NULL ); - if (err != CL_SUCCESS) + else { - log_error("clEnqueueReadBuffer failed\n"); - return -1; + log_info("READ_IMAGE_R_UNSIGNED_INT8 test passed\n"); } - err = verify_8bit_image(input_ptr, output_ptr, img_width, img_height); - - - // cleanup - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - clReleaseSampler(sampler); - free(input_ptr); - free(output_ptr); - return err; } - - - - - diff --git a/test_conformance/basic/test_int2float.cpp b/test_conformance/basic/test_int2float.cpp deleted file mode 100644 index 3a8458c9..00000000 --- a/test_conformance/basic/test_int2float.cpp +++ /dev/null @@ -1,143 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> - - -#include "procs.h" - -const char *int2float_kernel_code = -"__kernel void test_int2float(__global int *src, __global float *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = (float)src[tid];\n" -"\n" -"}\n"; - - -int -verify_int2float(cl_int *inptr, cl_float *outptr, int n) -{ - int i; - - for (i=0; i<n; i++) - { - if (outptr[i] != (float)inptr[i]) - { - log_error("INT2FLOAT test failed\n"); - return -1; - } - } - - log_info("INT2FLOAT test passed\n"); - return 0; -} - -int -test_int2float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - cl_mem streams[2]; - cl_int *input_ptr; - cl_float *output_ptr; - cl_program program; - cl_kernel kernel; - size_t threads[1]; - int err; - int i; - MTdata d; - - input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_float) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - d = init_genrand( gRandomSeed ); - for (i=0; i<num_elements; i++) - input_ptr[i] = (cl_int)get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d); - free_mtdata(d); d = NULL; - - err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clWriteArray failed\n"); - return -1; - } - - err = create_single_kernel_helper(context, &program, &kernel, 1, &int2float_kernel_code, "test_int2float"); - if (err != CL_SUCCESS) - { - log_error("create_single_kernel_helper failed\n"); - return -1; - } - - err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); - err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); - if (err != CL_SUCCESS) - { - log_error("clSetKernelArgs failed\n"); - return -1; - } - - threads[0] = (size_t)num_elements; - err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL ); - if (err != CL_SUCCESS) - { - log_error("clEnqueueNDRangeKernel failed\n"); - return -1; - } - - err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL ); - if (err != CL_SUCCESS) - { - log_error("clEnqueueReadBuffer failed\n"); - return -1; - } - - err = verify_int2float(input_ptr, output_ptr, num_elements); - - // cleanup - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseKernel(kernel); - clReleaseProgram(program); - free(input_ptr); - free(output_ptr); - - return err; -} - - - - - diff --git a/test_conformance/basic/test_int2fp.cpp b/test_conformance/basic/test_int2fp.cpp new file mode 100644 index 00000000..dd5cc9a1 --- /dev/null +++ b/test_conformance/basic/test_int2fp.cpp @@ -0,0 +1,325 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "CL/cl_half.h" +#include "harness/compat.h" +#include "harness/errorHelpers.h" +#include "harness/stringHelpers.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include <algorithm> +#include <cstdint> +#include <map> +#include <vector> + +#include "procs.h" + +extern cl_half_rounding_mode halfRoundingMode; + +#define HFF(num) cl_half_from_float(num, halfRoundingMode) +#define HTF(num) cl_half_to_float(num) + +namespace { +const char *int2float_kernel_code = R"( +%s +__kernel void test_X2Y(__global TYPE_X *src, __global TYPE_Y *dst) +{ + int tid = get_global_id(0); + + dst[tid] = (TYPE_Y)src[tid]; + +})"; + +template <bool int2fp> struct TypesIterator +{ + TypesIterator(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elems, const char *test_name) + : context(context), queue(queue), test_name(test_name), + num_elements(num_elems) + { + fp16Support = is_extension_available(deviceID, "cl_khr_fp16"); + fp64Support = is_extension_available(deviceID, "cl_khr_fp64"); + + type2name[sizeof(cl_half)] = std::make_pair("half", "short"); + type2name[sizeof(cl_float)] = std::make_pair("float", "int"); + type2name[sizeof(cl_double)] = std::make_pair("double", "long"); + + std::tuple<cl_float, cl_half, cl_double> it; + for_each_elem(it); + } + + template <typename T> void generate_random_inputs(std::vector<T> &v) + { + RandomSeed seed(gRandomSeed); + + if (sizeof(T) == sizeof(cl_half)) + { + // Bound generated half values to 0x1.ffcp+14(32752.0) which is the + // largest cl_half value smaller than the max value of cl_short, + // 32767. + if (int2fp) + { + auto random_generator = [&seed]() { + return (cl_short)get_random_float( + -MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), + MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), seed); + }; + std::generate(v.begin(), v.end(), random_generator); + } + else + { + auto random_generator = [&seed]() { + return HFF(get_random_float( + -MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), + MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), seed)); + }; + std::generate(v.begin(), v.end(), random_generator); + } + } + else if (sizeof(T) == sizeof(cl_float)) + { + auto random_generator = [&seed]() { + return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), + MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), + seed); + }; + std::generate(v.begin(), v.end(), random_generator); + } + else if (sizeof(T) == sizeof(cl_double)) + { + auto random_generator = [&seed]() { + return get_random_double(-MAKE_HEX_DOUBLE(0x1.0p63, 0x1, 63), + MAKE_HEX_DOUBLE(0x1.0p63, 0x1, 63), + seed); + }; + std::generate(v.begin(), v.end(), random_generator); + } + } + + template <typename Tx, typename Ty> static bool equal_value(Tx a, Ty b) + { + return a == (Tx)b; + } + + static bool equal_value_from_half(cl_short a, cl_half b) + { + return a == (cl_short)HTF(b); + } + + static bool equal_value_to_half(cl_half a, cl_short b) + { + return a == HFF((float)b); + } + + + template <typename Tx, typename Ty> + int verify_X2Y(std::vector<Tx> input, std::vector<Ty> output) + { + if (std::is_same<Tx, cl_half>::value + || std::is_same<Ty, cl_half>::value) + { + bool res = true; + if (int2fp) + res = std::equal(output.begin(), output.end(), input.begin(), + equal_value_to_half); + else + res = std::equal(output.begin(), output.end(), input.begin(), + equal_value_from_half); + + if (!res) + { + log_error("%s test failed\n", test_name.c_str()); + return -1; + } + } + else + { + if (!std::equal(output.begin(), output.end(), input.begin(), + equal_value<Tx, Ty>)) + { + log_error("%s test failed\n", test_name.c_str()); + return -1; + } + } + + log_info("%s test passed\n", test_name.c_str()); + return 0; + } + + template <typename Tx, typename Ty> int test_X2Y() + { + clMemWrapper streams[2]; + clProgramWrapper program; + clKernelWrapper kernel; + int err; + + std::vector<Tx> input(num_elements); + std::vector<Ty> output(num_elements); + + streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(Tx) * num_elements, nullptr, &err); + test_error(err, "clCreateBuffer failed."); + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(Ty) * num_elements, nullptr, &err); + test_error(err, "clCreateBuffer failed."); + + generate_random_inputs(input); + + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, + sizeof(Tx) * num_elements, input.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer failed."); + + std::string src_name = type2name[sizeof(Tx)].first; + std::string dst_name = type2name[sizeof(Tx)].second; + if (int2fp) std::swap(src_name, dst_name); + + std::string build_options; + build_options.append("-DTYPE_X=").append(src_name.c_str()); + build_options.append(" -DTYPE_Y=").append(dst_name.c_str()); + + std::string extension; + if (sizeof(Tx) == sizeof(cl_double)) + extension = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + + if (sizeof(Tx) == sizeof(cl_half)) + extension = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + + std::string kernelSource = + str_sprintf(int2float_kernel_code, extension.c_str()); + const char *ptr = kernelSource.c_str(); + + err = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "test_X2Y", build_options.c_str()); + test_error(err, "create_single_kernel_helper failed."); + + err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); + err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); + test_error(err, "clSetKernelArg failed."); + + size_t threads[] = { (size_t)num_elements }; + err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, threads, + nullptr, 0, nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed."); + + err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, + sizeof(Ty) * num_elements, output.data(), 0, + nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed."); + + err = verify_X2Y(input, output); + + return err; + } + + template <typename T> bool skip_type() + { + if (std::is_same<double, T>::value && !fp64Support) + return true; + else if (std::is_same<cl_half, T>::value && !fp16Support) + return true; + return false; + } + + template <std::size_t Cnt = 0, typename T> void iterate_type(const T &t) + { + bool doTest = !skip_type<T>(); + + if (doTest) + { + typedef typename std::conditional< + (sizeof(T) == sizeof(std::int16_t)), std::int16_t, + typename std::conditional<(sizeof(T) == sizeof(std::int32_t)), + std::int32_t, + std::int64_t>::type>::type U; + if (int2fp) + { + if (test_X2Y<U, T>()) + throw std::runtime_error("test_X2Y failed\n"); + } + else + { + if (test_X2Y<T, U>()) + throw std::runtime_error("test_X2Y failed\n"); + } + } + } + + template <std::size_t Cnt = 0, typename... Tp> + inline typename std::enable_if<Cnt == sizeof...(Tp), void>::type + for_each_elem( + const std::tuple<Tp...> &) // Unused arguments are given no names. + {} + + template <std::size_t Cnt = 0, typename... Tp> + inline typename std::enable_if < Cnt<sizeof...(Tp), void>::type + for_each_elem(const std::tuple<Tp...> &t) + { + iterate_type<Cnt>(std::get<Cnt>(t)); + for_each_elem<Cnt + 1, Tp...>(t); + } + +protected: + cl_context context; + cl_command_queue queue; + + cl_device_fp_config fpConfigHalf; + cl_device_fp_config fpConfigFloat; + + bool fp16Support; + bool fp64Support; + + std::map<size_t, std::pair<std::string, std::string>> type2name; + + std::string test_name; + int num_elements; +}; + +} + +int test_int2fp(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements) +{ + try + { + TypesIterator<true>(device, context, queue, num_elements, "INT2FP"); + } catch (const std::runtime_error &e) + { + log_error("%s", e.what()); + return TEST_FAIL; + } + + return TEST_PASS; +} + +int test_fp2int(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements) +{ + try + { + TypesIterator<false>(device, context, queue, num_elements, "FP2INT"); + } catch (const std::runtime_error &e) + { + log_error("%s", e.what()); + return TEST_FAIL; + } + + return TEST_PASS; +} diff --git a/test_conformance/basic/test_intmath.cpp b/test_conformance/basic/test_intmath.cpp index 6fd41abb..5a4e9c2a 100644 --- a/test_conformance/basic/test_intmath.cpp +++ b/test_conformance/basic/test_intmath.cpp @@ -123,7 +123,7 @@ int test_intmath(cl_device_id device, cl_context context, size_t datasize = sizeof(T) * num_elements * N; // Create device buffers. - for (int i = 0; i < ARRAY_SIZE(streams); i++) + for (size_t i = 0; i < ARRAY_SIZE(streams); i++) { streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, datasize, NULL, &err); @@ -175,7 +175,7 @@ int test_intmath(cl_device_id device, cl_context context, test_error(err, "clEnqueueReadBuffer failed\n"); // Verify results - for (int i = 0; i < num_elements * N; i++) + for (unsigned i = 0; i < num_elements * N; i++) { T r = test.ref(inputA[i], inputB[i], inputC[i]); if (r != output[i]) diff --git a/test_conformance/basic/test_loop.cpp b/test_conformance/basic/test_loop.cpp index 1a91d9e4..1c9acd1a 100644 --- a/test_conformance/basic/test_loop.cpp +++ b/test_conformance/basic/test_loop.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -21,45 +21,45 @@ #include <sys/types.h> #include <sys/stat.h> +#include <vector> #include "procs.h" -const char *loop_kernel_code = -"__kernel void test_loop(__global int *src, __global int *loopindx, __global int *loopcnt, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -" int n = get_global_size(0);\n" -" int i, j;\n" -"\n" -" dst[tid] = 0;\n" -" for (i=0,j=loopindx[tid]; i<loopcnt[tid]; i++,j++)\n" -" {\n" -" if (j >= n)\n" -" j = 0;\n" -" dst[tid] += src[j];\n" -" }\n" -"\n" -"}\n"; - - -int -verify_loop(int *inptr, int *loopindx, int *loopcnt, int *outptr, int n) +namespace { +const char *loop_kernel_code = R"( +__kernel void test_loop(__global int *src, __global int *loopindx, __global int *loopcnt, __global int *dst) { - int r, i, j, k; + int tid = get_global_id(0); + int n = get_global_size(0); + int i, j; - for (i=0; i<n; i++) + dst[tid] = 0; + for (i=0, j=loopindx[tid]; i<loopcnt[tid]; i++, j++) { - r = 0; - for (j=0,k=loopindx[i]; j<loopcnt[i]; j++,k++) + if (j >= n) + j = 0; + dst[tid] += src[j]; + } +} +)"; + + +int verify_loop(std::vector<cl_int> inptr, std::vector<cl_int> loopindx, + std::vector<cl_int> loopcnt, std::vector<cl_int> outptr, int n) +{ + for (int i = 0; i < n; i++) + { + int r = 0; + for (int j = 0, k = loopindx[i]; j < loopcnt[i]; j++, k++) { - if (k >= n) - k = 0; + if (k >= n) k = 0; r += inptr[k]; } if (r != outptr[i]) { - log_error("LOOP test failed: %d found, expected %d\n", outptr[i], r); + log_error("LOOP test failed: %d found, expected %d\n", outptr[i], + r); return -1; } } @@ -67,119 +67,69 @@ verify_loop(int *inptr, int *loopindx, int *loopcnt, int *outptr, int n) log_info("LOOP test passed\n"); return 0; } - -int test_loop(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) +} +int test_loop(cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements) { - cl_mem streams[4]; - cl_int *input_ptr, *loop_indx, *loop_cnt, *output_ptr; - cl_program program; - cl_kernel kernel; - size_t threads[1]; - int err, i; + clMemWrapper streams[4]; + clProgramWrapper program; + clKernelWrapper kernel; + int err; size_t length = sizeof(cl_int) * num_elements; - input_ptr = (cl_int*)malloc(length); - loop_indx = (cl_int*)malloc(length); - loop_cnt = (cl_int*)malloc(length); - output_ptr = (cl_int*)malloc(length); + std::vector<cl_int> input(length); + std::vector<cl_int> loop_indx(length); + std::vector<cl_int> loop_cnt(length); + std::vector<cl_int> output(length); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[1]) + for (auto &stream : streams) { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[2]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL); - if (!streams[3]) - { - log_error("clCreateBuffer failed\n"); - return -1; + stream = + clCreateBuffer(context, CL_MEM_READ_WRITE, length, nullptr, &err); + test_error(err, "clCreateBuffer failed."); } - MTdata d = init_genrand( gRandomSeed ); - for (i=0; i<num_elements; i++) + RandomSeed seed(gRandomSeed); + for (int i = 0; i < num_elements; i++) { - input_ptr[i] = (int)genrand_int32(d); - loop_indx[i] = (int)get_random_float(0, num_elements-1, d); - loop_cnt[i] = (int)get_random_float(0, num_elements/32, d); - } - free_mtdata(d); d = NULL; - - err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueWriteBuffer failed\n"); - return -1; - } - err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, loop_indx, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueWriteBuffer failed\n"); - return -1; - } - err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, loop_cnt, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueWriteBuffer failed\n"); - return -1; - } - - err = create_single_kernel_helper(context, &program, &kernel, 1, &loop_kernel_code, "test_loop" ); - if (err) - return -1; - - err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); - err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]); - err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]); - err |= clSetKernelArg(kernel, 3, sizeof streams[3], &streams[3]); - if (err != CL_SUCCESS) + input[i] = static_cast<int>(genrand_int32(seed)); + loop_indx[i] = + static_cast<int>(get_random_float(0, num_elements - 1, seed)); + loop_cnt[i] = + static_cast<int>(get_random_float(0, num_elements / 32, seed)); + }; + + err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, + input.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer failed."); + err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, + loop_indx.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer failed."); + err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, + loop_cnt.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueWriteBuffer failed."); + + err = create_single_kernel_helper(context, &program, &kernel, 1, + &loop_kernel_code, "test_loop"); + test_error(err, "create_single_kernel_helper failed."); + + for (int i = 0; i < ARRAY_SIZE(streams); i++) { - log_error("clSetKernelArgs failed\n"); - return -1; + err = clSetKernelArg(kernel, i, sizeof streams[i], &streams[i]); + test_error(err, "clSetKernelArgs failed\n"); } - threads[0] = (unsigned int)num_elements; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueNDRangeKernel failed\n"); - return -1; - } - - err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clReadArray failed\n"); - return -1; - } - - err = verify_loop(input_ptr, loop_indx, loop_cnt, output_ptr, num_elements); - - // cleanup - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseMemObject(streams[2]); - clReleaseMemObject(streams[3]); - clReleaseKernel(kernel); - clReleaseProgram(program); - free(input_ptr); - free(loop_indx); - free(loop_cnt); - free(output_ptr); + size_t threads[] = { (size_t)num_elements }; + err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, threads, nullptr, 0, + nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed\n"); - return err; -} + err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, + output.data(), 0, nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed\n"); + + err = verify_loop(input, loop_indx, loop_cnt, output, num_elements); + return err; +} diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp index a46713e9..41cc0199 100644 --- a/test_conformance/basic/test_progvar.cpp +++ b/test_conformance/basic/test_progvar.cpp @@ -581,13 +581,19 @@ static void l_load_abilities(cl_device_id device) cl_uint max_dim = 0; status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(max_dim), &max_dim, 0); - assert(status == CL_SUCCESS); + if (check_error(status, + "clGetDeviceInfo for " + "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed.")) + return; assert(max_dim > 0); size_t max_id[3]; max_id[0] = 0; status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, max_dim * sizeof(size_t), &max_id[0], 0); - assert(status == CL_SUCCESS); + if (check_error(status, + "clGetDeviceInfo for " + "CL_DEVICE_MAX_WORK_ITEM_SIZES failed.")) + return; l_max_global_id0 = max_id[0]; } @@ -597,7 +603,10 @@ static void l_load_abilities(cl_device_id device) status = clGetDeviceInfo(device, CL_DEVICE_LINKER_AVAILABLE, sizeof(l_linker_available), &l_linker_available, 0); - assert(status == CL_SUCCESS); + if (check_error(status, + "clGetDeviceInfo for " + "CL_DEVICE_LINKER_AVAILABLE failed.")) + return; } } @@ -903,6 +912,7 @@ static std::string global_decls(const TypeInfo& ti, bool with_init) vol, tn, vol, tn, vol, tn, vol, tn); } assert(num_printed < sizeof(decls)); + (void)num_printed; return std::string(decls); } @@ -983,6 +993,7 @@ static std::string writer_function(const TypeInfo& ti) writer_template_atomic, ti.get_buf_elem_type()); } assert(num_printed < sizeof(writer_src)); + (void)num_printed; std::string result = writer_src; return result; } @@ -1024,6 +1035,7 @@ static std::string reader_function(const TypeInfo& ti) ti.get_buf_elem_type(), ti.get_buf_elem_type()); } assert(num_printed < sizeof(reader_src)); + (void)num_printed; std::string result = reader_src; return result; } diff --git a/test_conformance/basic/test_vec_type_hint.cpp b/test_conformance/basic/test_vec_type_hint.cpp index 33168b13..0ba105db 100644 --- a/test_conformance/basic/test_vec_type_hint.cpp +++ b/test_conformance/basic/test_vec_type_hint.cpp @@ -13,28 +13,27 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> #include <sys/stat.h> - +#include <vector> #include "procs.h" #include "harness/conversions.h" #include "harness/typeWrappers.h" - static const char *sample_kernel = { - "%s\n" // optional pragma string - "__kernel __attribute__((vec_type_hint(%s%s))) void sample_test(__global int *src, __global int *dst)\n" - "{\n" - " int tid = get_global_id(0);\n" - " dst[tid] = src[tid];\n" - "\n" - "}\n" + "%s\n" + "__kernel __attribute__((vec_type_hint(%s%s))) void sample_test(__global " + "int *src, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + " dst[tid] = src[tid];\n" + "\n" + "}\n" }; int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) @@ -42,66 +41,85 @@ int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_que int error; int vec_type_index, vec_size_index; - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; - const char *size_names[] = {"", "2", "4", "8", "16"}; - char *program_source; - - program_source = (char*)malloc(sizeof(char)*4096); + ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, + kLong, kULong, kFloat, kHalf, kDouble }; + const char *size_names[] = { "", "2", "4", "8", "16" }; + std::vector<char> program_source(4096); + + for (vec_type_index = 0; + vec_type_index < sizeof(vecType) / sizeof(vecType[0]); vec_type_index++) + { + + if (vecType[vec_type_index] == kHalf + && !is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info( + "Extension cl_khr_fp16 not supported; skipping half tests.\n"); + continue; + } + else if (vecType[vec_type_index] == kDouble + && !is_extension_available(deviceID, "cl_khr_fp64")) + { + log_info( + "Extension cl_khr_fp64 not supported; skipping double tests.\n"); + continue; + } + else if ((vecType[vec_type_index] == kLong + || vecType[vec_type_index] == kULong) + && !gHasLong) + { + log_info( + "Extension cl_khr_int64 not supported; skipping long tests.\n"); + continue; + } - for (vec_type_index=0; vec_type_index<10; vec_type_index++) { - if (vecType[vec_type_index] == kDouble) { - if (!is_extension_available(deviceID, "cl_khr_fp64")) { - log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); - continue; + for (vec_size_index = 0; vec_size_index < 5; vec_size_index++) + { + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper in, out; + size_t global[] = { 1, 1, 1 }; + + log_info("Testing __attribute__((vec_type_hint(%s%s))...\n", + get_explicit_type_name(vecType[vec_type_index]), + size_names[vec_size_index]); + char extension[128] = { 0 }; + if (vecType[vec_type_index] == kDouble) + std::snprintf(extension, sizeof(extension), + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"); + else if (vecType[vec_type_index] == kHalf) + std::snprintf(extension, sizeof(extension), + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"); + + sprintf(program_source.data(), sample_kernel, extension, + get_explicit_type_name(vecType[vec_type_index]), + size_names[vec_size_index]); + + const char *src = &program_source.front(); + error = create_single_kernel_helper(context, &program, &kernel, 1, + &src, "sample_test"); + test_error(error, "create_single_kernel_helper failed"); + + in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * 10, + NULL, &error); + test_error(error, "clCreateBuffer failed"); + out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int) * 10, + NULL, &error); + test_error(error, "clCreateBuffer failed"); + + error = clSetKernelArg(kernel, 0, sizeof(in), &in); + test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 1, sizeof(out), &out); + test_error(error, "clSetKernelArg failed"); + + error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, + 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); } - log_info("Testing doubles.\n"); - } - - if (vecType[vec_type_index] == kLong || vecType[vec_type_index] == kULong) - { - if (!gHasLong) - { - log_info("Extension cl_khr_int64 not supported; skipping long tests.\n"); - continue; - } - } - - for (vec_size_index=0; vec_size_index<5; vec_size_index++) { - clProgramWrapper program; - clKernelWrapper kernel; - clMemWrapper in, out; - size_t global[] = {1,1,1}; - - log_info("Testing __attribute__((vec_type_hint(%s%s))...\n", get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]); - - program_source[0] = '\0'; - sprintf(program_source, sample_kernel, - (vecType[vec_type_index] == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]); - - error = create_single_kernel_helper( context, &program, &kernel, 1, (const char**)&program_source, "sample_test" ); - if( error != 0 ) - return error; - - in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*10, NULL, &error); - test_error(error, "clCreateBuffer failed"); - out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*10, NULL, &error); - test_error(error, "clCreateBuffer failed"); - - error = clSetKernelArg(kernel, 0, sizeof(in), &in); - test_error(error, "clSetKernelArg failed"); - error = clSetKernelArg(kernel, 1, sizeof(out), &out); - test_error(error, "clSetKernelArg failed"); - - error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, NULL); - test_error(error, "clEnqueueNDRangeKernel failed"); - - error = clFinish(queue); - test_error(error, "clFinish failed"); - } } - free(program_source); - return 0; } diff --git a/test_conformance/basic/test_vector_creation.cpp b/test_conformance/basic/test_vector_creation.cpp index d9530b4e..6bae156a 100644 --- a/test_conformance/basic/test_vector_creation.cpp +++ b/test_conformance/basic/test_vector_creation.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -17,48 +17,41 @@ #include "harness/conversions.h" #include "harness/typeWrappers.h" #include "harness/errorHelpers.h" +#include <vector> - - +#include <CL/cl_half.h> #define DEBUG 0 #define DEPTH 16 // Limit the maximum code size for any given kernel. -#define MAX_CODE_SIZE (1024*32) - -const int sizes[] = {1, 2, 3, 4, 8, 16, -1, -1, -1, -1}; -const char *size_names[] = {"", "2", "3", "4", "8", "16" , "!!a", "!!b", "!!c", "!!d"}; - -// Creates a kernel by enumerating all possible ways of building the vector out of vloads -// skip_to_results will skip results up to a given number. If the amount of code generated -// is greater than MAX_CODE_SIZE, this function will return the number of results used, -// which can then be used as the skip_to_result value to continue where it left off. -int create_kernel(ExplicitType type, int output_size, char *program, int *number_of_results, int skip_to_result) { +#define MAX_CODE_SIZE (1024 * 32) + +static const int sizes[] = { 1, 2, 3, 4, 8, 16, -1, -1, -1, -1 }; +static const int initial_no_sizes[] = { 0, 0, 0, 0, 0, 0, 2 }; +static const char *size_names[] = { "", "2", "3", "4", "8", + "16", "!!a", "!!b", "!!c", "!!d" }; +static char extension[128] = { 0 }; + +// Creates a kernel by enumerating all possible ways of building the vector out +// of vloads skip_to_results will skip results up to a given number. If the +// amount of code generated is greater than MAX_CODE_SIZE, this function will +// return the number of results used, which can then be used as the +// skip_to_result value to continue where it left off. +int create_kernel(ExplicitType type, int output_size, char *program, + int *number_of_results, int skip_to_result) +{ int number_of_sizes; - switch (output_size) { - case 1: - number_of_sizes = 1; - break; - case 2: - number_of_sizes = 2; - break; - case 3: - number_of_sizes = 3; - break; - case 4: - number_of_sizes = 4; - break; - case 8: - number_of_sizes = 5; - break; - case 16: - number_of_sizes = 6; - break; - default: - log_error("Invalid size: %d\n", output_size); - return -1; + switch (output_size) + { + case 1: number_of_sizes = 1; break; + case 2: number_of_sizes = 2; break; + case 3: number_of_sizes = 3; break; + case 4: number_of_sizes = 4; break; + case 8: number_of_sizes = 5; break; + case 16: number_of_sizes = 6; break; + default: log_error("Invalid size: %d\n", output_size); return -1; } int total_results = 0; @@ -67,102 +60,125 @@ int create_kernel(ExplicitType type, int output_size, char *program, int *number int total_program_length = 0; int aborted_due_to_size = 0; - if (skip_to_result < 0) - skip_to_result = 0; + if (skip_to_result < 0) skip_to_result = 0; // The line of code for the vector creation char line[1024]; - // Keep track of what size vector we are using in each position so we can iterate through all fo them + // Keep track of what size vector we are using in each position so we can + // iterate through all fo them int pos[DEPTH]; int max_size = output_size; if (DEBUG > 1) log_info("max_size: %d\n", max_size); program[0] = '\0'; - sprintf(program, "%s\n__kernel void test_vector_creation(__global %s *src, __global %s%s *result) {\n", - type == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "", - get_explicit_type_name(type), get_explicit_type_name(type), ( number_of_sizes == 3 ) ? "" : size_names[number_of_sizes-1]); + sprintf(program, + "%s\n__kernel void test_vector_creation(__global %s *src, __global " + "%s%s *result) {\n", + extension, get_explicit_type_name(type), + get_explicit_type_name(type), + (number_of_sizes == 3) ? "" : size_names[number_of_sizes - 1]); total_program_length += (int)strlen(program); - char storePrefix[ 128 ], storeSuffix[ 128 ]; + char storePrefix[128], storeSuffix[128]; - // Start out trying sizes 1,1,1,1,1... - for (int i=0; i<DEPTH; i++) - pos[i] = 0; + // Start out trying sizes 1,1,1... by initializing pos array to zeros for + // all vector sizes except 16. For 16-sizes initial_no_sizes array holds + // factor to omit time consuming, similar creation cases tested earlier. + for (int i = 0; i < DEPTH; i++) pos[i] = initial_no_sizes[number_of_sizes]; int done = 0; - while (!done) { - if (DEBUG > 1) { + while (!done) + { + if (DEBUG > 1) + { log_info("pos size[] = ["); - for (int k=0; k<DEPTH; k++) - log_info(" %d ", pos[k]); + for (int k = 0; k < DEPTH; k++) log_info(" %d ", pos[k]); log_info("]\n"); } - // Go through the selected vector sizes and see if the first n of them fit the + // Go through the selected vector sizes and see if the first n of them + // fit the // required size exactly. int size_so_far = 0; int vloads; - for ( vloads=0; vloads<DEPTH; vloads++) { - if (size_so_far + sizes[pos[vloads]] <= max_size) { + for (vloads = 0; vloads < DEPTH; vloads++) + { + if (size_so_far + sizes[pos[vloads]] <= max_size) + { size_so_far += sizes[pos[vloads]]; - } else { + } + else + { break; } } - if (DEBUG > 1) log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far); + if (DEBUG > 1) + log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far); - // If they did not fit the required size exactly it is too long, so there is no point in checking any other combinations + // If they did not fit the required size exactly it is too long, so + // there is no point in checking any other combinations // of the sizes to the right. Prune them from the search. - if (size_so_far != max_size) { + if (size_so_far != max_size) + { // Zero all the sizes to the right - for (int k=vloads+1; k<DEPTH; k++) { + for (int k = vloads + 1; k < DEPTH; k++) + { pos[k] = 0; } // Increment this current size and propagate the values up if needed - for (int d=vloads; d>=0; d--) { + for (int d = vloads; d >= 0; d--) + { pos[d]++; - if (pos[d] >= number_of_sizes) { + if (pos[d] >= number_of_sizes) + { pos[d] = 0; - if (d == 0) { + if (d == 0) + { // If we rolled over then we are done done = 1; break; } - } else { + } + else + { break; } } - // Go on to the next size since this one (and all others "under" it) didn't fit + // Go on to the next size since this one (and all others "under" it) + // didn't fit continue; } // Generate the actual load line if we are building this part - line[0]= '\0'; - if (skip_to_result == 0 || total_results >= skip_to_result) { - if( number_of_sizes == 3 ) + line[0] = '\0'; + if (skip_to_result == 0 || total_results >= skip_to_result) + { + if (number_of_sizes == 3) { - sprintf( storePrefix, "vstore3( " ); - sprintf( storeSuffix, ", %d, result )", current_result ); + sprintf(storePrefix, "vstore3( "); + sprintf(storeSuffix, ", %d, result )", current_result); } else { - sprintf( storePrefix, "result[%d] = ", current_result ); - storeSuffix[ 0 ] = 0; + sprintf(storePrefix, "result[%d] = ", current_result); + storeSuffix[0] = 0; } - sprintf(line, "\t%s(%s%d)(", storePrefix, get_explicit_type_name(type), output_size); + sprintf(line, "\t%s(%s%d)(", storePrefix, + get_explicit_type_name(type), output_size); current_result++; int offset = 0; - for (int i=0; i<vloads; i++) { + for (int i = 0; i < vloads; i++) + { if (pos[i] == 0) sprintf(line + strlen(line), "src[%d]", offset); else - sprintf(line + strlen(line), "vload%s(0,src+%d)", size_names[pos[i]], offset); + sprintf(line + strlen(line), "vload%s(0,src+%d)", + size_names[pos[i]], offset); offset += sizes[pos[i]]; - if (i<(vloads-1)) - sprintf(line + strlen(line), ","); + if (i < (vloads - 1)) sprintf(line + strlen(line), ","); } sprintf(line + strlen(line), ")%s;\n", storeSuffix); @@ -171,7 +187,8 @@ int create_kernel(ExplicitType type, int output_size, char *program, int *number } total_results++; total_program_length += (int)strlen(line); - if (total_program_length > MAX_CODE_SIZE) { + if (total_program_length > MAX_CODE_SIZE) + { aborted_due_to_size = 1; done = 1; } @@ -179,132 +196,194 @@ int create_kernel(ExplicitType type, int output_size, char *program, int *number if (DEBUG) log_info("line is: %s", line); - // If we did not use all of them, then we ignore any changes further to the right. - // We do this by causing those loops to skip on the next iteration. - if (vloads < DEPTH) { + // If we did not use all of them, then we ignore any changes further to + // the right. We do this by causing those loops to skip on the next + // iteration. + if (vloads < DEPTH) + { if (DEBUG > 1) log_info("done with this depth\n"); - for (int k=vloads; k<DEPTH; k++) - pos[k] = number_of_sizes; + for (int k = vloads; k < DEPTH; k++) pos[k] = number_of_sizes; } // Increment the far right size by 1, rolling over as needed - for (int d=DEPTH-1; d>=0; d--) { + for (int d = DEPTH - 1; d >= 0; d--) + { pos[d]++; - if (pos[d] >= number_of_sizes) { + if (pos[d] >= number_of_sizes) + { pos[d] = 0; - if (d == 0) { + if (d == 0) + { // If we rolled over at the far-left then we are done done = 1; break; } - } else { + } + else + { break; } } - if (done) - break; + if (done) break; // Continue until we are done. } - strcat(program, "}\n\n"); //log_info("%s\n", program); + strcat(program, "}\n\n"); // log_info("%s\n", program); total_program_length += 3; - if (DEBUG) log_info("\t\t(Program for vector type %s%s contains %d vector creations, of total program length %gkB, with a total of %d vloads.)\n", - get_explicit_type_name(type), size_names[number_of_sizes-1], total_results, total_program_length/1024.0, total_vloads); + if (DEBUG) + log_info( + "\t\t(Program for vector type %s%s contains %d vector creations, " + "of total program length %gkB, with a total of %d vloads.)\n", + get_explicit_type_name(type), size_names[number_of_sizes - 1], + total_results, total_program_length / 1024.0, total_vloads); *number_of_results = current_result; - if (aborted_due_to_size) - return total_results; + if (aborted_due_to_size) return total_results; return 0; } - - -int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_vector_creation(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; - unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16}; + const std::vector<ExplicitType> vecType = { kChar, kUChar, kShort, kUShort, + kInt, kUInt, kLong, kULong, + kFloat, kHalf, kDouble }; + // should be in sync with global array size_names + const std::vector<unsigned int> vecSizes = { 1, 2, 3, 4, 8, 16 }; - char *program_source; - int error; + int error = CL_SUCCESS; int total_errors = 0; + int number_of_results = 0; - cl_int input_data_int[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - cl_double input_data_double[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; - void *input_data_converted; - void *output_data; - - int number_of_results;; - - input_data_converted = malloc(sizeof(cl_double)*16); - program_source = (char*)malloc(sizeof(char)*1024*1024*4); + std::vector<char> input_data_converted(sizeof(cl_double) * 16); + std::vector<char> program_source(sizeof(char) * 1024 * 1024 * 4); + std::vector<char> output_data; // Iterate over all the types - for (int type_index=0; type_index<10; type_index++) { - if(!gHasLong && ((vecType[type_index] == kLong) || (vecType[type_index] == kULong))) + for (size_t type_index = 0; type_index < vecType.size(); type_index++) { - log_info("Long/ULong data type not supported on this device\n"); - continue; - } - - clMemWrapper input; - if (vecType[type_index] == kDouble) { - if (!is_extension_available(deviceID, "cl_khr_fp64")) { - log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); + if (!gHasLong + && ((vecType[type_index] == kLong) + || (vecType[type_index] == kULong))) + { + log_info("Long/ULong data type not supported on this device\n"); + continue; + } + else if (vecType[type_index] == kDouble) + { + if (!is_extension_available(deviceID, "cl_khr_fp64")) + { + log_info("Extension cl_khr_fp64 not supported; skipping double " + "tests.\n"); continue; } - log_info("Testing doubles.\n"); + snprintf(extension, sizeof(extension), "%s", + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"); } + else if (vecType[type_index] == kHalf) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info("Extension cl_khr_fp16 not supported; skipping half " + "tests.\n"); + continue; + } + snprintf(extension, sizeof(extension), "%s", + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable"); + } + + log_info("Testing %s.\n", get_explicit_type_name(vecType[type_index])); // Convert the data to the right format for the test. - memset(input_data_converted, 0xff, sizeof(cl_double)*16); - if (vecType[type_index] != kDouble) { - for (int j=0; j<16; j++) { - convert_explicit_value(&input_data_int[j], ((char*)input_data_converted)+get_explicit_type_size(vecType[type_index])*j, - kInt, 0, kRoundToEven, vecType[type_index]); + memset(input_data_converted.data(), 0xff, sizeof(cl_double) * 16); + if (vecType[type_index] == kDouble) + { + const cl_double input_data_double[16] = { 0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15 }; + memcpy(input_data_converted.data(), &input_data_double, + sizeof(cl_double) * 16); + } + else if (vecType[type_index] == kHalf) + { + cl_half *buf = + reinterpret_cast<cl_half *>(input_data_converted.data()); + for (int j = 0; j < 16; j++) + buf[j] = cl_half_from_float(float(j), CL_HALF_RTE); + } + else + { + for (int j = 0; j < 16; j++) + { + convert_explicit_value( + &j, + ((char *)input_data_converted.data()) + + get_explicit_type_size(vecType[type_index]) * j, + kInt, 0, kRoundToEven, vecType[type_index]); } - } else { - memcpy(input_data_converted, &input_data_double, sizeof(cl_double)*16); } - input = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, get_explicit_type_size(vecType[type_index])*16, - (vecType[type_index] != kDouble) ? input_data_converted : input_data_double, &error); - if (error) { + clMemWrapper input = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + get_explicit_type_size(vecType[type_index]) * 16, + input_data_converted.data(), &error); + if (error) + { print_error(error, "clCreateBuffer failed"); total_errors++; continue; } // Iterate over all the vector sizes. - for (int size_index=1; size_index< 5; size_index++) { - size_t global[] = {1,1,1}; + for (size_t size_index = 1; size_index < vecSizes.size(); size_index++) + { + size_t global[] = { 1, 1, 1 }; int number_generated = -1; int previous_number_generated = 0; - log_info("Testing %s%s...\n", get_explicit_type_name(vecType[type_index]), size_names[size_index]); - while (number_generated != 0) { + log_info("Testing %s%s...\n", + get_explicit_type_name(vecType[type_index]), + size_names[size_index]); + while (number_generated != 0) + { clMemWrapper output; clKernelWrapper kernel; clProgramWrapper program; - number_generated = create_kernel(vecType[type_index], vecSizes[size_index], program_source, &number_of_results, number_generated); - if (number_generated != 0) { + number_generated = + create_kernel(vecType[type_index], vecSizes[size_index], + program_source.data(), &number_of_results, + number_generated); + if (number_generated != 0) + { if (previous_number_generated == 0) - log_info("Code size greater than %gkB; splitting test into multiple kernels.\n", MAX_CODE_SIZE/1024.0); - log_info("\tExecuting vector permutations %d to %d...\n", previous_number_generated, number_generated-1); + log_info("Code size greater than %gkB; splitting test " + "into multiple kernels.\n", + MAX_CODE_SIZE / 1024.0); + log_info("\tExecuting vector permutations %d to %d...\n", + previous_number_generated, number_generated - 1); } - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&program_source, "test_vector_creation"); - if (error) { + char *src = program_source.data(); + error = create_single_kernel_helper(context, &program, &kernel, + 1, (const char **)&src, + "test_vector_creation"); + if (error) + { log_error("create_single_kernel_helper failed.\n"); total_errors++; break; } - output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, - number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index], - NULL, &error); - if (error) { + output = clCreateBuffer( + context, CL_MEM_WRITE_ONLY, + number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index], + NULL, &error); + if (error) + { print_error(error, "clCreateBuffer failed"); total_errors++; break; @@ -312,95 +391,115 @@ int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_q error = clSetKernelArg(kernel, 0, sizeof(input), &input); error |= clSetKernelArg(kernel, 1, sizeof(output), &output); - if (error) { + if (error) + { print_error(error, "clSetKernelArg failed"); total_errors++; break; } - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL); - if (error) { + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, + NULL, 0, NULL, NULL); + if (error) + { print_error(error, "clEnqueueNDRangeKernel failed"); total_errors++; break; } error = clFinish(queue); - if (error) { + if (error) + { print_error(error, "clFinish failed"); total_errors++; break; } - output_data = malloc(number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]); - if (output_data == NULL) { - log_error("Failed to allocate memory for output data.\n"); - total_errors++; - break; - } - memset(output_data, 0xff, number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]); - error = clEnqueueReadBuffer(queue, output, CL_TRUE, 0, - number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index], - output_data, 0, NULL, NULL); - if (error) { + output_data.resize(number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index]); + memset(output_data.data(), 0xff, + number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index]); + error = clEnqueueReadBuffer( + queue, output, CL_TRUE, 0, + number_of_results + * get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index], + output_data.data(), 0, NULL, NULL); + if (error) + { print_error(error, "clEnqueueReadBuffer failed"); total_errors++; - free(output_data); break; } // Check the results - char *res = (char *)output_data; - char *exp = (char *)input_data_converted; - for (int i=0; i<number_of_results; i++) { + char *res = (char *)output_data.data(); + char *exp = (char *)input_data_converted.data(); + for (int i = 0; i < number_of_results; i++) + { // If they do not match, then print out why - if (memcmp(input_data_converted, - res + i*(get_explicit_type_size(vecType[type_index])*vecSizes[size_index]), - get_explicit_type_size(vecType[type_index])*vecSizes[size_index]) - ) { + if (memcmp(exp, + res + + i + * (get_explicit_type_size( + vecType[type_index]) + * vecSizes[size_index]), + get_explicit_type_size(vecType[type_index]) + * vecSizes[size_index])) + { log_error("Data failed to validate for result %d\n", i); - // Find the line in the program that failed. This is ugly. - char search[32]; - char found_line[1024]; - found_line[0]='\0'; - search[0]='\0'; + // Find the line in the program that failed. This is + // ugly. + char search[32] = { 0 }; + char found_line[1024] = { 0 }; sprintf(search, "result[%d] = (", i); - char *start_loc = strstr(program_source, search); + char *start_loc = strstr(program_source.data(), search); if (start_loc == NULL) - log_error("Failed to find program source for failure for %s in \n%s", search, program_source); - else { - char *end_loc = strstr(start_loc, "\n"); - memcpy(&found_line, start_loc, (end_loc-start_loc)); - found_line[end_loc-start_loc]='\0'; - log_error("Failed vector line: %s\n", found_line); + log_error("Failed to find program source for " + "failure for %s in \n%s", + search, program_source.data()); + else + { + char *end_loc = strstr(start_loc, "\n"); + memcpy(&found_line, start_loc, + (end_loc - start_loc)); + found_line[end_loc - start_loc] = '\0'; + log_error("Failed vector line: %s\n", found_line); } - for (int j=0; j<(int)vecSizes[size_index]; j++) { - char expected_value[64]; - char returned_value[64]; - expected_value[0]='\0'; - returned_value[0]='\0'; - print_type_to_string(vecType[type_index], (void*)(res+get_explicit_type_size(vecType[type_index])*(i*vecSizes[size_index]+j)), returned_value); - print_type_to_string(vecType[type_index], (void*)(exp+get_explicit_type_size(vecType[type_index])*j), expected_value); - log_error("index [%d, component %d]: got: %s expected: %s\n", i, j, - returned_value, expected_value); + for (int j = 0; j < (int)vecSizes[size_index]; j++) + { + char expected_value[64] = { 0 }; + char returned_value[64] = { 0 }; + print_type_to_string( + vecType[type_index], + (void *)(res + + get_explicit_type_size( + vecType[type_index]) + * (i * vecSizes[size_index] + j)), + returned_value); + print_type_to_string( + vecType[type_index], + (void *)(exp + + get_explicit_type_size( + vecType[type_index]) + * j), + expected_value); + log_error("index [%d, component %d]: got: %s " + "expected: %s\n", + i, j, returned_value, expected_value); } - total_errors++; } } - free(output_data); previous_number_generated = number_generated; } // number_generated != 0 - } // vector sizes } // vector types - free(input_data_converted); - free(program_source); - return total_errors; } - - diff --git a/test_conformance/basic/test_vector_swizzle.cpp b/test_conformance/basic/test_vector_swizzle.cpp index 884bcf36..fdbc8919 100644 --- a/test_conformance/basic/test_vector_swizzle.cpp +++ b/test_conformance/basic/test_vector_swizzle.cpp @@ -22,6 +22,8 @@ #include "procs.h" #include "harness/testHarness.h" +static std::string pragma_extension; + template <int N> struct TestInfo { }; @@ -629,7 +631,9 @@ static int test_vectype(const char* type_name, cl_device_id device, clProgramWrapper program; clKernelWrapper kernel; - const char* xyzw_source = TestInfo<N>::kernel_source_xyzw; + std::string program_src = + pragma_extension + std::string(TestInfo<N>::kernel_source_xyzw); + const char* xyzw_source = program_src.c_str(); error = create_single_kernel_helper( context, &program, &kernel, 1, &xyzw_source, "test_vector_swizzle_xyzw", buildOptions.c_str()); @@ -643,7 +647,9 @@ static int test_vectype(const char* type_name, cl_device_id device, clProgramWrapper program; clKernelWrapper kernel; - const char* sN_source = TestInfo<N>::kernel_source_sN; + std::string program_src = + pragma_extension + std::string(TestInfo<N>::kernel_source_sN); + const char* sN_source = program_src.c_str(); error = create_single_kernel_helper( context, &program, &kernel, 1, &sN_source, "test_vector_swizzle_sN", buildOptions.c_str()); @@ -660,7 +666,9 @@ static int test_vectype(const char* type_name, cl_device_id device, const Version device_version = get_device_cl_version(device); if (device_version >= Version(3, 0)) { - const char* rgba_source = TestInfo<N>::kernel_source_rgba; + std::string program_src = + pragma_extension + std::string(TestInfo<N>::kernel_source_rgba); + const char* rgba_source = program_src.c_str(); error = create_single_kernel_helper( context, &program, &kernel, 1, &rgba_source, "test_vector_swizzle_rgba", buildOptions.c_str()); @@ -689,6 +697,7 @@ int test_vector_swizzle(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { int hasDouble = is_extension_available(device, "cl_khr_fp64"); + int hasHalf = is_extension_available(device, "cl_khr_fp16"); int result = TEST_PASS; result |= test_type<cl_char>("char", device, context, queue); @@ -703,8 +712,14 @@ int test_vector_swizzle(cl_device_id device, cl_context context, result |= test_type<cl_ulong>("ulong", device, context, queue); } result |= test_type<cl_float>("float", device, context, queue); + if (hasHalf) + { + pragma_extension = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + result |= test_type<cl_half>("half", device, context, queue); + } if (hasDouble) { + pragma_extension = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; result |= test_type<cl_double>("double", device, context, queue); } return result; diff --git a/test_conformance/basic/test_vloadstore.cpp b/test_conformance/basic/test_vloadstore.cpp index e137f9e7..d34ecbf9 100644 --- a/test_conformance/basic/test_vloadstore.cpp +++ b/test_conformance/basic/test_vloadstore.cpp @@ -13,52 +13,129 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" - +#include <algorithm> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <limits.h> #include <sys/types.h> #include <sys/stat.h> +#include <vector> +#include <CL/cl_half.h> #include "procs.h" #include "harness/conversions.h" -#include "harness/typeWrappers.h" #include "harness/errorHelpers.h" +#include "harness/stringHelpers.h" +#include "harness/typeWrappers.h" // Outputs debug information for stores #define DEBUG 0 // Forces stores/loads to be done with offsets = tid #define LINEAR_OFFSETS 0 #define NUM_LOADS 512 - -static const char *doubleExtensionPragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; +#define HFF(num) cl_half_from_float(num, halfRoundingMode) +#define HTF(num) cl_half_to_float(num) + +char pragma_str[128] = { 0 }; +char mem_type[64] = { 0 }; +char store_str[128] = { 0 }; +char load_str[128] = { 0 }; + +extern cl_half_rounding_mode halfRoundingMode; + +// clang-format off +static const char *store_pattern= "results[ tid ] = tmp;\n"; +static const char *store_patternV3 = "results[3*tid] = tmp.s0; results[3*tid+1] = tmp.s1; results[3*tid+2] = tmp.s2;\n"; +static const char *load_pattern = "sSharedStorage[ i ] = src[ i ];\n"; +static const char *load_patternV3 = "sSharedStorage[3*i] = src[ 3*i]; sSharedStorage[3*i+1] = src[3*i+1]; sSharedStorage[3*i+2] = src[3*i+2];\n"; +static const char *kernel_pattern[] = { +pragma_str, +"#define STYPE %s\n" +"__kernel void test_fn( ", mem_type, " STYPE *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" +"{\n" +" int tid = get_global_id( 0 );\n" +" %s%d tmp = vload%d( offsets[ tid ], ( (", mem_type, " STYPE *) src ) + alignmentOffsets[ tid ] );\n" +" ", store_str, +"}\n" +}; + +const char *pattern_local [] = { +pragma_str, +"__kernel void test_fn(__local %s *sSharedStorage, __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" +"{\n" +" int tid = get_global_id( 0 );\n" +" int lid = get_local_id( 0 );\n" +"\n" +" if( lid == 0 )\n" +" {\n" +" for( int i = 0; i < %d; i++ ) {\n" +" ", load_str, +" }\n" +" }\n" +// Note: the above loop will only run on the first thread of each local group, but this barrier should ensure that all +// threads are caught up (including the first one with the copy) before any proceed, i.e. the shared storage should be +// updated on all threads at that point +" barrier( CLK_LOCAL_MEM_FENCE );\n" +"\n" +" %s%d tmp = vload%d( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n" +" ", store_str, +"}\n" }; + +const char *pattern_priv [] = { +pragma_str, +// Private memory is unique per thread, unlike local storage which is unique per local work group. Which means +// for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test +"#define PRIV_TYPE %s\n" +"#define PRIV_SIZE %d\n" +"__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" +"{\n" +" __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n" +" int tid = get_global_id( 0 );\n" +"\n" +" for( int i = 0; i < PRIV_SIZE; i++ )\n" +" sPrivateStorage[ i ] = src[ i ];\n" +// Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for +// anybody else to sync up +"\n" +" %s%d tmp = vload%d( offsets[ tid ], ( (__private %s *) sPrivateStorage ) + alignmentOffsets[ tid ] );\n" +" ", store_str, +"}\n"}; +// clang-format on #pragma mark -------------------- vload harness -------------------------- -typedef void (*create_vload_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ); +typedef void (*create_program_fn)(std::string &, size_t, ExplicitType, size_t, + size_t); +typedef int (*test_fn)(cl_device_id, cl_context, cl_command_queue, ExplicitType, + unsigned int, create_program_fn, size_t); -int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize, - create_vload_program_fn createFn, size_t bufferSize, MTdata d ) +int test_vload(cl_device_id device, cl_context context, cl_command_queue queue, + ExplicitType type, unsigned int vecSize, + create_program_fn createFn, size_t bufferSize) { - int error; - clProgramWrapper program; clKernelWrapper kernel; clMemWrapper streams[ 4 ]; + MTdataHolder d(gRandomSeed); const size_t numLoads = (DEBUG) ? 16 : NUM_LOADS; if (DEBUG) bufferSize = (bufferSize < 128) ? bufferSize : 128; size_t threads[ 1 ], localThreads[ 1 ]; clProtectedArray inBuffer( bufferSize ); - char programSrc[ 10240 ]; cl_uint offsets[ numLoads ], alignmentOffsets[ numLoads ]; size_t numElements, typeSize, i; unsigned int outVectorSize; + pragma_str[0] = '\0'; + if (type == kDouble) + std::snprintf(pragma_str, sizeof(pragma_str), + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"); + else if (type == kHalf) + std::snprintf(pragma_str, sizeof(pragma_str), + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"); typeSize = get_explicit_type_size( type ); numElements = bufferSize / ( typeSize * vecSize ); @@ -83,25 +160,19 @@ int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, outVectorSize = vecSize; // Declare output buffers now -#if !(defined(_WIN32) && defined(_MSC_VER)) - char outBuffer[ numLoads * typeSize * outVectorSize ]; - char referenceBuffer[ numLoads * typeSize * vecSize ]; -#else - char* outBuffer = (char*)_malloca(numLoads * typeSize * outVectorSize * sizeof(cl_char)); - char* referenceBuffer = (char*)_malloca(numLoads * typeSize * vecSize * sizeof(cl_char)); -#endif + std::vector<char> outBuffer(numLoads * typeSize * outVectorSize); + std::vector<char> referenceBuffer(numLoads * typeSize * vecSize); // Create the program - - + std::string programSrc; createFn( programSrc, numElements, type, vecSize, outVectorSize); // Create our kernel - const char *ptr = programSrc; - - error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); + const char *ptr = programSrc.c_str(); + cl_int error = create_single_kernel_helper(context, &program, &kernel, 1, + &ptr, "test_fn"); test_error( error, "Unable to create testing kernel" ); - if (DEBUG) log_info("Kernel: \n%s\n", programSrc); + if (DEBUG) log_info("Kernel: \n%s\n", programSrc.c_str()); // Get the number of args to differentiate the kernels with local storage. (They have 5) cl_uint numArgs; @@ -115,7 +186,9 @@ int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, test_error( error, "Unable to create kernel stream" ); streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(alignmentOffsets[0]), alignmentOffsets, &error ); test_error( error, "Unable to create kernel stream" ); - streams[ 3 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*typeSize*outVectorSize, (void *)outBuffer, &error ); + streams[3] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + numLoads * typeSize * outVectorSize, + (void *)outBuffer.data(), &error); test_error( error, "Unable to create kernel stream" ); // Set parameters and run @@ -145,28 +218,32 @@ int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, test_error( error, "Unable to exec kernel" ); // Get the results - error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, numLoads * typeSize * outVectorSize * sizeof(cl_char), (void *)outBuffer, 0, NULL, NULL ); + error = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, + numLoads * typeSize * outVectorSize + * sizeof(cl_char), + (void *)outBuffer.data(), 0, NULL, NULL); test_error( error, "Unable to read results" ); - // Create the reference results - memset( referenceBuffer, 0, numLoads * typeSize * vecSize * sizeof(cl_char)); + referenceBuffer.assign(numLoads * typeSize * vecSize, 0); for( i = 0; i < numLoads; i++ ) { - memcpy( referenceBuffer + i * typeSize * vecSize, ( (char *)(void *)inBuffer ) + ( ( offsets[ i ] * vecSize ) + alignmentOffsets[ i ] ) * typeSize, - typeSize * vecSize ); + memcpy(&referenceBuffer[i * typeSize * vecSize], + ((char *)(void *)inBuffer) + + ((offsets[i] * vecSize) + alignmentOffsets[i]) * typeSize, + typeSize * vecSize); } // Validate the results now - char *expected = referenceBuffer; - char *actual = outBuffer; + char *expected = referenceBuffer.data(); + char *actual = outBuffer.data(); char *in = (char *)(void *)inBuffer; if (DEBUG) { log_info("Memory contents:\n"); + char inString[1024]; + char expectedString[1024], actualString[1024]; for (i=0; i<numElements; i++) { - char inString[1024]; - char expectedString[ 1024 ], actualString[ 1024 ]; if (i < numLoads) { log_info("buffer %3d: input: %s expected: %s got: %s (load offset %3d, alignment offset %3d)", (int)i, GetDataVectorString( &(in[i*typeSize*vecSize]), typeSize, vecSize, inString ), GetDataVectorString( &(expected[i*typeSize*vecSize]), typeSize, vecSize, expectedString ), @@ -197,35 +274,42 @@ int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, expected += typeSize * vecSize; actual += typeSize * outVectorSize; } - return 0; } -int test_vloadset(cl_device_id device, cl_context context, cl_command_queue queue, create_vload_program_fn createFn, size_t bufferSize ) +template <test_fn test_func_ptr> +int test_vset(cl_device_id device, cl_context context, cl_command_queue queue, + create_program_fn createFn, size_t bufferSize) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; + std::vector<ExplicitType> vecType = { kChar, kUChar, kShort, kUShort, + kInt, kUInt, kLong, kULong, + kFloat, kHalf, kDouble }; unsigned int vecSizes[] = { 2, 3, 4, 8, 16, 0 }; const char *size_names[] = { "2", "3", "4", "8", "16"}; - unsigned int typeIdx, sizeIdx; int error = 0; - MTdata mtData = init_genrand( gRandomSeed ); log_info("Testing with buffer size of %d.\n", (int)bufferSize); - for( typeIdx = 0; vecType[ typeIdx ] != kNumExplicitTypes; typeIdx++ ) - { + bool hasDouble = is_extension_available(device, "cl_khr_fp64"); + bool hasHalf = is_extension_available(device, "cl_khr_fp16"); - if( vecType[ typeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) + for (unsigned typeIdx = 0; typeIdx < vecType.size(); typeIdx++) + { + if (vecType[typeIdx] == kDouble && !hasDouble) continue; - - if(( vecType[ typeIdx ] == kLong || vecType[ typeIdx ] == kULong ) && !gHasLong ) + else if (vecType[typeIdx] == kHalf && !hasHalf) + continue; + else if ((vecType[typeIdx] == kLong || vecType[typeIdx] == kULong) + && !gHasLong) continue; - for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ ) + for (unsigned sizeIdx = 0; vecSizes[sizeIdx] != 0; sizeIdx++) { log_info("Testing %s%s...\n", get_explicit_type_name(vecType[typeIdx]), size_names[sizeIdx]); - int error_this_type = test_vload( device, context, queue, vecType[ typeIdx ], vecSizes[ sizeIdx ], createFn, bufferSize, mtData ); + int error_this_type = + test_func_ptr(device, context, queue, vecType[typeIdx], + vecSizes[sizeIdx], createFn, bufferSize); if (error_this_type) { error += error_this_type; log_error("Failure; skipping further sizes for this type."); @@ -233,125 +317,59 @@ int test_vloadset(cl_device_id device, cl_context context, cl_command_queue queu } } } - - free_mtdata(mtData); - return error; } #pragma mark -------------------- vload test cases -------------------------- -void create_global_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) +void create_global_load_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t outVectorSize) { - const char *pattern = - "%s%s" - "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " %s%d tmp = vload%d( offsets[ tid ], ( (__global %s *) src ) + alignmentOffsets[ tid ] );\n" - " results[ tid ] = tmp;\n" - "}\n"; - - const char *patternV3 = - "%s%s" - "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " %s3 tmp = vload3( offsets[ tid ], ( (__global %s *) src ) + alignmentOffsets[ tid ] );\n" - " results[ 3*tid ] = tmp.s0;\n" - " results[ 3*tid+1 ] = tmp.s1;\n" - " results[ 3*tid+2 ] = tmp.s2;\n" - "}\n"; - + std::snprintf(mem_type, sizeof(mem_type), "__global"); + std::snprintf(store_str, sizeof(store_str), store_patternV3); const char *typeName = get_explicit_type_name(type); - if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, typeName, typeName, typeName ); - } else { - sprintf( destBuffer, pattern, type == kDouble ? doubleExtensionPragma : "", - "", - typeName, typeName, (int)outVectorSize, typeName, (int)inVectorSize, - (int)inVectorSize, typeName ); + std::string outTypeName = typeName; + if (inVectorSize != 3) + { + outTypeName = str_sprintf("%s%d", typeName, (int)outVectorSize); + std::snprintf(store_str, sizeof(store_str), store_pattern); } + + std::string kernel_src = concat_kernel( + kernel_pattern, sizeof(kernel_pattern) / sizeof(kernel_pattern[0])); + destBuffer = str_sprintf(kernel_src, typeName, outTypeName.c_str(), + typeName, (int)inVectorSize, (int)inVectorSize); } int test_vload_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) { - return test_vloadset( device, context, queue, create_global_load_code, 10240 ); + return test_vset<test_vload>(device, context, queue, + create_global_load_code, 10240); } - -void create_local_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) +void create_local_load_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t outVectorSize) { - const char *pattern = - "%s%s" - //" __local %s%d sSharedStorage[ %d ];\n" - "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " int lid = get_local_id( 0 );\n" - "\n" - " if( lid == 0 )\n" - " {\n" - " for( int i = 0; i < %d; i++ )\n" - " sSharedStorage[ i ] = src[ i ];\n" - " }\n" - // Note: the above loop will only run on the first thread of each local group, but this barrier should ensure that all - // threads are caught up (including the first one with the copy) before any proceed, i.e. the shared storage should be - // updated on all threads at that point - " barrier( CLK_LOCAL_MEM_FENCE );\n" - "\n" - " %s%d tmp = vload%d( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n" - " results[ tid ] = tmp;\n" - "}\n"; - - const char *patternV3 = - "%s%s" - //" __local %s%d sSharedStorage[ %d ];\n" - "__kernel void test_fn(__local %s *sSharedStorage, __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " int lid = get_local_id( 0 );\n" - "\n" - " if( lid == 0 )\n" - " {\n" - " for( int i = 0; i < %d; i++ ) {\n" - " sSharedStorage[ 3*i ] = src[ 3*i ];\n" - " sSharedStorage[ 3*i +1] = src[ 3*i +1];\n" - " sSharedStorage[ 3*i +2] = src[ 3*i +2];\n" - " }\n" - " }\n" - // Note: the above loop will only run on the first thread of each local group, but this barrier should ensure that all - // threads are caught up (including the first one with the copy) before any proceed, i.e. the shared storage should be - // updated on all threads at that point - " barrier( CLK_LOCAL_MEM_FENCE );\n" - "\n" - " %s3 tmp = vload3( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n" - " results[ 3*tid ] = tmp.s0;\n" - " results[ 3*tid +1] = tmp.s1;\n" - " results[ 3*tid +2] = tmp.s2;\n" - "}\n"; - + std::snprintf(store_str, sizeof(store_str), store_patternV3); + std::snprintf(load_str, sizeof(load_str), load_patternV3); const char *typeName = get_explicit_type_name(type); - if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, /*(int)inBufferSize,*/ - typeName, typeName, - (int)inBufferSize, - typeName, typeName ); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, (int)inVectorSize, /*(int)inBufferSize,*/ - typeName, (int)inVectorSize, typeName, (int)outVectorSize, - (int)inBufferSize, - typeName, (int)inVectorSize, (int)inVectorSize, typeName ); + std::string outTypeName = typeName; + std::string inTypeName = typeName; + if (inVectorSize != 3) + { + outTypeName = str_sprintf("%s%d", typeName, (int)outVectorSize); + inTypeName = str_sprintf("%s%d", typeName, (int)inVectorSize); + std::snprintf(store_str, sizeof(store_str), store_pattern); + std::snprintf(load_str, sizeof(load_str), load_pattern); } + + std::string kernel_src = concat_kernel( + pattern_local, sizeof(pattern_local) / sizeof(pattern_local[0])); + destBuffer = str_sprintf(kernel_src, inTypeName.c_str(), inTypeName.c_str(), + outTypeName.c_str(), (int)inBufferSize, typeName, + (int)inVectorSize, (int)inVectorSize, typeName); } int test_vload_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) @@ -360,53 +378,34 @@ int test_vload_local(cl_device_id device, cl_context context, cl_command_queue q cl_ulong localSize; int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( localSize ), &localSize, NULL ); test_error( error, "Unable to get max size of local memory buffer" ); - if( localSize > 10240 ) - localSize = 10240; + if (localSize > 10240) localSize = 10240; if (localSize > 4096) localSize -= 2048; else localSize /= 2; - return test_vloadset( device, context, queue, create_local_load_code, (size_t)localSize ); + return test_vset<test_vload>(device, context, queue, create_local_load_code, + (size_t)localSize); } - -void create_constant_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) +void create_constant_load_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t outVectorSize) { - const char *pattern = - "%s%s" - "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " %s%d tmp = vload%d( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n" - " results[ tid ] = tmp;\n" - "}\n"; - - const char *patternV3 = - "%s%s" - "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" - "{\n" - " int tid = get_global_id( 0 );\n" - " %s3 tmp = vload3( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n" - " results[ 3*tid ] = tmp.s0;\n" - " results[ 3*tid+1 ] = tmp.s1;\n" - " results[ 3*tid+2 ] = tmp.s2;\n" - "}\n"; - + std::snprintf(mem_type, sizeof(mem_type), "__constant"); + std::snprintf(store_str, sizeof(store_str), store_patternV3); const char *typeName = get_explicit_type_name(type); - if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, typeName, typeName, - typeName ); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, typeName, (int)outVectorSize, typeName, (int)inVectorSize, - (int)inVectorSize, typeName ); + std::string outTypeName = typeName; + if (inVectorSize != 3) + { + outTypeName = str_sprintf("%s%d", typeName, (int)outVectorSize); + std::snprintf(store_str, sizeof(store_str), store_pattern); } + + std::string kernel_src = concat_kernel( + kernel_pattern, sizeof(kernel_pattern) / sizeof(kernel_pattern[0])); + destBuffer = str_sprintf(kernel_src, typeName, outTypeName.c_str(), + typeName, (int)inVectorSize, (int)inVectorSize); } int test_vload_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) @@ -415,109 +414,71 @@ int test_vload_constant(cl_device_id device, cl_context context, cl_command_queu cl_ulong maxSize; int error = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, NULL ); test_error( error, "Unable to get max size of constant memory buffer" ); - if( maxSize > 10240 ) - maxSize = 10240; + if (maxSize > 10240) maxSize = 10240; if (maxSize > 4096) maxSize -= 2048; else maxSize /= 2; - return test_vloadset( device, context, queue, create_constant_load_code, (size_t)maxSize ); + return test_vset<test_vload>(device, context, queue, + create_constant_load_code, (size_t)maxSize); } - -void create_private_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize ) +void create_private_load_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t outVectorSize) { - const char *pattern = - "%s%s" - // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means - // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test - "#define PRIV_TYPE %s%d\n" - "#define PRIV_SIZE %d\n" - "__kernel void test_fn( __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n" - "{\n" - " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n" - " int tid = get_global_id( 0 );\n" - "\n" - " for( int i = 0; i < %d; i++ )\n" - " sPrivateStorage[ i ] = src[ i ];\n" - // Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for - // anybody else to sync up - "\n" - " %s%d tmp = vload%d( offsets[ tid ], ( (__private %s *) sPrivateStorage ) + alignmentOffsets[ tid ] );\n" - " results[ tid ] = tmp;\n" - "}\n"; - - const char *patternV3 = - "%s%s" - // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means - // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test - "#define PRIV_TYPE %s\n" - "#define PRIV_SIZE %d\n" - "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n" - "{\n" - " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n" - " int tid = get_global_id( 0 );\n" - "\n" - " for( int i = 0; i < PRIV_SIZE; i++ )\n" - " {\n" - " sPrivateStorage[ i ] = src[ i ];\n" - " }\n" - // Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for - // anybody else to sync up - "\n" - " %s3 tmp = vload3( offsets[ tid ], ( sPrivateStorage ) + alignmentOffsets[ tid ] );\n" - " results[ 3*tid ] = tmp.s0;\n" - " results[ 3*tid+1 ] = tmp.s1;\n" - " results[ 3*tid+2 ] = tmp.s2;\n" - "}\n"; - + std::snprintf(store_str, sizeof(store_str), store_patternV3); const char *typeName = get_explicit_type_name(type); - if(inVectorSize ==3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, 3*((int)inBufferSize), - typeName, typeName, - typeName ); - // log_info("Src is \"\n%s\n\"\n", destBuffer); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? doubleExtensionPragma : "", - "", - typeName, (int)inVectorSize, (int)inBufferSize, - typeName, (int)inVectorSize, typeName, (int)outVectorSize, - (int)inBufferSize, - typeName, (int)inVectorSize, (int)inVectorSize, typeName ); + std::string outTypeName = typeName; + std::string inTypeName = typeName; + int bufSize = (int)inBufferSize * 3; + if (inVectorSize != 3) + { + outTypeName = str_sprintf("%s%d", typeName, (int)outVectorSize); + inTypeName = str_sprintf("%s%d", typeName, (int)inVectorSize); + bufSize = (int)inBufferSize; + std::snprintf(store_str, sizeof(store_str), store_pattern); } + + std::string kernel_src = concat_kernel( + pattern_priv, sizeof(pattern_priv) / sizeof(pattern_priv[0])); + destBuffer = str_sprintf(kernel_src, inTypeName.c_str(), bufSize, + inTypeName.c_str(), outTypeName.c_str(), typeName, + (int)inVectorSize, (int)inVectorSize, typeName); } int test_vload_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) { // We have no idea how much actual private storage is available, so just pick a reasonable value, // which is that we can fit at least two 16-element long, which is 2*8 bytes * 16 = 256 bytes - return test_vloadset( device, context, queue, create_private_load_code, 256 ); + return test_vset<test_vload>(device, context, queue, + create_private_load_code, 256); } - /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #pragma mark -------------------- vstore harness -------------------------- -typedef void (*create_vstore_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize ); - -int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize, - create_vstore_program_fn createFn, size_t bufferSize, MTdata d ) +int test_vstore(cl_device_id device, cl_context context, cl_command_queue queue, + ExplicitType type, unsigned int vecSize, + create_program_fn createFn, size_t bufferSize) { - int error; - clProgramWrapper program; clKernelWrapper kernel; clMemWrapper streams[ 3 ]; + MTdataHolder d(gRandomSeed); size_t threads[ 1 ], localThreads[ 1 ]; - size_t numElements, typeSize, numStores = (DEBUG) ? 16 : NUM_LOADS; + pragma_str[0] = '\0'; + if (type == kDouble) + std::snprintf(pragma_str, sizeof(pragma_str), + "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"); + else if (type == kHalf) + std::snprintf(pragma_str, sizeof(pragma_str), + "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"); + if (DEBUG) bufferSize = (bufferSize < 128) ? bufferSize : 128; @@ -534,39 +495,22 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue } if (DEBUG) log_info("Testing: numStores: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numStores, (int)typeSize, vecSize, (int)numElements, (int)bufferSize); -#if !(defined(_WIN32) && defined(_MSC_VER)) - cl_uint offsets[ numStores ]; -#else - cl_uint* offsets = (cl_uint*)_malloca(numStores * sizeof(cl_uint)); -#endif - char programSrc[ 10240 ]; - size_t i; - -#if !(defined(_WIN32) && defined(_MSC_VER)) - char inBuffer[ numStores * typeSize * vecSize ]; -#else - char* inBuffer = (char*)_malloca( numStores * typeSize * vecSize * sizeof(cl_char)); -#endif + + std::vector<cl_uint> offsets(numStores); + std::vector<char> inBuffer(numStores * typeSize * vecSize); + clProtectedArray outBuffer( numElements * typeSize * vecSize ); -#if !(defined(_WIN32) && defined(_MSC_VER)) - char referenceBuffer[ numElements * typeSize * vecSize ]; -#else - char* referenceBuffer = (char*)_malloca(numElements * typeSize * vecSize * sizeof(cl_char)); -#endif + std::vector<char> referenceBuffer(numElements * typeSize * vecSize); // Create some random input data and random offsets to load from - generate_random_data( type, numStores * vecSize, d, (void *)inBuffer ); + generate_random_data(type, numStores * vecSize, d, (void *)inBuffer.data()); // Note: make sure no two offsets are the same, otherwise the output would depend on // the order that threads ran in, and that would be next to impossible to verify -#if !(defined(_WIN32) && defined(_MSC_VER)) - char flags[ numElements ]; -#else - char* flags = (char*)_malloca( numElements * sizeof(char)); -#endif - - memset( flags, 0, numElements * sizeof(char) ); - for( i = 0; i < numStores; i++ ) + std::vector<char> flags(numElements); + flags.assign(flags.size(), 0); + + for (size_t i = 0; i < numStores; i++) { do { @@ -579,13 +523,15 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue if (LINEAR_OFFSETS) log_info("Offsets set to thread IDs to simplify output.\n"); - createFn( programSrc, numElements, type, vecSize ); + std::string programSrc; + createFn(programSrc, numElements, type, vecSize, vecSize); // Create our kernel - const char *ptr = programSrc; - error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" ); + const char *ptr = programSrc.c_str(); + cl_int error = create_single_kernel_helper(context, &program, &kernel, 1, + &ptr, "test_fn"); test_error( error, "Unable to create testing kernel" ); - if (DEBUG) log_info("Kernel: \n%s\n", programSrc); + if (DEBUG) log_info("Kernel: \n%s\n", programSrc.c_str()); // Get the number of args to differentiate the kernels with local storage. (They have 5) cl_uint numArgs; @@ -593,9 +539,14 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue test_error( error, "clGetKernelInfo failed"); // Set up parameters - streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * typeSize * vecSize * sizeof(cl_char), (void *)inBuffer, &error ); + streams[0] = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + numStores * typeSize * vecSize * sizeof(cl_char), + (void *)inBuffer.data(), &error); test_error( error, "Unable to create kernel stream" ); - streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * sizeof(cl_uint), offsets, &error ); + streams[1] = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + numStores * sizeof(cl_uint), offsets.data(), &error); test_error( error, "Unable to create kernel stream" ); streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numElements * typeSize * vecSize, (void *)outBuffer, &error ); test_error( error, "Unable to create kernel stream" ); @@ -606,7 +557,7 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue // We need to set the size of the local storage error = clSetKernelArg(kernel, 0, bufferSize, NULL); test_error( error, "clSetKernelArg for buffer failed"); - for( i = 0; i < 3; i++ ) + for (size_t i = 0; i < 3; i++) { error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] ); test_error( error, "Unable to set kernel argument" ); @@ -615,11 +566,10 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue else { // No local storage - for( i = 0; i < 3; i++ ) + for (size_t i = 0; i < 3; i++) { error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] ); - if (error) - log_info("%s\n", programSrc); + if (error) log_info("%s\n", programSrc.c_str()); test_error( error, "Unable to set kernel argument" ); } } @@ -654,25 +604,26 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue error = clEnqueueReadBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL ); test_error( error, "Unable to read results" ); - // Create the reference results - memset( referenceBuffer, 0, numElements * typeSize * vecSize * sizeof(cl_char) ); - for( i = 0; i < numStores; i++ ) + referenceBuffer.assign(referenceBuffer.size(), 0); + for (size_t i = 0; i < numStores; i++) { - memcpy( referenceBuffer + ( ( offsets[ i ] * vecSize ) + addressOffset ) * typeSize, inBuffer + i * typeSize * vecSize, typeSize * vecSize ); + memcpy(&referenceBuffer[((offsets[i] * vecSize) + addressOffset) + * typeSize], + &inBuffer[i * typeSize * vecSize], typeSize * vecSize); } // Validate the results now - char *expected = referenceBuffer; + char *expected = referenceBuffer.data(); char *actual = (char *)(void *)outBuffer; if (DEBUG) { log_info("Memory contents:\n"); - for (i=0; i<numElements; i++) + char inString[1024]; + char expectedString[1024], actualString[1024]; + for (size_t i = 0; i < numElements; i++) { - char inString[1024]; - char expectedString[ 1024 ], actualString[ 1024 ]; if (i < numStores) { log_info("buffer %3d: input: %s expected: %s got: %s (store offset %3d)", (int)i, GetDataVectorString( &(inBuffer[i*typeSize*vecSize]), typeSize, vecSize, inString ), @@ -693,7 +644,7 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue } } - for( i = 0; i < numElements; i++ ) + for (size_t i = 0; i < numElements; i++) { if( memcmp( expected, actual, typeSize * vecSize ) != 0 ) { @@ -719,62 +670,26 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue actual += typeSize * vecSize; } } - return 0; } -int test_vstoreset(cl_device_id device, cl_context context, cl_command_queue queue, create_vstore_program_fn createFn, size_t bufferSize ) -{ - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; - unsigned int vecSizes[] = { 2, 3, 4, 8, 16, 0 }; - const char *size_names[] = { "2", "3", "4", "8", "16"}; - unsigned int typeIdx, sizeIdx; - int error = 0; - MTdata d = init_genrand( gRandomSeed ); - - log_info("Testing with buffer size of %d.\n", (int)bufferSize); - - for( typeIdx = 0; vecType[ typeIdx ] != kNumExplicitTypes; typeIdx++ ) - { - if( vecType[ typeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) - continue; - - if(( vecType[ typeIdx ] == kLong || vecType[ typeIdx ] == kULong ) && !gHasLong ) - continue; - - for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ ) - { - log_info("Testing %s%s...\n", get_explicit_type_name(vecType[typeIdx]), size_names[sizeIdx]); - - int error_this_type = test_vstore( device, context, queue, vecType[ typeIdx ], vecSizes[ sizeIdx ], createFn, bufferSize, d ); - if (error_this_type) - { - log_error("Failure; skipping further sizes for this type.\n"); - error += error_this_type; - break; - } - } - } - - free_mtdata(d); - return error; -} - - #pragma mark -------------------- vstore test cases -------------------------- -void create_global_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize ) +void create_global_store_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t /*unused*/) { - const char *pattern = - "%s" + // clang-format off + const char *pattern [] = { + pragma_str, "__kernel void test_fn( __global %s%d *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n" "{\n" " int tid = get_global_id( 0 );\n" " vstore%d( srcValues[ tid ], offsets[ tid ], destBuffer + alignmentOffset );\n" - "}\n"; + "}\n" }; - const char *patternV3 = - "%s" + const char *patternV3 [] = { + pragma_str, "__kernel void test_fn( __global %s3 *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n" "{\n" " int tid = get_global_id( 0 );\n" @@ -783,45 +698,48 @@ void create_global_store_code( char *destBuffer, size_t inBufferSize, ExplicitTy " } else {\n" " vstore3( vload3(tid, (__global %s *)srcValues), offsets[ tid ], destBuffer + alignmentOffset );\n" " }\n" - "}\n"; + "}\n" }; + // clang-format on const char *typeName = get_explicit_type_name(type); - if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - typeName, typeName, typeName); - - } else { - sprintf( destBuffer, pattern, - type == kDouble ? doubleExtensionPragma : "", - typeName, (int)inVectorSize, typeName, (int)inVectorSize ); + std::string kernel_src = + concat_kernel(patternV3, sizeof(patternV3) / sizeof(patternV3[0])); + destBuffer = str_sprintf(kernel_src, typeName, typeName, typeName); + } + else + { + std::string kernel_src = + concat_kernel(pattern, sizeof(pattern) / sizeof(pattern[0])); + destBuffer = str_sprintf(kernel_src, typeName, (int)inVectorSize, + typeName, (int)inVectorSize); } - // if(inVectorSize == 3 || inVectorSize == 4) { - // log_info("\n----\n%s\n----\n", destBuffer); - // } } int test_vstore_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) { - return test_vstoreset( device, context, queue, create_global_store_code, 10240 ); + return test_vset<test_vstore>(device, context, queue, + create_global_store_code, 10240); } - -void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize ) +void create_local_store_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t /*unused*/) { - const char *pattern = - "%s" - "\n" - "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n" + // clang-format off + const char *pattern[] = { + pragma_str, + "#define LOC_TYPE %s\n" + "#define LOC_VTYPE %s%d\n" + "__kernel void test_fn(__local LOC_VTYPE *sSharedStorage, __global LOC_VTYPE *srcValues, __global uint *offsets, __global LOC_VTYPE *destBuffer, uint alignmentOffset )\n" "{\n" " int tid = get_global_id( 0 );\n" // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. - " sSharedStorage[ offsets[tid] ] = (%s%d)(%s)0;\n" + " sSharedStorage[ offsets[tid] ] = (LOC_VTYPE)(LOC_TYPE)0;\n" " sSharedStorage[ offsets[tid] +1 ] = sSharedStorage[ offsets[tid] ];\n" " barrier( CLK_LOCAL_MEM_FENCE );\n" "\n" - " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__local %s *)sSharedStorage ) + alignmentOffset );\n" + " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__local LOC_TYPE *)sSharedStorage ) + alignmentOffset );\n" "\n" // Note: Once all threads are done vstore'ing into our shared storage, we then copy into the global output // buffer, but we have to make sure ALL threads are done vstore'ing before we do the copy @@ -830,20 +748,20 @@ void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitTyp // Note: we only copy the relevant portion of our local storage over to the dest buffer, because // otherwise, local threads would be overwriting results from other local threads " int i;\n" - " __local %s *sp = (__local %s*) (sSharedStorage + offsets[tid]) + alignmentOffset;\n" - " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n" + " __local LOC_TYPE *sp = (__local LOC_TYPE*) (sSharedStorage + offsets[tid]) + alignmentOffset;\n" + " __global LOC_TYPE *dp = (__global LOC_TYPE*) (destBuffer + offsets[tid]) + alignmentOffset;\n" " for( i = 0; (size_t)i < sizeof( sSharedStorage[0]) / sizeof( *sp ); i++ ) \n" " dp[i] = sp[i];\n" - "}\n"; + "}\n" }; - const char *patternV3 = - "%s" - "\n" - "__kernel void test_fn(__local %s *sSharedStorage, __global %s *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n" + const char *patternV3 [] = { + pragma_str, + "#define LOC_TYPE %s\n" + "__kernel void test_fn(__local LOC_TYPE *sSharedStorage, __global LOC_TYPE *srcValues, __global uint *offsets, __global LOC_TYPE *destBuffer, uint alignmentOffset )\n" "{\n" " int tid = get_global_id( 0 );\n" // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. - " sSharedStorage[ 3*offsets[tid] ] = (%s)0;\n" + " sSharedStorage[ 3*offsets[tid] ] = (LOC_TYPE)0;\n" " sSharedStorage[ 3*offsets[tid] +1 ] = \n" " sSharedStorage[ 3*offsets[tid] ];\n" " sSharedStorage[ 3*offsets[tid] +2 ] = \n" @@ -865,30 +783,26 @@ void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitTyp // Note: we only copy the relevant portion of our local storage over to the dest buffer, because // otherwise, local threads would be overwriting results from other local threads " int i;\n" - " __local %s *sp = (sSharedStorage + 3*offsets[tid]) + alignmentOffset;\n" - " __global %s *dp = (destBuffer + 3*offsets[tid]) + alignmentOffset;\n" + " __local LOC_TYPE *sp = (sSharedStorage + 3*offsets[tid]) + alignmentOffset;\n" + " __global LOC_TYPE *dp = (destBuffer + 3*offsets[tid]) + alignmentOffset;\n" " for( i = 0; i < 3; i++ ) \n" " dp[i] = sp[i];\n" - "}\n"; + "}\n" }; + // clang-format on const char *typeName = get_explicit_type_name(type); if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - typeName, - typeName, - typeName, typeName, - typeName, typeName, typeName ); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? doubleExtensionPragma : "", - typeName, (int)inVectorSize, - typeName, (int)inVectorSize, typeName, (int)inVectorSize, - typeName, (int)inVectorSize, typeName, - (int)inVectorSize, typeName, typeName, - typeName, typeName, typeName ); + std::string kernel_src = + concat_kernel(patternV3, sizeof(patternV3) / sizeof(patternV3[0])); + destBuffer = str_sprintf(kernel_src, typeName); + } + else + { + std::string kernel_src = + concat_kernel(pattern, sizeof(pattern) / sizeof(pattern[0])); + destBuffer = str_sprintf(kernel_src, typeName, typeName, + (int)inVectorSize, (int)inVectorSize); } - // log_info(destBuffer); } int test_vstore_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems ) @@ -897,81 +811,82 @@ int test_vstore_local(cl_device_id device, cl_context context, cl_command_queue cl_ulong localSize; int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( localSize ), &localSize, NULL ); test_error( error, "Unable to get max size of local memory buffer" ); - if( localSize > 10240 ) - localSize = 10240; + if (localSize > 10240) localSize = 10240; if (localSize > 4096) localSize -= 2048; else localSize /= 2; - return test_vstoreset( device, context, queue, create_local_store_code, (size_t)localSize ); + return test_vset<test_vstore>(device, context, queue, + create_local_store_code, (size_t)localSize); } - -void create_private_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize ) +void create_private_store_code(std::string &destBuffer, size_t inBufferSize, + ExplicitType type, size_t inVectorSize, + size_t /*unused*/) { - const char *pattern = - "%s" + // clang-format off + const char *pattern [] = { + pragma_str, + "#define PRIV_TYPE %s\n" + "#define PRIV_VTYPE %s%d\n" // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test "\n" - "__kernel void test_fn( __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n" + "__kernel void test_fn( __global PRIV_VTYPE *srcValues, __global uint *offsets, __global PRIV_VTYPE *destBuffer, uint alignmentOffset )\n" "{\n" - " __private %s%d sPrivateStorage[ %d ];\n" - " int tid = get_global_id( 0 );\n" + " __private PRIV_VTYPE sPrivateStorage[ %d ];\n" + " int tid = get_global_id( 0 );\n" // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. - " sPrivateStorage[tid] = (%s%d)(%s)0;\n" + " sPrivateStorage[tid] = (PRIV_VTYPE)(PRIV_TYPE)0;\n" "\n" - " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n" + " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__private PRIV_TYPE *)sPrivateStorage ) + alignmentOffset );\n" "\n" // Note: we only copy the relevant portion of our local storage over to the dest buffer, because // otherwise, local threads would be overwriting results from other local threads " uint i;\n" - " __private %s *sp = (__private %s*) (sPrivateStorage + offsets[tid]) + alignmentOffset;\n" - " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n" + " __private PRIV_TYPE *sp = (__private PRIV_TYPE*) (sPrivateStorage + offsets[tid]) + alignmentOffset;\n" + " __global PRIV_TYPE *dp = (__global PRIV_TYPE*) (destBuffer + offsets[tid]) + alignmentOffset;\n" " for( i = 0; i < sizeof( sPrivateStorage[0]) / sizeof( *sp ); i++ ) \n" " dp[i] = sp[i];\n" - "}\n"; - + "}\n"}; - const char *patternV3 = - "%s" + const char *patternV3 [] = { + pragma_str, + "#define PRIV_TYPE %s\n" + "#define PRIV_VTYPE %s3\n" // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test "\n" - "__kernel void test_fn( __global %s *srcValues, __global uint *offsets, __global %s3 *destBuffer, uint alignmentOffset )\n" + "__kernel void test_fn( __global PRIV_TYPE *srcValues, __global uint *offsets, __global PRIV_VTYPE *destBuffer, uint alignmentOffset )\n" "{\n" - " __private %s3 sPrivateStorage[ %d ];\n" // keep this %d - " int tid = get_global_id( 0 );\n" + " __private PRIV_VTYPE sPrivateStorage[ %d ];\n" // keep this %d + " int tid = get_global_id( 0 );\n" // We need to zero the shared storage since any locations we don't write to will have garbage otherwise. - " sPrivateStorage[tid] = (%s3)(%s)0;\n" + " sPrivateStorage[tid] = (PRIV_VTYPE)(PRIV_TYPE)0;\n" "\n" - - " vstore3( vload3(tid,srcValues), offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n" - "\n" - // Note: we only copy the relevant portion of our local storage over to the dest buffer, because - // otherwise, local threads would be overwriting results from other local threads + " vstore3( vload3(tid,srcValues), offsets[ tid ], ( (__private PRIV_TYPE *)sPrivateStorage ) + alignmentOffset );\n" " uint i;\n" - " __private %s *sp = ((__private %s*) sPrivateStorage) + 3*offsets[tid] + alignmentOffset;\n" - " __global %s *dp = ((__global %s*) destBuffer) + 3*offsets[tid] + alignmentOffset;\n" + " __private PRIV_TYPE *sp = ((__private PRIV_TYPE*) sPrivateStorage) + 3*offsets[tid] + alignmentOffset;\n" + " __global PRIV_TYPE *dp = ((__global PRIV_TYPE*) destBuffer) + 3*offsets[tid] + alignmentOffset;\n" " for( i = 0; i < 3; i++ ) \n" " dp[i] = sp[i];\n" - "}\n"; + "}\n"}; + // clang-format on const char *typeName = get_explicit_type_name(type); if(inVectorSize == 3) { - sprintf( destBuffer, patternV3, - type == kDouble ? doubleExtensionPragma : "", - typeName, typeName, - typeName, (int)inBufferSize, - typeName, typeName, - typeName, typeName, typeName, typeName, typeName ); - } else { - sprintf( destBuffer, pattern, - type == kDouble ? doubleExtensionPragma : "", - typeName, (int)inVectorSize, typeName, (int)inVectorSize, - typeName, (int)inVectorSize, (int)inBufferSize, - typeName, (int)inVectorSize, typeName, - (int)inVectorSize, typeName, typeName, typeName, typeName, typeName ); + std::string kernel_src = + concat_kernel(patternV3, sizeof(patternV3) / sizeof(patternV3[0])); + destBuffer = + str_sprintf(kernel_src, typeName, typeName, (int)inBufferSize); + } + else + { + std::string kernel_src = + concat_kernel(pattern, sizeof(pattern) / sizeof(pattern[0])); + destBuffer = + str_sprintf(kernel_src, typeName, typeName, (int)inVectorSize, + (int)inBufferSize, (int)inVectorSize); } } @@ -979,7 +894,8 @@ int test_vstore_private(cl_device_id device, cl_context context, cl_command_queu { // We have no idea how much actual private storage is available, so just pick a reasonable value, // which is that we can fit at least two 16-element long, which is 2*8 bytes * 16 = 256 bytes - return test_vstoreset( device, context, queue, create_private_store_code, 256 ); + return test_vset<test_vstore>(device, context, queue, + create_private_store_code, 256); } diff --git a/test_conformance/basic/test_wg_barrier.cpp b/test_conformance/basic/test_wg_barrier.cpp deleted file mode 100644 index a237d80b..00000000 --- a/test_conformance/basic/test_wg_barrier.cpp +++ /dev/null @@ -1,159 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> -#include <sys/stat.h> - - -#include "procs.h" - -const char *wg_barrier_kernel_code = -"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n" -"{\n" -" int tid = get_local_id(0);\n" -" int lsize = get_local_size(0);\n" -" int i;\n" -"\n" -" tmp_sum[tid] = 0;\n" -" for (i=tid; i<n; i+=lsize)\n" -" tmp_sum[tid] += a[i];\n" -" \n" -" // updated to work for any workgroup size \n" -" for (i=hadd(lsize,1); lsize>1; i = hadd(i,1))\n" -" {\n" -" work_group_barrier(CLK_GLOBAL_MEM_FENCE);\n" -" if (tid + i < lsize)\n" -" tmp_sum[tid] += tmp_sum[tid + i];\n" -" lsize = i; \n" -" }\n" -"\n" -" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n" -" if (tid == 0)\n" -" *sum = tmp_sum[0];\n" -"}\n"; - - -static int -verify_sum(int *inptr, int *tmpptr, int *outptr, int n) -{ - int i; - int reference = 0; - - for (i=0; i<n; i++) - { - reference += inptr[i]; - } - - if (reference != outptr[0]) - { - log_error("work_group_barrier test failed\n"); - return -1; - } - - log_info("work_group_barrier test passed\n"); - return 0; -} - - -int -test_wg_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) -{ - cl_mem streams[3]; - cl_int *input_ptr = NULL, *output_ptr = NULL, *tmp_ptr =NULL; - cl_program program; - cl_kernel kernel; - size_t global_threads[3]; - size_t local_threads[3]; - int err; - int i; - size_t max_local_workgroup_size[3]; - size_t max_threadgroup_size = 0; - MTdata d; - - err = create_single_kernel_helper_with_build_options( - context, &program, &kernel, 1, &wg_barrier_kernel_code, "compute_sum", - nullptr); - test_error(err, "Failed to build kernel/program."); - - err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, - sizeof(max_threadgroup_size), &max_threadgroup_size, NULL); - test_error(err, "clGetKernelWorkgroupInfo failed."); - - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL); - test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); - - // Pick the minimum of the device and the kernel - if (max_threadgroup_size > max_local_workgroup_size[0]) - max_threadgroup_size = max_local_workgroup_size[0]; - - // work group size must divide evenly into the global size - while( num_elements % max_threadgroup_size ) - max_threadgroup_size--; - - input_ptr = (int*)malloc(sizeof(int) * num_elements); - output_ptr = (int*)malloc(sizeof(int)); - - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, &err); - test_error(err, "clCreateBuffer failed."); - streams[1] = - clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &err); - test_error(err, "clCreateBuffer failed."); - streams[2] = - clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * max_threadgroup_size, NULL, &err); - test_error(err, "clCreateBuffer failed."); - - d = init_genrand( gRandomSeed ); - for (i=0; i<num_elements; i++) - input_ptr[i] = (int)get_random_float(-0x01000000, 0x01000000, d); - free_mtdata(d); d = NULL; - - err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL); - test_error(err, "clEnqueueWriteBuffer failed."); - - err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]); - err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements); - err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]); - err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]); - test_error(err, "clSetKernelArg failed."); - - global_threads[0] = max_threadgroup_size; - local_threads[0] = max_threadgroup_size; - - err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL ); - test_error(err, "clEnqueueNDRangeKernel failed."); - - err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int), (void *)output_ptr, 0, NULL, NULL ); - test_error(err, "clEnqueueReadBuffer failed."); - - err = verify_sum(input_ptr, tmp_ptr, output_ptr, num_elements); - - // cleanup - clReleaseMemObject(streams[0]); - clReleaseMemObject(streams[1]); - clReleaseMemObject(streams[2]); - clReleaseKernel(kernel); - clReleaseProgram(program); - free(input_ptr); - free(output_ptr); - - return err; -} diff --git a/test_conformance/basic/test_work_item_functions.cpp b/test_conformance/basic/test_work_item_functions.cpp index d95915cf..9683a834 100644 --- a/test_conformance/basic/test_work_item_functions.cpp +++ b/test_conformance/basic/test_work_item_functions.cpp @@ -91,7 +91,6 @@ int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_comma { for( int i = 0; i < NUM_TESTS; i++ ) { - size_t numItems = 1; for( size_t j = 0; j < dim; j++ ) { // All of our thread sizes should be within the max local sizes, since they're all <= 20 @@ -100,8 +99,6 @@ int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_comma while( localThreads[ j ] > 1 && ( threads[ j ] % localThreads[ j ] != 0 ) ) localThreads[ j ]--; - numItems *= threads[ j ]; - // Hack for now: localThreads > 1 are iffy localThreads[ j ] = 1; } diff --git a/test_conformance/c11_atomics/CMakeLists.txt b/test_conformance/c11_atomics/CMakeLists.txt index 621adda7..0d389bce 100644 --- a/test_conformance/c11_atomics/CMakeLists.txt +++ b/test_conformance/c11_atomics/CMakeLists.txt @@ -7,4 +7,6 @@ set(${MODULE_NAME}_SOURCES test_atomics.cpp ) +set_gnulike_module_compile_flags("-Wno-sign-compare") + include(../CMakeCommon.txt) diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index 6c7d0b12..37c37e87 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _COMMON_H_ -#define _COMMON_H_ +#ifndef COMMON_H_ +#define COMMON_H_ #include "harness/testHarness.h" #include "harness/typeWrappers.h" @@ -1567,4 +1567,4 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest( return 0; } -#endif //_COMMON_H_ +#endif // COMMON_H_ diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h index 6c4e783a..b865970f 100644 --- a/test_conformance/c11_atomics/host_atomics.h +++ b/test_conformance/c11_atomics/host_atomics.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _HOST_ATOMICS_H_ -#define _HOST_ATOMICS_H_ +#ifndef HOST_ATOMICS_H_ +#define HOST_ATOMICS_H_ #include "harness/testHarness.h" @@ -247,4 +247,4 @@ CorrespondingType host_atomic_fetch_max(volatile AtomicType *a, CorrespondingTyp bool host_atomic_flag_test_and_set(volatile HOST_ATOMIC_FLAG *a, TExplicitMemoryOrderType order); void host_atomic_flag_clear(volatile HOST_ATOMIC_FLAG *a, TExplicitMemoryOrderType order); -#endif //_HOST_ATOMICS_H_ +#endif // HOST_ATOMICS_H_ diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index d905b2ca..ca2c2242 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -3145,7 +3145,7 @@ public: } private: - int _subCaseId; + size_t _subCaseId; struct TestDefinition _subCase; }; diff --git a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp index 0a459e97..5d0e99e0 100644 --- a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp +++ b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp @@ -19,7 +19,7 @@ #include "harness/errorHelpers.h" #include "harness/deviceInfo.h" #include <assert.h> -#include <iostream> +#include <algorithm> #include <stdexcept> #define ASSERT(x) assert((x)) @@ -740,21 +740,42 @@ clExternalSemaphore::clExternalSemaphore( cl_int err = 0; cl_device_id devList[] = { deviceId, NULL }; -#ifdef _WIN32 - if (!is_extension_available(devList[0], "cl_khr_external_semaphore_win32")) - { - throw std::runtime_error("Device does not support " - "cl_khr_external_semaphore_win32 extension\n"); - } -#elif !defined(__APPLE__) - if (!is_extension_available(devList[0], - "cl_khr_external_semaphore_opaque_fd")) + switch (externalSemaphoreHandleType) { - throw std::runtime_error( - "Device does not support cl_khr_external_semaphore_opaque_fd " - "extension \n"); + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD: + if (!is_extension_available(devList[0], + "cl_khr_external_semaphore_opaque_fd")) + { + throw std::runtime_error("Device does not support " + "cl_khr_external_semaphore_opaque_fd " + "extension \n"); + } + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT: + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT: + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT: + if (!is_extension_available(devList[0], + "cl_khr_external_semaphore_win32")) + { + throw std::runtime_error( + "Device does not support " + "cl_khr_external_semaphore_win32 extension\n"); + } + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD: + if (!is_extension_available(devList[0], + "cl_khr_external_semaphore_sync_fd")) + { + throw std::runtime_error( + "Device does not support cl_khr_external_semaphore_sync_fd " + "extension \n"); + } + break; + default: + throw std::runtime_error( + "Unsupported external semaphore handle type\n"); + break; } -#endif std::vector<cl_semaphore_properties_khr> sema_props{ (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, @@ -803,6 +824,16 @@ clExternalSemaphore::clExternalSemaphore( sema_props.push_back((cl_semaphore_properties_khr)handle); #endif break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD: + err = check_external_semaphore_handle_type( + devList[0], CL_SEMAPHORE_HANDLE_SYNC_FD_KHR); + sema_props.push_back(static_cast<cl_semaphore_properties_khr>( + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR)); + sema_props.push_back(static_cast<cl_semaphore_properties_khr>( + CL_SEMAPHORE_HANDLE_SYNC_FD_KHR)); + sema_props.push_back(static_cast<cl_semaphore_properties_khr>( + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR)); + break; default: ASSERT(0); log_error("Unsupported external memory handle type\n"); @@ -856,3 +887,67 @@ cl_semaphore_khr &clExternalSemaphore::getCLSemaphore() { return m_externalSemaphore; } + +cl_external_memory_handle_type_khr vkToOpenCLExternalMemoryHandleType( + VulkanExternalMemoryHandleType vkExternalMemoryHandleType) +{ + switch (vkExternalMemoryHandleType) + { + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD: + return CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT: + return CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT: + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT: + return CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE: return 0; + } + return 0; +} + +VulkanImageTiling vkClExternalMemoryHandleTilingAssumption( + cl_device_id deviceId, + VulkanExternalMemoryHandleType vkExternalMemoryHandleType, int *error_ret) +{ + size_t size = 0; + VulkanImageTiling mode = VULKAN_IMAGE_TILING_OPTIMAL; + + assert(error_ret + != nullptr); // errcode_ret is not optional, it must be checked + + *error_ret = clGetDeviceInfo( + deviceId, + CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR, + 0, nullptr, &size); + if (*error_ret != CL_SUCCESS) + { + return mode; + } + + if (size == 0) + { + return mode; + } + + std::vector<cl_external_memory_handle_type_khr> assume_linear_types( + size / sizeof(cl_external_memory_handle_type_khr)); + + *error_ret = clGetDeviceInfo( + deviceId, + CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR, + size, assume_linear_types.data(), nullptr); + if (*error_ret != CL_SUCCESS) + { + return mode; + } + + if (std::find( + assume_linear_types.begin(), assume_linear_types.end(), + vkToOpenCLExternalMemoryHandleType(vkExternalMemoryHandleType)) + != assume_linear_types.end()) + { + mode = VULKAN_IMAGE_TILING_LINEAR; + } + + return mode; +} diff --git a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp index 5143332d..4a1d453e 100644 --- a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp +++ b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp @@ -129,4 +129,8 @@ public: extern void init_cl_vk_ext(cl_platform_id); +VulkanImageTiling vkClExternalMemoryHandleTilingAssumption( + cl_device_id deviceId, + VulkanExternalMemoryHandleType vkExternalMemoryHandleType, int *error_ret); + #endif // _opencl_vulkan_wrapper_hpp_ diff --git a/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp b/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp index c62a71e1..e9c06f98 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp @@ -75,6 +75,7 @@ VK_FUNC_DECL(vkDestroyImageView) \ VK_FUNC_DECL(vkCreateImage) \ VK_FUNC_DECL(vkGetImageMemoryRequirements) \ + VK_FUNC_DECL(vkGetImageMemoryRequirements2) \ VK_FUNC_DECL(vkDestroyImage) \ VK_FUNC_DECL(vkDestroyBuffer) \ VK_FUNC_DECL(vkDestroyPipeline) \ @@ -87,8 +88,9 @@ VK_FUNC_DECL(vkDestroyDescriptorSetLayout) \ VK_FUNC_DECL(vkGetPhysicalDeviceQueueFamilyProperties) \ VK_FUNC_DECL(vkGetPhysicalDeviceFeatures) \ - VK_FUNC_DECL(vkGetPhysicalDeviceProperties2KHR) \ + VK_FUNC_DECL(vkGetPhysicalDeviceProperties2) \ VK_FUNC_DECL(vkGetBufferMemoryRequirements) \ + VK_FUNC_DECL(vkGetBufferMemoryRequirements2) \ VK_FUNC_DECL(vkGetMemoryFdKHR) \ VK_FUNC_DECL(vkGetSemaphoreFdKHR) \ VK_FUNC_DECL(vkEnumeratePhysicalDeviceGroups) \ @@ -160,6 +162,7 @@ #define vkDestroyImageView _vkDestroyImageView #define vkCreateImage _vkCreateImage #define vkGetImageMemoryRequirements _vkGetImageMemoryRequirements +#define vkGetImageMemoryRequirements2 _vkGetImageMemoryRequirements2 #define vkDestroyImage _vkDestroyImage #define vkDestroyBuffer _vkDestroyBuffer #define vkDestroyPipeline _vkDestroyPipeline @@ -173,8 +176,9 @@ #define vkGetPhysicalDeviceQueueFamilyProperties \ _vkGetPhysicalDeviceQueueFamilyProperties #define vkGetPhysicalDeviceFeatures _vkGetPhysicalDeviceFeatures -#define vkGetPhysicalDeviceProperties2KHR _vkGetPhysicalDeviceProperties2KHR +#define vkGetPhysicalDeviceProperties2 _vkGetPhysicalDeviceProperties2 #define vkGetBufferMemoryRequirements _vkGetBufferMemoryRequirements +#define vkGetBufferMemoryRequirements2 _vkGetBufferMemoryRequirements2 #define vkGetMemoryFdKHR _vkGetMemoryFdKHR #define vkGetSemaphoreFdKHR _vkGetSemaphoreFdKHR #define vkEnumeratePhysicalDeviceGroups _vkEnumeratePhysicalDeviceGroups diff --git a/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp b/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp index 4e276519..a5ca0901 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp @@ -141,6 +141,16 @@ VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList( VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList() {} +void VulkanDescriptorSetLayoutBindingList::addBinding( + size_t binding, VulkanDescriptorType descriptorType, + uint32_t descriptorCount, VulkanShaderStage shaderStage) +{ + VulkanDescriptorSetLayoutBinding *descriptorSetLayoutBinding = + new VulkanDescriptorSetLayoutBinding(binding, descriptorType, + descriptorCount, shaderStage); + add(*descriptorSetLayoutBinding); +} + VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList( size_t numDescriptorSetLayoutBindings, VulkanDescriptorType descriptorType, uint32_t descriptorCount, VulkanShaderStage shaderStage) @@ -268,6 +278,7 @@ VulkanImage2DList::VulkanImage2DList( size_t numImages, std::vector<VulkanDeviceMemory *> &deviceMemory, uint64_t baseOffset, uint64_t interImageOffset, const VulkanDevice &device, VulkanFormat format, uint32_t width, uint32_t height, uint32_t mipLevels, + VulkanImageTiling vulkanImageTiling, VulkanExternalMemoryHandleType externalMemoryHandleType, VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage, VulkanSharingMode sharingMode) @@ -275,8 +286,8 @@ VulkanImage2DList::VulkanImage2DList( for (size_t i2DIdx = 0; i2DIdx < numImages; i2DIdx++) { VulkanImage2D *image2D = new VulkanImage2D( - device, format, width, height, mipLevels, externalMemoryHandleType, - imageCreateFlag, imageUsage, sharingMode); + device, format, width, height, vulkanImageTiling, mipLevels, + externalMemoryHandleType, imageCreateFlag, imageUsage, sharingMode); add(*image2D); deviceMemory[i2DIdx]->bindImage( *image2D, baseOffset + (i2DIdx * interImageOffset)); @@ -285,16 +296,16 @@ VulkanImage2DList::VulkanImage2DList( VulkanImage2DList::VulkanImage2DList( size_t numImages, const VulkanDevice &device, VulkanFormat format, - uint32_t width, uint32_t height, uint32_t mipLevels, - VulkanExternalMemoryHandleType externalMemoryHandleType, + uint32_t width, uint32_t height, VulkanImageTiling vulkanImageTiling, + uint32_t mipLevels, VulkanExternalMemoryHandleType externalMemoryHandleType, VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage, VulkanSharingMode sharingMode) { for (size_t bIdx = 0; bIdx < numImages; bIdx++) { VulkanImage2D *image2D = new VulkanImage2D( - device, format, width, height, mipLevels, externalMemoryHandleType, - imageCreateFlag, imageUsage, sharingMode); + device, format, width, height, vulkanImageTiling, mipLevels, + externalMemoryHandleType, imageCreateFlag, imageUsage, sharingMode); add(*image2D); } } diff --git a/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp b/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp index 52206779..ef00b70a 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp @@ -154,6 +154,10 @@ public: VulkanDescriptorType descriptorType0, uint32_t descriptorCount0, VulkanDescriptorType descriptorType1, uint32_t descriptorCount1, VulkanShaderStage shaderStage = VULKAN_SHADER_STAGE_COMPUTE); + void + addBinding(size_t binding, VulkanDescriptorType descriptorType, + uint32_t descriptorCount, + VulkanShaderStage shaderStage = VULKAN_SHADER_STAGE_COMPUTE); virtual ~VulkanDescriptorSetLayoutBindingList(); }; @@ -208,6 +212,7 @@ public: uint64_t baseOffset, uint64_t interImageOffset, const VulkanDevice &device, VulkanFormat format, uint32_t width, uint32_t height, uint32_t mipLevels, + VulkanImageTiling vulkanImageTiling, VulkanExternalMemoryHandleType externalMemoryHandleType = VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE, @@ -216,7 +221,8 @@ public: VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE); VulkanImage2DList( size_t numImages, const VulkanDevice &device, VulkanFormat format, - uint32_t width, uint32_t height, uint32_t mipLevels = 1, + uint32_t width, uint32_t height, VulkanImageTiling vulkanImageTiling, + uint32_t mipLevels = 1, VulkanExternalMemoryHandleType externalMemoryHandleType = VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE, diff --git a/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp b/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp index 1a313cce..2124a275 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp @@ -21,6 +21,7 @@ #include <fstream> #include <set> #include <string> +#include <algorithm> #include <CL/cl.h> #include <CL/cl_ext.h> #if defined(_WIN32) || defined(_WIN64) @@ -248,6 +249,9 @@ getSupportedVulkanExternalSemaphoreHandleTypeList() } externalSemaphoreHandleTypeList.push_back( VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT); +#elif defined(__ANDROID__) + externalSemaphoreHandleTypeList.push_back( + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD); #else externalSemaphoreHandleTypeList.push_back( VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD); @@ -480,6 +484,33 @@ const std::vector<VulkanFormat> getSupportedVulkanFormatList() return formatList; } +cl_external_semaphore_handle_type_khr getCLSemaphoreTypeFromVulkanType( + VulkanExternalSemaphoreHandleType vulkanExternalSemaphoreHandleType) +{ + cl_external_semaphore_handle_type_khr clExternalSemaphoreHandleTypeKhr = 0; + switch (vulkanExternalSemaphoreHandleType) + { + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD: + clExternalSemaphoreHandleTypeKhr = + CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR; + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT: + clExternalSemaphoreHandleTypeKhr = + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR; + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT: + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT: + clExternalSemaphoreHandleTypeKhr = + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR; + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD: + clExternalSemaphoreHandleTypeKhr = CL_SEMAPHORE_HANDLE_SYNC_FD_KHR; + break; + default: break; + } + return clExternalSemaphoreHandleTypeKhr; +} + uint32_t getVulkanFormatElementSize(VulkanFormat format) { switch (format) diff --git a/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp b/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp index 04f5a594..51284125 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp @@ -33,7 +33,8 @@ const VulkanInstance& getVulkanInstance(); const VulkanPhysicalDevice& getVulkanPhysicalDevice(); const VulkanQueueFamily& -getVulkanQueueFamily(uint32_t queueFlags = VULKAN_QUEUE_FLAG_MASK_ALL); +getVulkanQueueFamily(uint32_t queueFlags = VULKAN_QUEUE_FLAG_GRAPHICS + | VULKAN_QUEUE_FLAG_COMPUTE); const VulkanMemoryType& getVulkanMemoryType(const VulkanDevice& device, VulkanMemoryTypeProperty memoryTypeProperty); @@ -51,6 +52,8 @@ const std::vector<VulkanFormat> getSupportedVulkanFormatList(); uint32_t getVulkanFormatElementSize(VulkanFormat format); const char* getVulkanFormatGLSLFormat(VulkanFormat format); const char* getVulkanFormatGLSLTypePrefix(VulkanFormat format); +cl_external_semaphore_handle_type_khr getCLSemaphoreTypeFromVulkanType( + VulkanExternalSemaphoreHandleType vulkanExternalSemaphoreHandleType); std::string prepareVulkanShader( std::string shaderCode, diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp index 3ce4af6b..73c5e9a1 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp @@ -72,7 +72,9 @@ VulkanInstance::VulkanInstance(): m_vkInstance(VK_NULL_HANDLE) #if defined(_WIN32) || defined(_WIN64) const char *vulkanLoaderLibraryName = "vulkan-1.dll"; -#elif defined(__linux__) +#elif defined(__ANDROID__) + const char *vulkanLoaderLibraryName = "libvulkan.so"; +#else const char *vulkanLoaderLibraryName = "libvulkan.so.1"; #endif #ifdef _WIN32 @@ -274,13 +276,13 @@ VulkanPhysicalDevice::VulkanPhysicalDevice(VkPhysicalDevice vkPhysicalDevice) VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR; vkPhysicalDeviceIDPropertiesKHR.pNext = NULL; - VkPhysicalDeviceProperties2KHR vkPhysicalDeviceProperties2KHR = {}; - vkPhysicalDeviceProperties2KHR.sType = + VkPhysicalDeviceProperties2 vkPhysicalDeviceProperties2 = {}; + vkPhysicalDeviceProperties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; - vkPhysicalDeviceProperties2KHR.pNext = &vkPhysicalDeviceIDPropertiesKHR; + vkPhysicalDeviceProperties2.pNext = &vkPhysicalDeviceIDPropertiesKHR; - vkGetPhysicalDeviceProperties2KHR(m_vkPhysicalDevice, - &vkPhysicalDeviceProperties2KHR); + vkGetPhysicalDeviceProperties2(m_vkPhysicalDevice, + &vkPhysicalDeviceProperties2); memcpy(m_vkDeviceUUID, vkPhysicalDeviceIDPropertiesKHR.deviceUUID, sizeof(m_vkDeviceUUID)); @@ -605,6 +607,37 @@ VulkanQueue &VulkanDevice::getQueue(const VulkanQueueFamily &queueFamily, VulkanDevice::operator VkDevice() const { return m_vkDevice; } //////////////////////////////// +// VulkanFence implementation // +//////////////////////////////// + +VulkanFence::VulkanFence(const VulkanDevice &vkDevice) +{ + + device = vkDevice; + + VkFenceCreateInfo fenceInfo{}; + fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fenceInfo.pNext = nullptr; + fenceInfo.flags = 0; + + VkResult vkStatus = vkCreateFence(device, &fenceInfo, nullptr, &fence); + + if (vkStatus != VK_SUCCESS) + { + throw std::runtime_error("Error: Failed create fence."); + } +} + +VulkanFence::~VulkanFence() { vkDestroyFence(device, fence, nullptr); } + +void VulkanFence::reset() { vkResetFences(device, 1, &fence); } + +void VulkanFence::wait() +{ + vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX); +} + +//////////////////////////////// // VulkanQueue implementation // //////////////////////////////// @@ -615,6 +648,22 @@ VulkanQueue::VulkanQueue(VkQueue vkQueue): m_vkQueue(vkQueue) {} VulkanQueue::~VulkanQueue() {} +void VulkanQueue::submit(const VulkanCommandBuffer &commandBuffer, + const std::shared_ptr<VulkanFence> &vkFence) +{ + VulkanCommandBufferList commandBufferList; + commandBufferList.add(commandBuffer); + + VkSubmitInfo vkSubmitInfo = {}; + vkSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + vkSubmitInfo.pNext = NULL; + vkSubmitInfo.waitSemaphoreCount = (uint32_t)0; + vkSubmitInfo.commandBufferCount = (uint32_t)commandBufferList.size(); + vkSubmitInfo.pCommandBuffers = commandBufferList(); + + vkQueueSubmit(m_vkQueue, 1, &vkSubmitInfo, vkFence->fence); +} + void VulkanQueue::submit(const VulkanSemaphoreList &waitSemaphoreList, const VulkanCommandBufferList &commandBufferList, const VulkanSemaphoreList &signalSemaphoreList) @@ -964,12 +1013,14 @@ void VulkanDescriptorPool::VulkanDescriptorPoolCommon( == vkDescriptorTypeToDescriptorCountMap.end()) { vkDescriptorTypeToDescriptorCountMap - [vkDescriptorSetLayoutBinding.descriptorType] = 1; + [vkDescriptorSetLayoutBinding.descriptorType] = + vkDescriptorSetLayoutBinding.descriptorCount; } else { vkDescriptorTypeToDescriptorCountMap - [vkDescriptorSetLayoutBinding.descriptorType]++; + [vkDescriptorSetLayoutBinding.descriptorType] += + vkDescriptorSetLayoutBinding.descriptorCount; } } @@ -1110,6 +1161,35 @@ void VulkanDescriptorSet::update(uint32_t binding, const VulkanBuffer &buffer) vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL); } +void VulkanDescriptorSet::updateArray(uint32_t binding, unsigned numBuffers, + const VulkanBufferList &buffers) +{ + VkDescriptorBufferInfo *vkDescriptorBufferInfo = + (VkDescriptorBufferInfo *)calloc(numBuffers, + sizeof(VkDescriptorBufferInfo)); + for (unsigned i = 0; i < numBuffers; i++) + { + vkDescriptorBufferInfo[i].buffer = buffers[i]; + vkDescriptorBufferInfo[i].offset = 0; + vkDescriptorBufferInfo[i].range = VK_WHOLE_SIZE; + } + + VkWriteDescriptorSet vkWriteDescriptorSet = {}; + vkWriteDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vkWriteDescriptorSet.pNext = NULL; + vkWriteDescriptorSet.dstSet = m_vkDescriptorSet; + vkWriteDescriptorSet.dstBinding = binding; + vkWriteDescriptorSet.dstArrayElement = 0; + vkWriteDescriptorSet.descriptorCount = numBuffers; + vkWriteDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + vkWriteDescriptorSet.pImageInfo = NULL; + vkWriteDescriptorSet.pBufferInfo = vkDescriptorBufferInfo; + vkWriteDescriptorSet.pTexelBufferView = NULL; + + vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL); + free(vkDescriptorBufferInfo); +} + void VulkanDescriptorSet::update(uint32_t binding, const VulkanImageView &imageView) { @@ -1133,6 +1213,34 @@ void VulkanDescriptorSet::update(uint32_t binding, vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL); } +void VulkanDescriptorSet::updateArray(uint32_t binding, + const VulkanImageViewList &imageViewList) +{ + VkDescriptorImageInfo *vkDescriptorImageInfo = + new VkDescriptorImageInfo[imageViewList.size()]; + for (size_t i = 0; i < imageViewList.size(); i++) + { + vkDescriptorImageInfo[i].sampler = VK_NULL_HANDLE; + vkDescriptorImageInfo[i].imageView = imageViewList[i]; + vkDescriptorImageInfo[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + } + + VkWriteDescriptorSet vkWriteDescriptorSet = {}; + vkWriteDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vkWriteDescriptorSet.pNext = NULL; + vkWriteDescriptorSet.dstSet = m_vkDescriptorSet; + vkWriteDescriptorSet.dstBinding = binding; + vkWriteDescriptorSet.dstArrayElement = 0; + vkWriteDescriptorSet.descriptorCount = imageViewList.size(); + vkWriteDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + vkWriteDescriptorSet.pImageInfo = vkDescriptorImageInfo; + vkWriteDescriptorSet.pBufferInfo = NULL; + vkWriteDescriptorSet.pTexelBufferView = NULL; + + vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL); + delete[] vkDescriptorImageInfo; +} + VulkanDescriptorSet::operator VkDescriptorSet() const { return m_vkDescriptorSet; @@ -1456,12 +1564,14 @@ VulkanBuffer::VulkanBuffer(const VulkanBuffer &buffer) m_memoryTypeList(buffer.m_memoryTypeList) {} +bool VulkanBuffer::isDedicated() const { return m_dedicated; } + VulkanBuffer::VulkanBuffer( const VulkanDevice &device, uint64_t size, VulkanExternalMemoryHandleType externalMemoryHandleType, VulkanBufferUsage bufferUsage, VulkanSharingMode sharingMode, const VulkanQueueFamilyList &queueFamilyList) - : m_device(device), m_vkBuffer(VK_NULL_HANDLE) + : m_device(device), m_vkBuffer(VK_NULL_HANDLE), m_dedicated(false) { std::vector<uint32_t> queueFamilyIndexList; if (queueFamilyList.size() == 0) @@ -1507,16 +1617,36 @@ VulkanBuffer::VulkanBuffer( vkCreateBuffer(m_device, &vkBufferCreateInfo, NULL, &m_vkBuffer); - VkMemoryRequirements vkMemoryRequirements = {}; - vkGetBufferMemoryRequirements(m_device, m_vkBuffer, &vkMemoryRequirements); - m_size = vkMemoryRequirements.size; - m_alignment = vkMemoryRequirements.alignment; + VkMemoryDedicatedRequirements vkMemoryDedicatedRequirements = {}; + vkMemoryDedicatedRequirements.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS; + vkMemoryDedicatedRequirements.pNext = NULL; + + VkMemoryRequirements2 vkMemoryRequirements = {}; + vkMemoryRequirements.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; + vkMemoryRequirements.pNext = &vkMemoryDedicatedRequirements; + + VkBufferMemoryRequirementsInfo2 vkMemoryRequirementsInfo = {}; + + vkMemoryRequirementsInfo.sType = + VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2; + vkMemoryRequirementsInfo.buffer = m_vkBuffer; + vkMemoryRequirementsInfo.pNext = NULL; + + vkGetBufferMemoryRequirements2(m_device, &vkMemoryRequirementsInfo, + &vkMemoryRequirements); + + m_dedicated = vkMemoryDedicatedRequirements.requiresDedicatedAllocation; + + m_size = vkMemoryRequirements.memoryRequirements.size; + m_alignment = vkMemoryRequirements.memoryRequirements.alignment; const VulkanMemoryTypeList &memoryTypeList = m_device.getPhysicalDevice().getMemoryTypeList(); for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) { uint32_t memoryTypeIndex = memoryTypeList[mtIdx]; - if ((1 << memoryTypeIndex) & vkMemoryRequirements.memoryTypeBits) + if ((1 << memoryTypeIndex) + & vkMemoryRequirements.memoryRequirements.memoryTypeBits) { m_memoryTypeList.add(memoryTypeList[mtIdx]); } @@ -1591,16 +1721,36 @@ VulkanImage::VulkanImage( vkCreateImage(m_device, &vkImageCreateInfo, NULL, &m_vkImage); VulkanImageCreateInfo = vkImageCreateInfo; - VkMemoryRequirements vkMemoryRequirements = {}; - vkGetImageMemoryRequirements(m_device, m_vkImage, &vkMemoryRequirements); - m_size = vkMemoryRequirements.size; - m_alignment = vkMemoryRequirements.alignment; + + VkMemoryDedicatedRequirements vkMemoryDedicatedRequirements = {}; + vkMemoryDedicatedRequirements.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS; + vkMemoryDedicatedRequirements.pNext = NULL; + + VkMemoryRequirements2 vkMemoryRequirements = {}; + vkMemoryRequirements.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; + vkMemoryRequirements.pNext = &vkMemoryDedicatedRequirements; + + VkImageMemoryRequirementsInfo2 vkMemoryRequirementsInfo = {}; + + vkMemoryRequirementsInfo.sType = + VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2; + vkMemoryRequirementsInfo.image = m_vkImage; + vkMemoryRequirementsInfo.pNext = NULL; + + vkGetImageMemoryRequirements2(m_device, &vkMemoryRequirementsInfo, + &vkMemoryRequirements); + m_size = vkMemoryRequirements.memoryRequirements.size; + m_alignment = vkMemoryRequirements.memoryRequirements.alignment; + m_dedicated = vkMemoryDedicatedRequirements.requiresDedicatedAllocation; + const VulkanMemoryTypeList &memoryTypeList = m_device.getPhysicalDevice().getMemoryTypeList(); for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) { uint32_t memoryTypeIndex = memoryTypeList[mtIdx]; - if ((1 << memoryTypeIndex) & vkMemoryRequirements.memoryTypeBits) + if ((1 << memoryTypeIndex) + & vkMemoryRequirements.memoryRequirements.memoryTypeBits) { m_memoryTypeList.add(memoryTypeList[mtIdx]); } @@ -1629,6 +1779,8 @@ uint64_t VulkanImage::getSize() const { return m_size; } uint64_t VulkanImage::getAlignment() const { return m_alignment; } +bool VulkanImage::isDedicated() const { return m_dedicated; } + const VulkanMemoryTypeList &VulkanImage::getMemoryTypeList() const { return m_memoryTypeList; @@ -1645,14 +1797,14 @@ VulkanImage2D::VulkanImage2D(const VulkanImage2D &image2D): VulkanImage(image2D) VulkanImage2D::VulkanImage2D( const VulkanDevice &device, VulkanFormat format, uint32_t width, - uint32_t height, uint32_t numMipLevels, + uint32_t height, VulkanImageTiling imageTiling, uint32_t numMipLevels, VulkanExternalMemoryHandleType externalMemoryHandleType, VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage, VulkanSharingMode sharingMode) : VulkanImage(device, VULKAN_IMAGE_TYPE_2D, format, VulkanExtent3D(width, height, 1), numMipLevels, 1, - externalMemoryHandleType, imageCreateFlag, - VULKAN_IMAGE_TILING_OPTIMAL, imageUsage, sharingMode) + externalMemoryHandleType, imageCreateFlag, imageTiling, + imageUsage, sharingMode) {} VulkanImage2D::~VulkanImage2D() {} @@ -1839,7 +1991,8 @@ VulkanDeviceMemory::VulkanDeviceMemory( const VulkanDevice &device, const VulkanImage &image, const VulkanMemoryType &memoryType, VulkanExternalMemoryHandleType externalMemoryHandleType, const void *name) - : m_device(device), m_size(image.getSize()), m_isDedicated(true) + : m_device(device), m_size(image.getSize()), + m_isDedicated(image.isDedicated()) { #if defined(_WIN32) || defined(_WIN64) WindowsSecurityAttributes winSecurityAttributes; @@ -1872,20 +2025,95 @@ VulkanDeviceMemory::VulkanDeviceMemory( VkMemoryDedicatedAllocateInfo vkMemoryDedicatedAllocateInfo = {}; vkMemoryDedicatedAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO; - vkMemoryDedicatedAllocateInfo.pNext = - externalMemoryHandleType ? &vkExportMemoryAllocateInfoKHR : NULL; + vkMemoryDedicatedAllocateInfo.pNext = NULL; vkMemoryDedicatedAllocateInfo.image = image; vkMemoryDedicatedAllocateInfo.buffer = VK_NULL_HANDLE; VkMemoryAllocateInfo vkMemoryAllocateInfo = {}; vkMemoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - vkMemoryAllocateInfo.pNext = &vkMemoryDedicatedAllocateInfo; vkMemoryAllocateInfo.allocationSize = m_size; vkMemoryAllocateInfo.memoryTypeIndex = (uint32_t)memoryType; + if (m_isDedicated) + { + vkMemoryAllocateInfo.pNext = &vkMemoryDedicatedAllocateInfo; + vkMemoryDedicatedAllocateInfo.pNext = + externalMemoryHandleType ? &vkExportMemoryAllocateInfoKHR : NULL; + } + else + { + vkMemoryAllocateInfo.pNext = + externalMemoryHandleType ? &vkExportMemoryAllocateInfoKHR : NULL; + } + vkAllocateMemory(m_device, &vkMemoryAllocateInfo, NULL, &m_vkDeviceMemory); } +VulkanDeviceMemory::VulkanDeviceMemory( + const VulkanDevice &device, const VulkanBuffer &buffer, + const VulkanMemoryType &memoryType, + VulkanExternalMemoryHandleType externalMemoryHandleType, const void *name) + : m_device(device), m_size(buffer.getSize()), + m_isDedicated(buffer.isDedicated()) +{ +#if defined(_WIN32) || defined(_WIN64) + WindowsSecurityAttributes winSecurityAttributes; + + VkExportMemoryWin32HandleInfoKHR vkExportMemoryWin32HandleInfoKHR = {}; + vkExportMemoryWin32HandleInfoKHR.sType = + VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR; + vkExportMemoryWin32HandleInfoKHR.pNext = NULL; + vkExportMemoryWin32HandleInfoKHR.pAttributes = &winSecurityAttributes; + vkExportMemoryWin32HandleInfoKHR.dwAccess = + DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE; + vkExportMemoryWin32HandleInfoKHR.name = (LPCWSTR)name; + +#endif + + VkExportMemoryAllocateInfoKHR vkExportMemoryAllocateInfoKHR = {}; + vkExportMemoryAllocateInfoKHR.sType = + VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR; +#if defined(_WIN32) || defined(_WIN64) + vkExportMemoryAllocateInfoKHR.pNext = externalMemoryHandleType + & VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT + ? &vkExportMemoryWin32HandleInfoKHR + : NULL; +#else + vkExportMemoryAllocateInfoKHR.pNext = NULL; +#endif + vkExportMemoryAllocateInfoKHR.handleTypes = + (VkExternalMemoryHandleTypeFlagsKHR)externalMemoryHandleType; + + VkMemoryDedicatedAllocateInfo vkMemoryDedicatedAllocateInfo = {}; + vkMemoryDedicatedAllocateInfo.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO; + vkMemoryDedicatedAllocateInfo.pNext = NULL; + vkMemoryDedicatedAllocateInfo.image = VK_NULL_HANDLE; + vkMemoryDedicatedAllocateInfo.buffer = buffer; + + VkMemoryAllocateInfo vkMemoryAllocateInfo = {}; + vkMemoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + vkMemoryAllocateInfo.allocationSize = m_size; + vkMemoryAllocateInfo.memoryTypeIndex = (uint32_t)memoryType; + + if (m_isDedicated) + { + vkMemoryAllocateInfo.pNext = &vkMemoryDedicatedAllocateInfo; + vkMemoryDedicatedAllocateInfo.pNext = + externalMemoryHandleType ? &vkExportMemoryAllocateInfoKHR : NULL; + } + else + { + vkMemoryAllocateInfo.pNext = + externalMemoryHandleType ? &vkExportMemoryAllocateInfoKHR : NULL; + } + + + VkResult res = vkAllocateMemory(m_device, &vkMemoryAllocateInfo, NULL, + &m_vkDeviceMemory); + ASSERT_SUCCESS(res, "Failed to allocate device memory"); +} + VulkanDeviceMemory::~VulkanDeviceMemory() { vkFreeMemory(m_device, m_vkDeviceMemory, NULL); @@ -1952,11 +2180,21 @@ void VulkanDeviceMemory::unmap() { vkUnmapMemory(m_device, m_vkDeviceMemory); } void VulkanDeviceMemory::bindBuffer(const VulkanBuffer &buffer, uint64_t offset) { + if (buffer.isDedicated() && !m_isDedicated) + { + throw std::runtime_error( + "Buffer requires dedicated memory. Failed to bind"); + } vkBindBufferMemory(m_device, buffer, m_vkDeviceMemory, offset); } void VulkanDeviceMemory::bindImage(const VulkanImage &image, uint64_t offset) { + if (image.isDedicated() && !m_isDedicated) + { + throw std::runtime_error( + "Image requires dedicated memory. Failed to bind"); + } vkBindImageMemory(m_device, image, m_vkDeviceMemory, offset); } diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp index 37925ee4..7fcc70f3 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp @@ -21,6 +21,7 @@ #include "vulkan_wrapper_types.hpp" #include "vulkan_list_map.hpp" #include "vulkan_api_list.hpp" +#include <memory> class VulkanInstance { friend const VulkanInstance &getVulkanInstance(); @@ -145,6 +146,20 @@ public: operator VkDevice() const; }; +class VulkanFence { + friend class VulkanQueue; + +protected: + VkFence fence; + VkDevice device; + +public: + VulkanFence(const VulkanDevice &device); + virtual ~VulkanFence(); + void reset(); + void wait(); +}; + class VulkanQueue { friend class VulkanDevice; @@ -157,6 +172,8 @@ protected: public: const VulkanQueueFamily &getQueueFamily(); + void submit(const VulkanCommandBuffer &commandBuffer, + const std::shared_ptr<VulkanFence> &fence); void submit(const VulkanSemaphoreList &waitSemaphoreList, const VulkanCommandBufferList &commandBufferList, const VulkanSemaphoreList &signalSemaphoreList); @@ -311,7 +328,11 @@ public: const VulkanDescriptorSetLayout &descriptorSetLayout); virtual ~VulkanDescriptorSet(); void update(uint32_t binding, const VulkanBuffer &buffer); + void updateArray(uint32_t binding, unsigned numBuffers, + const VulkanBufferList &buffers); void update(uint32_t binding, const VulkanImageView &imageView); + void updateArray(uint32_t binding, + const VulkanImageViewList &imageViewList); operator VkDescriptorSet() const; }; @@ -407,6 +428,7 @@ protected: VkBuffer m_vkBuffer; uint64_t m_size; uint64_t m_alignment; + bool m_dedicated; VulkanMemoryTypeList m_memoryTypeList; VulkanBuffer(const VulkanBuffer &buffer); @@ -424,6 +446,7 @@ public: uint64_t getSize() const; uint64_t getAlignment() const; const VulkanMemoryTypeList &getMemoryTypeList() const; + bool isDedicated() const; operator VkBuffer() const; }; @@ -435,6 +458,7 @@ protected: const VulkanFormat m_format; const uint32_t m_numMipLevels; const uint32_t m_numLayers; + bool m_dedicated; VkImage m_vkImage; uint64_t m_size; uint64_t m_alignment; @@ -461,6 +485,7 @@ public: uint32_t getNumLayers() const; uint64_t getSize() const; uint64_t getAlignment() const; + bool isDedicated() const; const VulkanMemoryTypeList &getMemoryTypeList() const; VkImageCreateInfo getVkImageCreateInfo() const; operator VkImage() const; @@ -470,12 +495,11 @@ class VulkanImage2D : public VulkanImage { protected: VkImageView m_vkImageView; - VulkanImage2D(const VulkanImage2D &image2D); - public: VulkanImage2D( const VulkanDevice &device, VulkanFormat format, uint32_t width, - uint32_t height, uint32_t numMipLevels = 1, + uint32_t height, VulkanImageTiling imageTiling, + uint32_t numMipLevels = 1, VulkanExternalMemoryHandleType externalMemoryHandleType = VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE, @@ -484,6 +508,8 @@ public: VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE); virtual ~VulkanImage2D(); virtual VulkanExtent3D getExtent3D(uint32_t mipLevel = 0) const; + + VulkanImage2D(const VulkanImage2D &image2D); }; class VulkanImageView { @@ -524,6 +550,11 @@ public: VulkanExternalMemoryHandleType externalMemoryHandleType = VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, const void *name = NULL); + VulkanDeviceMemory(const VulkanDevice &device, const VulkanBuffer &buffer, + const VulkanMemoryType &memoryType, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + const void *name = NULL); virtual ~VulkanDeviceMemory(); uint64_t getSize() const; #ifdef _WIN32 @@ -569,7 +600,6 @@ public: operator VkSemaphore() const; }; - #define VK_FUNC_DECL(name) extern "C" PFN_##name _##name; VK_FUNC_LIST #if defined(_WIN32) || defined(_WIN64) diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp index 2473a1d7..fcd19373 100644 --- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp +++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp @@ -169,7 +169,9 @@ enum VulkanExternalSemaphoreHandleType VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR, VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR - | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR + | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR, + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR }; enum VulkanBufferUsage diff --git a/test_conformance/commonfns/main.cpp b/test_conformance/commonfns/main.cpp index 3e4b0b8e..645d3f70 100644 --- a/test_conformance/commonfns/main.cpp +++ b/test_conformance/commonfns/main.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -18,8 +18,10 @@ #include <string.h> #include "procs.h" #include "test_base.h" +#include "harness/kernelHelpers.h" std::map<size_t, std::string> BaseFunctionTest::type2name; +cl_half_rounding_mode BaseFunctionTest::halfRoundingMode = CL_HALF_RTE; int g_arrVecSizes[kVectorSizeCount + kStrangeVectorSizeCount]; int g_arrStrangeVectorSizes[kStrangeVectorSizeCount] = {3}; @@ -45,17 +47,38 @@ test_definition test_list[] = { const int test_num = ARRAY_SIZE( test_list ); -int main(int argc, const char *argv[]) +test_status InitCL(cl_device_id device) { - initVecSizes(); - - if (BaseFunctionTest::type2name.empty()) + if (is_extension_available(device, "cl_khr_fp16")) { - BaseFunctionTest::type2name[sizeof(half)] = "half"; - BaseFunctionTest::type2name[sizeof(float)] = "float"; - BaseFunctionTest::type2name[sizeof(double)] = "double"; + const cl_device_fp_config fpConfigHalf = + get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG); + if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0) + { + BaseFunctionTest::halfRoundingMode = CL_HALF_RTE; + } + else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0) + { + BaseFunctionTest::halfRoundingMode = CL_HALF_RTZ; + } + else + { + log_error("Error while acquiring half rounding mode"); + return TEST_FAIL; + } } - return runTestHarness(argc, argv, test_num, test_list, false, 0); + return TEST_PASS; } +int main(int argc, const char *argv[]) +{ + initVecSizes(); + + BaseFunctionTest::type2name[sizeof(half)] = "half"; + BaseFunctionTest::type2name[sizeof(float)] = "float"; + BaseFunctionTest::type2name[sizeof(double)] = "double"; + + return runTestHarnessWithCheck(argc, argv, test_num, test_list, false, 0, + InitCL); +} diff --git a/test_conformance/commonfns/test_base.h b/test_conformance/commonfns/test_base.h index 44291042..be36ed26 100644 --- a/test_conformance/commonfns/test_base.h +++ b/test_conformance/commonfns/test_base.h @@ -19,27 +19,23 @@ #include <vector> #include <map> #include <memory> +#include <cmath> #include <CL/cl_half.h> #include <CL/cl_ext.h> -#include "harness/deviceInfo.h" #include "harness/testHarness.h" #include "harness/typeWrappers.h" - template <typename T> using VerifyFuncBinary = int (*)(const T *const, const T *const, const T *const, const int num, const int vs, const int vp); - template <typename T> using VerifyFuncUnary = int (*)(const T *const, const T *const, const int num); - using half = cl_half; - struct BaseFunctionTest { BaseFunctionTest(cl_device_id device, cl_context context, @@ -61,9 +57,9 @@ struct BaseFunctionTest bool vecParam; static std::map<size_t, std::string> type2name; + static cl_half_rounding_mode halfRoundingMode; }; - struct MinTest : BaseFunctionTest { MinTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -74,7 +70,6 @@ struct MinTest : BaseFunctionTest cl_int Run() override; }; - struct MaxTest : BaseFunctionTest { MaxTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -85,7 +80,6 @@ struct MaxTest : BaseFunctionTest cl_int Run() override; }; - struct ClampTest : BaseFunctionTest { ClampTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -96,7 +90,6 @@ struct ClampTest : BaseFunctionTest cl_int Run() override; }; - struct DegreesTest : BaseFunctionTest { DegreesTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -107,7 +100,6 @@ struct DegreesTest : BaseFunctionTest cl_int Run() override; }; - struct RadiansTest : BaseFunctionTest { RadiansTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -118,7 +110,6 @@ struct RadiansTest : BaseFunctionTest cl_int Run() override; }; - struct SignTest : BaseFunctionTest { SignTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -129,7 +120,6 @@ struct SignTest : BaseFunctionTest cl_int Run() override; }; - struct SmoothstepTest : BaseFunctionTest { SmoothstepTest(cl_device_id device, cl_context context, @@ -141,7 +131,6 @@ struct SmoothstepTest : BaseFunctionTest cl_int Run() override; }; - struct StepTest : BaseFunctionTest { StepTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -152,7 +141,6 @@ struct StepTest : BaseFunctionTest cl_int Run() override; }; - struct MixTest : BaseFunctionTest { MixTest(cl_device_id device, cl_context context, cl_command_queue queue, @@ -163,19 +151,71 @@ struct MixTest : BaseFunctionTest cl_int Run() override; }; +template <typename T> float UlpFn(const T &val, const double &r) +{ + if (std::is_same<T, half>::value) + { + return Ulp_Error_Half(val, r); + } + else if (std::is_same<T, float>::value) + { + return Ulp_Error(val, r); + } + else if (std::is_same<T, double>::value) + { + return Ulp_Error_Double(val, r); + } + else + { + log_error("UlpFn: unsupported data type\n"); + } + + return -1.f; // wrong val +} + +template <typename T> inline double conv_to_dbl(const T &val) +{ + if (std::is_same<T, half>::value) + return (double)cl_half_to_float(val); + else + return (double)val; +} -template <typename... Args> -std::string string_format(const std::string &format, Args... args) +template <typename T> inline double conv_to_flt(const T &val) { - int sformat = std::snprintf(nullptr, 0, format.c_str(), args...) + 1; - if (sformat <= 0) - throw std::runtime_error("string_format: string processing error."); - auto format_size = static_cast<size_t>(sformat); - std::unique_ptr<char[]> buffer(new char[format_size]); - std::snprintf(buffer.get(), format_size, format.c_str(), args...); - return std::string(buffer.get(), buffer.get() + format_size - 1); + if (std::is_same<T, half>::value) + return (float)cl_half_to_float(val); + else + return (float)val; } +template <typename T> inline half conv_to_half(const T &val) +{ + if (std::is_floating_point<T>::value) + return cl_half_from_float(val, BaseFunctionTest::halfRoundingMode); + return 0; +} + +template <typename T> bool isfinite_fp(const T &v) +{ + if (std::is_same<T, half>::value) + { + // Extract FP16 exponent and mantissa + uint16_t h_exp = (((half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F; + uint16_t h_mant = ((half)v) & 0x3FF; + + // !Inf test + return !(h_exp == 0x1F && h_mant == 0); + } + else + { +#if !defined(_WIN32) + return std::isfinite(v); +#else + return isfinite(v); +#endif + } +} template <class T> int MakeAndRunTest(cl_device_id device, cl_context context, diff --git a/test_conformance/commonfns/test_binary_fn.cpp b/test_conformance/commonfns/test_binary_fn.cpp index 1eb12f73..a6c75647 100644 --- a/test_conformance/commonfns/test_binary_fn.cpp +++ b/test_conformance/commonfns/test_binary_fn.cpp @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -22,6 +22,7 @@ #include "harness/deviceInfo.h" #include "harness/typeWrappers.h" +#include "harness/stringHelpers.h" #include "procs.h" #include "test_base.h" @@ -53,7 +54,6 @@ const char *binary_fn_code_pattern_v3_scalar = " vstore3(%s(vload3(tid,x), y[tid] ), tid, dst);\n" "}\n"; - template <typename T> int test_binary_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, @@ -105,6 +105,16 @@ int test_binary_fn(cl_device_id device, cl_context context, input_ptr[1][j] = get_random_double(-0x20000000, 0x20000000, d); } } + else if (std::is_same<T, half>::value) + { + const float fval = CL_HALF_MAX; + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (int j = 0; j < num_elements; j++) + { + input_ptr[0][j] = conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[1][j] = conv_to_half(get_random_float(-fval, fval, d)); + } + } for (i = 0; i < 2; i++) { @@ -125,22 +135,22 @@ int test_binary_fn(cl_device_id device, cl_context context, { std::string str = binary_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), fnName.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), fnName.c_str()); } else { std::string str = binary_fn_code_pattern_v3_scalar; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), fnName.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), fnName.c_str()); } } else { // do regular std::string str = binary_fn_code_pattern; - kernelSource = string_format( + kernelSource = str_sprintf( str, pragma_str.c_str(), tname.c_str(), vecSizeNames[i], tname.c_str(), vecSecParam ? vecSizeNames[i] : "", tname.c_str(), vecSizeNames[i], fnName.c_str()); @@ -203,13 +213,20 @@ int max_verify(const T* const x, const T* const y, const T* const out, { int k = i * vecSize + j; int l = (k * vecParam + i * (1 - vecParam)); - T v = (x[k] < y[l]) ? y[l] : x[k]; + T v = (conv_to_dbl(x[k]) < conv_to_dbl(y[l])) ? y[l] : x[k]; if (v != out[k]) { - log_error( - "x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is " - "vector %d, element %d, for vector size %d)\n", - k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); + if (std::is_same<T, half>::value) + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. " + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, conv_to_flt(x[k]), l, conv_to_flt(y[l]), k, + conv_to_flt(out[k]), v, k, i, j, vecSize); + else + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. " + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); return -1; } } @@ -227,13 +244,20 @@ int min_verify(const T* const x, const T* const y, const T* const out, { int k = i * vecSize + j; int l = (k * vecParam + i * (1 - vecParam)); - T v = (x[k] > y[l]) ? y[l] : x[k]; + T v = (conv_to_dbl(x[k]) > conv_to_dbl(y[l])) ? y[l] : x[k]; if (v != out[k]) { - log_error( - "x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is " - "vector %d, element %d, for vector size %d)\n", - k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); + if (std::is_same<T, half>::value) + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. " + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, conv_to_flt(x[k]), l, conv_to_flt(y[l]), k, + conv_to_flt(out[k]), v, k, i, j, vecSize); + else + log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. " + "(index %d is " + "vector %d, element %d, for vector size %d)\n", + k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize); return -1; } } @@ -246,6 +270,13 @@ int min_verify(const T* const x, const T* const y, const T* const out, cl_int MaxTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_binary_fn<cl_half>(device, context, queue, num_elems, + fnName.c_str(), vecParam, + max_verify<cl_half>); + test_error(error, "MaxTest::Run<cl_half> failed"); + } error = test_binary_fn<float>(device, context, queue, num_elems, fnName.c_str(), vecParam, max_verify<float>); @@ -265,6 +296,13 @@ cl_int MaxTest::Run() cl_int MinTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_binary_fn<cl_half>(device, context, queue, num_elems, + fnName.c_str(), vecParam, + min_verify<cl_half>); + test_error(error, "MinTest::Run<cl_half> failed"); + } error = test_binary_fn<float>(device, context, queue, num_elems, fnName.c_str(), vecParam, min_verify<float>); diff --git a/test_conformance/commonfns/test_clamp.cpp b/test_conformance/commonfns/test_clamp.cpp index 0e96fb60..1bf40677 100644 --- a/test_conformance/commonfns/test_clamp.cpp +++ b/test_conformance/commonfns/test_clamp.cpp @@ -26,12 +26,10 @@ #include "procs.h" #include "test_base.h" - #ifndef M_PI #define M_PI 3.14159265358979323846264338327950288 #endif - #define CLAMP_KERNEL(type) \ const char *clamp_##type##_kernel_code = EMIT_PRAGMA_DIRECTIVE \ "__kernel void test_clamp(__global " #type " *x, __global " #type \ @@ -64,6 +62,14 @@ "vload3(tid,maxval)), tid, dst);\n" \ "}\n"; +#define EMIT_PRAGMA_DIRECTIVE "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n" +CLAMP_KERNEL(half) +CLAMP_KERNEL_V(half, 2) +CLAMP_KERNEL_V(half, 4) +CLAMP_KERNEL_V(half, 8) +CLAMP_KERNEL_V(half, 16) +CLAMP_KERNEL_V3(half, 3) +#undef EMIT_PRAGMA_DIRECTIVE #define EMIT_PRAGMA_DIRECTIVE " " CLAMP_KERNEL(float) @@ -83,6 +89,10 @@ CLAMP_KERNEL_V(double, 16) CLAMP_KERNEL_V3(double, 3) #undef EMIT_PRAGMA_DIRECTIVE +const char *clamp_half_codes[] = { + clamp_half_kernel_code, clamp_half2_kernel_code, clamp_half4_kernel_code, + clamp_half8_kernel_code, clamp_half16_kernel_code, clamp_half3_kernel_code +}; const char *clamp_float_codes[] = { clamp_float_kernel_code, clamp_float2_kernel_code, clamp_float4_kernel_code, clamp_float8_kernel_code, @@ -96,21 +106,42 @@ const char *clamp_double_codes[] = { namespace { - template <typename T> int verify_clamp(const T *const x, const T *const minval, const T *const maxval, const T *const outptr, int n) { - T t; - for (int i = 0; i < n; i++) + if (std::is_same<T, half>::value) + { + float t; + for (int i = 0; i < n; i++) + { + t = std::min( + std::max(cl_half_to_float(x[i]), cl_half_to_float(minval[i])), + cl_half_to_float(maxval[i])); + if (t != cl_half_to_float(outptr[i])) + { + log_error( + "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", + i, cl_half_to_float(x[i]), cl_half_to_float(minval[i]), + cl_half_to_float(maxval[i]), t, + cl_half_to_float(outptr[i])); + return -1; + } + } + } + else { - t = std::min(std::max(x[i], minval[i]), maxval[i]); - if (t != outptr[i]) + T t; + for (int i = 0; i < n; i++) { - log_error( - "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", i, - x[i], minval[i], maxval[i], t, outptr[i]); - return -1; + t = std::min(std::max(x[i], minval[i]), maxval[i]); + if (t != outptr[i]) + { + log_error( + "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", + i, x[i], minval[i], maxval[i], t, outptr[i]); + return -1; + } } } @@ -118,7 +149,6 @@ int verify_clamp(const T *const x, const T *const minval, const T *const maxval, } } - template <typename T> int test_clamp_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) @@ -169,6 +199,17 @@ int test_clamp_fn(cl_device_id device, cl_context context, input_ptr[2][j] = get_random_double(input_ptr[1][j], 0x20000000, d); } } + else if (std::is_same<T, half>::value) + { + const float fval = CL_HALF_MAX; + for (j = 0; j < num_elements; j++) + { + input_ptr[0][j] = conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[1][j] = conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[2][j] = conv_to_half( + get_random_float(conv_to_flt(input_ptr[1][j]), fval, d)); + } + } for (i = 0; i < 3; i++) { @@ -194,9 +235,16 @@ int test_clamp_fn(cl_device_id device, cl_context context, "test_clamp"); test_error(err, "Unable to create kernel"); } + else if (std::is_same<T, half>::value) + { + err = create_single_kernel_helper( + context, &programs[i], &kernels[i], 1, &clamp_half_codes[i], + "test_clamp"); + test_error(err, "Unable to create kernel"); + } - log_info("Just made a program for float, i=%d, size=%d, in slot %d\n", - i, g_arrVecSizes[i], i); + log_info("Just made a program for %s, i=%d, size=%d, in slot %d\n", + tname.c_str(), i, g_arrVecSizes[i], i); fflush(stdout); for (j = 0; j < 4; j++) @@ -239,10 +287,14 @@ int test_clamp_fn(cl_device_id device, cl_context context, return err; } - cl_int ClampTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_clamp_fn<cl_half>(device, context, queue, num_elems); + test_error(error, "ClampTest::Run<cl_half> failed"); + } error = test_clamp_fn<float>(device, context, queue, num_elems); test_error(error, "ClampTest::Run<float> failed"); @@ -256,7 +308,6 @@ cl_int ClampTest::Run() return error; } - int test_clamp(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git a/test_conformance/commonfns/test_mix.cpp b/test_conformance/commonfns/test_mix.cpp index 92c10100..2a06e43d 100644 --- a/test_conformance/commonfns/test_mix.cpp +++ b/test_conformance/commonfns/test_mix.cpp @@ -18,6 +18,8 @@ #include <sys/types.h> #include <sys/stat.h> +#include "harness/stringHelpers.h" + #include "procs.h" #include "test_base.h" @@ -52,33 +54,42 @@ const char *mix_fn_code_pattern_v3_scalar = " vstore3(mix(vload3(tid, x), vload3(tid, y), a[tid]), tid, dst);\n" "}\n"; - #define MAX_ERR 1e-3 namespace { - template <typename T> int verify_mix(const T *const inptrX, const T *const inptrY, const T *const inptrA, const T *const outptr, const int n, const int veclen, const bool vecParam) { - T r; - float delta = 0.0f; + double r, o; + float delta = 0.f, max_delta = 0.f; int i; if (vecParam) { for (i = 0; i < n * veclen; i++) { - r = inptrX[i] + ((inptrY[i] - inptrX[i]) * inptrA[i]); - delta = fabs(double(r - outptr[i])) / r; - if (delta > MAX_ERR) + r = conv_to_dbl(inptrX[i]) + + ((conv_to_dbl(inptrY[i]) - conv_to_dbl(inptrX[i])) + * conv_to_dbl(inptrA[i])); + + o = conv_to_dbl(outptr[i]); + delta = fabs(double(r - o)) / r; + if (!std::is_same<T, half>::value) + { + if (delta > MAX_ERR) + { + log_error("%d) verification error: mix(%a, %a, %a) = *%a " + "vs. %a\n", + i, inptrX[i], inptrY[i], inptrA[i], r, outptr[i]); + return -1; + } + } + else { - log_error( - "%d) verification error: mix(%a, %a, %a) = *%a vs. %a\n", i, - inptrX[i], inptrY[i], inptrA[i], r, outptr[i]); - return -1; + max_delta = std::max(max_delta, delta); } } } @@ -90,25 +101,40 @@ int verify_mix(const T *const inptrX, const T *const inptrY, int vi = i * veclen; for (int j = 0; j < veclen; ++j, ++vi) { - r = inptrX[vi] + ((inptrY[vi] - inptrX[vi]) * inptrA[i]); - delta = fabs(double(r - outptr[vi])) / r; - if (delta > MAX_ERR) + r = conv_to_dbl(inptrX[vi]) + + ((conv_to_dbl(inptrY[vi]) - conv_to_dbl(inptrX[vi])) + * conv_to_dbl(inptrA[i])); + delta = fabs(double(r - conv_to_dbl(outptr[vi]))) / r; + if (!std::is_same<T, half>::value) { - log_error("{%d, element %d}) verification error: mix(%a, " - "%a, %a) = *%a vs. %a\n", - ii, j, inptrX[vi], inptrY[vi], inptrA[i], r, - outptr[vi]); - return -1; + if (delta > MAX_ERR) + { + log_error( + "{%d, element %d}) verification error: mix(%a, " + "%a, %a) = *%a vs. %a\n", + ii, j, inptrX[vi], inptrY[vi], inptrA[i], r, + outptr[vi]); + return -1; + } + } + else + { + max_delta = std::max(max_delta, delta); } } } } + // due to the fact that accuracy of mix for cl_khr_fp16 is implementation + // defined this test only reports maximum error without testing maximum + // error threshold + if (std::is_same<T, half>::value) + log_error("mix half verification result, max delta: %a\n", max_delta); + return 0; } } // namespace - template <typename T> int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, bool vecParam) @@ -120,7 +146,7 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, std::vector<clKernelWrapper> kernels; int err, i; - MTdataHolder d = MTdataHolder(gRandomSeed); + MTdataHolder d(gRandomSeed); assert(BaseFunctionTest::type2name.find(sizeof(T)) != BaseFunctionTest::type2name.end()); @@ -142,19 +168,32 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, test_error(err, "clCreateBuffer failed"); } - for (i = 0; i < num_elements; i++) - { - input_ptr[0][i] = (T)genrand_real1(d); - input_ptr[1][i] = (T)genrand_real1(d); - input_ptr[2][i] = (T)genrand_real1(d); - } - std::string pragma_str; if (std::is_same<T, double>::value) { pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; } + if (std::is_same<T, half>::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = conv_to_half((float)genrand_real1(d)); + input_ptr[1][i] = conv_to_half((float)genrand_real1(d)); + input_ptr[2][i] = conv_to_half((float)genrand_real1(d)); + } + } + else + { + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = (T)genrand_real1(d); + input_ptr[1][i] = (T)genrand_real1(d); + input_ptr[2][i] = (T)genrand_real1(d); + } + } + for (i = 0; i < 3; i++) { err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, @@ -164,7 +203,6 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, } char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; - for (i = 0; i < kTotalVecCount; i++) { std::string kernelSource; @@ -174,15 +212,15 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, { std::string str = mix_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); } else { std::string str = mix_fn_code_pattern_v3_scalar; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); } } else @@ -190,10 +228,10 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, // regular path std::string str = mix_fn_code_pattern; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - vecSizeNames[i], tname.c_str(), vecSizeNames[i], - tname.c_str(), vecParam ? vecSizeNames[i] : "", - tname.c_str(), vecSizeNames[i]); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i], + tname.c_str(), vecParam ? vecSizeNames[i] : "", + tname.c_str(), vecSizeNames[i]); } const char *programPtr = kernelSource.c_str(); err = @@ -242,10 +280,14 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue, return err; } - cl_int MixTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_mix_fn<half>(device, context, queue, num_elems, vecParam); + test_error(error, "MixTest::Run<cl_half> failed"); + } error = test_mix_fn<float>(device, context, queue, num_elems, vecParam); test_error(error, "MixTest::Run<float> failed"); @@ -260,7 +302,6 @@ cl_int MixTest::Run() return error; } - int test_mix(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -268,7 +309,6 @@ int test_mix(cl_device_id device, cl_context context, cl_command_queue queue, true); } - int test_mixf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git a/test_conformance/commonfns/test_smoothstep.cpp b/test_conformance/commonfns/test_smoothstep.cpp index 31948d3f..5afc2d0f 100644 --- a/test_conformance/commonfns/test_smoothstep.cpp +++ b/test_conformance/commonfns/test_smoothstep.cpp @@ -18,10 +18,11 @@ #include <sys/types.h> #include <sys/stat.h> +#include "harness/stringHelpers.h" + #include "procs.h" #include "test_base.h" - const char *smoothstep_fn_code_pattern = "%s\n" /* optional pragma */ "__kernel void test_fn(__global %s%s *e0, __global %s%s *e1, __global %s%s " @@ -53,38 +54,43 @@ const char *smoothstep_fn_code_pattern_v3_scalar = " vstore3(smoothstep(e0[tid], e1[tid], vload3(tid,x)), tid, dst);\n" "}\n"; - #define MAX_ERR (1e-5f) namespace { - template <typename T> int verify_smoothstep(const T *const edge0, const T *const edge1, const T *const x, const T *const outptr, const int n, const int veclen, const bool vecParam) { - T r, t; - float delta = 0; + double r, t; + float delta = 0, max_delta = 0; if (vecParam) { for (int i = 0; i < n * veclen; i++) { - t = (x[i] - edge0[i]) / (edge1[i] - edge0[i]); - if (t < 0.0f) - t = 0.0f; - else if (t > 1.0f) - t = 1.0f; - r = t * t * (3.0f - 2.0f * t); - delta = (float)fabs(r - outptr[i]); - if (delta > MAX_ERR) + t = (conv_to_dbl(x[i]) - conv_to_dbl(edge0[i])) + / (conv_to_dbl(edge1[i]) - conv_to_dbl(edge0[i])); + if (t < 0.0) + t = 0.0; + else if (t > 1.0) + t = 1.0; + r = t * t * (3.0 - 2.0 * t); + delta = (float)fabs(r - conv_to_dbl(outptr[i])); + if (!std::is_same<T, half>::value) { - log_error("%d) verification error: smoothstep(%a, %a, %a) = " - "*%a vs. %a\n", - i, x[i], edge0[i], edge1[i], r, outptr[i]); - return -1; + if (delta > MAX_ERR) + { + log_error( + "%d) verification error: smoothstep(%a, %a, %a) = " + "*%a vs. %a\n", + i, x[i], edge0[i], edge1[i], r, outptr[i]); + return -1; + } } + else + max_delta = std::max(max_delta, delta); } } else @@ -95,32 +101,48 @@ int verify_smoothstep(const T *const edge0, const T *const edge1, int vi = i * veclen; for (int j = 0; j < veclen; ++j, ++vi) { - t = (x[vi] - edge0[i]) / (edge1[i] - edge0[i]); - if (t < 0.0f) - t = 0.0f; - else if (t > 1.0f) - t = 1.0f; - r = t * t * (3.0f - 2.0f * t); - delta = (float)fabs(r - outptr[vi]); - if (delta > MAX_ERR) + t = (conv_to_dbl(x[vi]) - conv_to_dbl(edge0[i])) + / (conv_to_dbl(edge1[i]) - conv_to_dbl(edge0[i])); + if (t < 0.0) + t = 0.0; + else if (t > 1.0) + t = 1.0; + r = t * t * (3.0 - 2.0 * t); + delta = (float)fabs(r - conv_to_dbl(outptr[vi])); + + if (!std::is_same<T, half>::value) { - log_error("{%d, element %d}) verification error: " - "smoothstep(%a, %a, %a) = *%a vs. %a\n", - ii, j, x[vi], edge0[i], edge1[i], r, outptr[vi]); - return -1; + if (delta > MAX_ERR) + { + log_error("{%d, element %d}) verification error: " + "smoothstep(%a, %a, %a) = *%a vs. %a\n", + ii, j, x[vi], edge0[i], edge1[i], r, + outptr[vi]); + return -1; + } } + else + max_delta = std::max(max_delta, delta); } } } + + // due to the fact that accuracy of smoothstep for cl_khr_fp16 is + // implementation defined this test only reports maximum error without + // testing maximum error threshold + if (std::is_same<T, half>::value) + log_error("smoothstep half verification result, max delta: %a\n", + max_delta); + return 0; } } - template <typename T> int test_smoothstep_fn(cl_device_id device, cl_context context, - cl_command_queue queue, int n_elems, bool vecParam) + cl_command_queue queue, const int n_elems, + const bool vecParam) { clMemWrapper streams[4]; std::vector<T> input_ptr[3], output_ptr; @@ -170,6 +192,17 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, input_ptr[2][i] = get_random_double(-0x20000000, 0x20000000, d); } } + else if (std::is_same<T, half>::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = conv_to_half(get_random_float(-65503, 65503, d)); + input_ptr[1][i] = conv_to_half( + get_random_float(conv_to_flt(input_ptr[0][i]), 65503, d)); + input_ptr[2][i] = conv_to_half(get_random_float(-65503, 65503, d)); + } + } for (i = 0; i < 3; i++) { @@ -179,7 +212,7 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, test_error(err, "Unable to write input buffer"); } - char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; + const char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; for (i = 0; i < kTotalVecCount; i++) { @@ -190,15 +223,15 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, { std::string str = smoothstep_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); } else { std::string str = smoothstep_fn_code_pattern_v3_scalar; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str(), tname.c_str()); } } else @@ -206,11 +239,12 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, // regular path std::string str = smoothstep_fn_code_pattern; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - vecParam ? vecSizeNames[i] : "", tname.c_str(), - vecParam ? vecSizeNames[i] : "", tname.c_str(), - vecSizeNames[i], tname.c_str(), vecSizeNames[i]); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + vecParam ? vecSizeNames[i] : "", tname.c_str(), + vecParam ? vecSizeNames[i] : "", tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i]); } + const char *programPtr = kernelSource.c_str(); err = create_single_kernel_helper(context, &programs[i], &kernels[i], 1, @@ -259,10 +293,15 @@ int test_smoothstep_fn(cl_device_id device, cl_context context, return err; } - cl_int SmoothstepTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_smoothstep_fn<half>(device, context, queue, num_elems, + vecParam); + test_error(error, "SmoothstepTest::Run<cl_half> failed"); + } error = test_smoothstep_fn<float>(device, context, queue, num_elems, vecParam); @@ -278,7 +317,6 @@ cl_int SmoothstepTest::Run() return error; } - int test_smoothstep(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -286,7 +324,6 @@ int test_smoothstep(cl_device_id device, cl_context context, "smoothstep", true); } - int test_smoothstepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git a/test_conformance/commonfns/test_step.cpp b/test_conformance/commonfns/test_step.cpp index dc91766e..1cfa96ea 100644 --- a/test_conformance/commonfns/test_step.cpp +++ b/test_conformance/commonfns/test_step.cpp @@ -18,10 +18,11 @@ #include <sys/types.h> #include <sys/stat.h> +#include "harness/stringHelpers.h" + #include "procs.h" #include "test_base.h" - const char *step_fn_code_pattern = "%s\n" /* optional pragma */ "__kernel void test_fn(__global %s%s *edge, " "__global %s%s *x, __global %s%s *dst)\n" @@ -48,7 +49,6 @@ const char *step_fn_code_pattern_v3_scalar = " vstore3(step(edge[tid], vload3(tid,x)), tid, dst);\n" "}\n"; - namespace { template <typename T> @@ -62,8 +62,8 @@ int verify_step(const T *const inptrA, const T *const inptrB, { for (int i = 0; i < n * veclen; i++) { - r = (inptrB[i] < inptrA[i]) ? 0.0 : 1.0; - if (r != outptr[i]) return -1; + r = (conv_to_dbl(inptrB[i]) < conv_to_dbl(inptrA[i])) ? 0.0 : 1.0; + if (r != conv_to_dbl(outptr[i])) return -1; } } else @@ -73,24 +73,31 @@ int verify_step(const T *const inptrA, const T *const inptrB, int ii = i / veclen; for (int j = 0; j < veclen && i < n; ++j, ++i) { - r = (inptrB[i] < inptrA[ii]) ? 0.0f : 1.0f; - if (r != outptr[i]) + r = (conv_to_dbl(inptrB[i]) < conv_to_dbl(inptrA[ii])) ? 0.0f + : 1.0f; + if (r != conv_to_dbl(outptr[i])) { - log_error("Failure @ {%d, element %d}: step(%a,%a) -> *%a " - "vs %a\n", - ii, j, inptrA[ii], inptrB[i], r, outptr[i]); + if (std::is_same<T, half>::value) + log_error( + "Failure @ {%d, element %d}: step(%a,%a) -> *%a " + "vs %a\n", + ii, j, conv_to_flt(inptrA[ii]), + conv_to_flt(inptrB[i]), r, conv_to_flt(outptr[i])); + else + log_error( + "Failure @ {%d, element %d}: step(%a,%a) -> *%a " + "vs %a\n", + ii, j, inptrA[ii], inptrB[i], r, outptr[i]); return -1; } } } } - return 0; } } - template <typename T> int test_step_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, bool vecParam) @@ -140,6 +147,16 @@ int test_step_fn(cl_device_id device, cl_context context, input_ptr[1][i] = get_random_double(-0x40000000, 0x40000000, d); } } + else if (std::is_same<T, half>::value) + { + const float fval = CL_HALF_MAX; + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (i = 0; i < num_elements; i++) + { + input_ptr[0][i] = conv_to_half(get_random_float(-fval, fval, d)); + input_ptr[1][i] = conv_to_half(get_random_float(-fval, fval, d)); + } + } for (i = 0; i < 2; i++) { @@ -160,15 +177,15 @@ int test_step_fn(cl_device_id device, cl_context context, { std::string str = step_fn_code_pattern_v3; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str()); } else { std::string str = step_fn_code_pattern_v3_scalar; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), tname.c_str()); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), tname.c_str()); } } else @@ -176,9 +193,9 @@ int test_step_fn(cl_device_id device, cl_context context, // regular path std::string str = step_fn_code_pattern; kernelSource = - string_format(str, pragma_str.c_str(), tname.c_str(), - vecParam ? vecSizeNames[i] : "", tname.c_str(), - vecSizeNames[i], tname.c_str(), vecSizeNames[i]); + str_sprintf(str, pragma_str.c_str(), tname.c_str(), + vecParam ? vecSizeNames[i] : "", tname.c_str(), + vecSizeNames[i], tname.c_str(), vecSizeNames[i]); } const char *programPtr = kernelSource.c_str(); err = @@ -229,10 +246,14 @@ int test_step_fn(cl_device_id device, cl_context context, return err; } - cl_int StepTest::Run() { cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_step_fn<half>(device, context, queue, num_elems, vecParam); + test_error(error, "StepTest::Run<cl_half> failed"); + } error = test_step_fn<float>(device, context, queue, num_elems, vecParam); test_error(error, "StepTest::Run<float> failed"); @@ -247,7 +268,6 @@ cl_int StepTest::Run() return error; } - int test_step(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -255,7 +275,6 @@ int test_step(cl_device_id device, cl_context context, cl_command_queue queue, true); } - int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git a/test_conformance/commonfns/test_unary_fn.cpp b/test_conformance/commonfns/test_unary_fn.cpp index fed4389d..91b5c215 100644 --- a/test_conformance/commonfns/test_unary_fn.cpp +++ b/test_conformance/commonfns/test_unary_fn.cpp @@ -21,6 +21,7 @@ #include <vector> #include "harness/deviceInfo.h" +#include "harness/stringHelpers.h" #include "harness/typeWrappers.h" #include "procs.h" @@ -30,7 +31,6 @@ #define M_PI 3.14159265358979323846264338327950288 #endif - // clang-format off const char *unary_fn_code_pattern = "%s\n" /* optional pragma */ @@ -51,23 +51,10 @@ const char *unary_fn_code_pattern_v3 = "}\n"; // clang-format on - #define MAX_ERR 2.0f namespace { - -template <typename T> float UlpFn(const T &val, const double &r) -{ - if (std::is_same<T, double>::value) - return Ulp_Error_Double(val, r); - else if (std::is_same<T, float>::value) - return Ulp_Error(val, r); - else if (std::is_same<T, half>::value) - return Ulp_Error(val, r); -} - - template <typename T> int verify_degrees(const T *const inptr, const T *const outptr, int n) { @@ -77,7 +64,11 @@ int verify_degrees(const T *const inptr, const T *const outptr, int n) for (int i = 0, j = 0; i < n; i++, j++) { - r = (180.0 / M_PI) * inptr[i]; + r = (180.0 / M_PI) * conv_to_dbl(inptr[i]); + + if (std::is_same<T, half>::value) + if (!isfinite_fp(conv_to_half(r)) && !isfinite_fp(outptr[i])) + continue; error = UlpFn(outptr[i], r); @@ -88,21 +79,32 @@ int verify_degrees(const T *const inptr, const T *const outptr, int n) max_val = r; if (fabsf(error) > MAX_ERR) { - log_error("%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", - i, inptr[i], r, outptr[i], r, outptr[i], error); + if (std::is_same<T, half>::value) + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + conv_to_flt(inptr[i]), r, conv_to_flt(outptr[i]), r, + conv_to_flt(outptr[i]), error); + else + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + inptr[i], r, outptr[i], r, outptr[i], error); return 1; } } } - log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", - max_error, max_index, max_val, outptr[max_index], max_val, - outptr[max_index]); + if (std::is_same<T, half>::value) + log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, conv_to_flt(outptr[max_index]), + max_val, conv_to_flt(outptr[max_index])); + else + log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, outptr[max_index], max_val, + outptr[max_index]); return 0; } - template <typename T> int verify_radians(const T *const inptr, const T *const outptr, int n) { @@ -112,8 +114,14 @@ int verify_radians(const T *const inptr, const T *const outptr, int n) for (int i = 0, j = 0; i < n; i++, j++) { - r = (M_PI / 180.0) * inptr[i]; - error = Ulp_Error(outptr[i], r); + r = (M_PI / 180.0) * conv_to_dbl(inptr[i]); + + if (std::is_same<T, half>::value) + if (!isfinite_fp(conv_to_half(r)) && !isfinite_fp(outptr[i])) + continue; + + error = UlpFn(outptr[i], r); + if (fabsf(error) > max_error) { max_error = error; @@ -121,41 +129,51 @@ int verify_radians(const T *const inptr, const T *const outptr, int n) max_val = r; if (fabsf(error) > MAX_ERR) { - log_error("%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", - i, inptr[i], r, outptr[i], r, outptr[i], error); + if (std::is_same<T, half>::value) + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + conv_to_flt(inptr[i]), r, conv_to_flt(outptr[i]), r, + conv_to_flt(outptr[i]), error); + else + log_error( + "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i, + inptr[i], r, outptr[i], r, outptr[i], error); return 1; } } } - log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", - max_error, max_index, max_val, outptr[max_index], max_val, - outptr[max_index]); + if (std::is_same<T, half>::value) + log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, conv_to_flt(outptr[max_index]), + max_val, conv_to_flt(outptr[max_index])); + else + log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n", + max_error, max_index, max_val, outptr[max_index], max_val, + outptr[max_index]); return 0; } - template <typename T> int verify_sign(const T *const inptr, const T *const outptr, int n) { - T r = 0; + double r = 0; for (int i = 0; i < n; i++) { - if (inptr[i] > 0.0f) + if (conv_to_dbl(inptr[i]) > 0.0f) r = 1.0; - else if (inptr[i] < 0.0f) + else if (conv_to_dbl(inptr[i]) < 0.0f) r = -1.0; else r = 0.0; - if (r != outptr[i]) return -1; + if (r != conv_to_dbl(outptr[i])) return -1; } return 0; } } - template <typename T> int test_unary_fn(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems, @@ -207,33 +225,38 @@ int test_unary_fn(cl_device_id device, cl_context context, get_random_double(-100000.0 * M_PI, 100000.0 * M_PI, d); } } + else if (std::is_same<T, half>::value) + { + pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + for (int j = 0; j < num_elements; j++) + { + input_ptr[j] = conv_to_half(get_random_float( + (float)(-10000.f * M_PI), (float)(10000.f * M_PI), d)); + } + } err = clEnqueueWriteBuffer(queue, streams[0], true, 0, sizeof(T) * num_elements, &input_ptr.front(), 0, NULL, NULL); - if (err != CL_SUCCESS) - { - log_error("clEnqueueWriteBuffer failed\n"); - return -1; - } + test_error(err, "clEnqueueWriteBuffer failed\n"); for (i = 0; i < kTotalVecCount; i++) { std::string kernelSource; - char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; + const char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" }; if (i >= kVectorSizeCount) { std::string str = unary_fn_code_pattern_v3; - kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(), - tname.c_str(), fnName.c_str()); + kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), + tname.c_str(), fnName.c_str()); } else { std::string str = unary_fn_code_pattern; - kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(), - vecSizeNames[i], tname.c_str(), - vecSizeNames[i], fnName.c_str()); + kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(), + vecSizeNames[i], tname.c_str(), + vecSizeNames[i], fnName.c_str()); } /* Create kernels */ @@ -290,11 +313,18 @@ int test_unary_fn(cl_device_id device, cl_context context, return err; } - cl_int DegreesTest::Run() { - cl_int error = test_unary_fn<float>(device, context, queue, num_elems, - fnName.c_str(), verify_degrees<float>); + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_unary_fn<half>(device, context, queue, num_elems, + fnName.c_str(), verify_degrees<half>); + test_error(error, "DegreesTest::Run<cl_half> failed"); + } + + error = test_unary_fn<float>(device, context, queue, num_elems, + fnName.c_str(), verify_degrees<float>); test_error(error, "DegreesTest::Run<float> failed"); if (is_extension_available(device, "cl_khr_fp64")) @@ -307,11 +337,18 @@ cl_int DegreesTest::Run() return error; } - cl_int RadiansTest::Run() { - cl_int error = test_unary_fn<float>(device, context, queue, num_elems, - fnName.c_str(), verify_radians<float>); + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_unary_fn<half>(device, context, queue, num_elems, + fnName.c_str(), verify_radians<half>); + test_error(error, "RadiansTest::Run<cl_half> failed"); + } + + error = test_unary_fn<float>(device, context, queue, num_elems, + fnName.c_str(), verify_radians<float>); test_error(error, "RadiansTest::Run<float> failed"); if (is_extension_available(device, "cl_khr_fp64")) @@ -324,11 +361,18 @@ cl_int RadiansTest::Run() return error; } - cl_int SignTest::Run() { - cl_int error = test_unary_fn<float>(device, context, queue, num_elems, - fnName.c_str(), verify_sign<float>); + cl_int error = CL_SUCCESS; + if (is_extension_available(device, "cl_khr_fp16")) + { + error = test_unary_fn<half>(device, context, queue, num_elems, + fnName.c_str(), verify_sign<half>); + test_error(error, "SignTest::Run<cl_half> failed"); + } + + error = test_unary_fn<float>(device, context, queue, num_elems, + fnName.c_str(), verify_sign<float>); test_error(error, "SignTest::Run<float> failed"); if (is_extension_available(device, "cl_khr_fp64")) @@ -341,7 +385,6 @@ cl_int SignTest::Run() return error; } - int test_degrees(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -349,7 +392,6 @@ int test_degrees(cl_device_id device, cl_context context, "degrees"); } - int test_radians(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { @@ -357,7 +399,6 @@ int test_radians(cl_device_id device, cl_context context, "radians"); } - int test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { diff --git a/test_conformance/compiler/test_compile.cpp b/test_conformance/compiler/test_compile.cpp index f3ee4312..3af8125a 100644 --- a/test_conformance/compiler/test_compile.cpp +++ b/test_conformance/compiler/test_compile.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -16,148 +16,147 @@ #include "testBase.h" #if defined(_WIN32) #include <time.h> -#elif defined(__linux__) || defined(__APPLE__) +#elif defined(__linux__) || defined(__APPLE__) #include <sys/time.h> #include <unistd.h> #endif #include "harness/conversions.h" #define MAX_LINE_SIZE_IN_PROGRAM 1024 -#define MAX_LOG_SIZE_IN_PROGRAM 2048 +#define MAX_LOG_SIZE_IN_PROGRAM 2048 const char *sample_kernel_start = -"__kernel void sample_test(__global float *src, __global int *dst)\n" -"{\n" -" float temp;\n" -" int tid = get_global_id(0);\n"; + "__kernel void sample_test(__global float *src, __global int *dst)\n" + "{\n" + " float temp = 0.0f;\n" + " int tid = get_global_id(0);\n"; const char *sample_kernel_end = "}\n"; -const char *sample_kernel_lines[] = { -"dst[tid] = src[tid];\n", -"dst[tid] = src[tid] * 3.f;\n", -"temp = src[tid] / 4.f;\n", -"dst[tid] = dot(temp,src[tid]);\n", -"dst[tid] = dst[tid] + temp;\n" }; +const char *sample_kernel_lines[] = { "dst[tid] = src[tid];\n", + "dst[tid] = src[tid] * 3.f;\n", + "temp = src[tid] / 4.f;\n", + "dst[tid] = dot(temp,src[tid]);\n", + "dst[tid] = dst[tid] + temp;\n" }; /* I compile and link therefore I am. Robert Ioffe */ -/* The following kernels are used in testing Improved Compilation and Linking feature */ - -const char *simple_kernel = -"__kernel void\n" -"CopyBuffer(\n" -" __global float* src,\n" -" __global float* dst )\n" -"{\n" -" int id = (int)get_global_id(0);\n" -" dst[id] = src[id];\n" -"}\n"; +/* The following kernels are used in testing Improved Compilation and Linking + * feature */ + +const char *simple_kernel = "__kernel void\n" + "CopyBuffer(\n" + " __global float* src,\n" + " __global float* dst )\n" + "{\n" + " int id = (int)get_global_id(0);\n" + " dst[id] = src[id];\n" + "}\n"; const char *simple_kernel_with_defines = -"__kernel void\n" -"CopyBuffer(\n" -" __global float* src,\n" -" __global float* dst )\n" -"{\n" -" int id = (int)get_global_id(0);\n" -" float temp = src[id] - 42;\n" -" dst[id] = FIRST + temp + SECOND;\n" -"}\n"; - -const char *simple_kernel_template = -"__kernel void\n" -"CopyBuffer%d(\n" -" __global float* src,\n" -" __global float* dst )\n" -"{\n" -" int id = (int)get_global_id(0);\n" -" dst[id] = src[id];\n" -"}\n"; - -const char *composite_kernel_start = -"__kernel void\n" -"CompositeKernel(\n" -" __global float* src,\n" -" __global float* dst )\n" -"{\n"; + "__kernel void\n" + "CopyBuffer(\n" + " __global float* src,\n" + " __global float* dst )\n" + "{\n" + " int id = (int)get_global_id(0);\n" + " float temp = src[id] - 42;\n" + " dst[id] = FIRST + temp + SECOND;\n" + "}\n"; + +const char *simple_kernel_template = "__kernel void\n" + "CopyBuffer%d(\n" + " __global float* src,\n" + " __global float* dst )\n" + "{\n" + " int id = (int)get_global_id(0);\n" + " dst[id] = src[id];\n" + "}\n"; + +const char *composite_kernel_start = "__kernel void\n" + "CompositeKernel(\n" + " __global float* src,\n" + " __global float* dst )\n" + "{\n"; const char *composite_kernel_end = "}\n"; -const char *composite_kernel_template = -" CopyBuffer%d(src, dst);\n"; - -const char *composite_kernel_extern_template = -"extern __kernel void\n" -"CopyBuffer%d(\n" -" __global float* src,\n" -" __global float* dst );\n"; - -const char *another_simple_kernel = -"extern __kernel void\n" -"CopyBuffer(\n" -" __global float* src,\n" -" __global float* dst );\n" -"__kernel void\n" -"AnotherCopyBuffer(\n" -" __global float* src,\n" -" __global float* dst )\n" -"{\n" -" CopyBuffer(src, dst);\n" -"}\n"; - -const char* simple_header = -"extern __kernel void\n" -"CopyBuffer(\n" -" __global float* src,\n" -" __global float* dst );\n"; - -const char* simple_header_name = "simple_header.h"; - -const char* another_simple_kernel_with_header = -"#include \"simple_header.h\"\n" -"__kernel void\n" -"AnotherCopyBuffer(\n" -" __global float* src,\n" -" __global float* dst )\n" -"{\n" -" CopyBuffer(src, dst);\n" -"}\n"; - -const char* header_name_templates[4] = { "simple_header%d.h", - "foo/simple_header%d.h", - "foo/bar/simple_header%d.h", - "foo/bar/baz/simple_header%d.h"}; - -const char* include_header_name_templates[4] = { "#include \"simple_header%d.h\"\n", - "#include \"foo/simple_header%d.h\"\n", - "#include \"foo/bar/simple_header%d.h\"\n", - "#include \"foo/bar/baz/simple_header%d.h\"\n"}; - -const char* compile_extern_var = "extern constant float foo;\n"; -const char* compile_extern_struct = "extern constant struct bar bart;\n"; -const char* compile_extern_function = "extern int baz(int, int);\n"; - -const char* compile_static_var = "static constant float foo = 2.78;\n"; -const char* compile_static_struct = "static constant struct bar {float x, y, z, r; int color; } foo = {3.14159};\n"; -const char* compile_static_function = "static int foo(int x, int y) { return x*x + y*y; }\n"; - -const char* compile_regular_var = "constant float foo = 4.0f;\n"; -const char* compile_regular_struct = "constant struct bar {float x, y, z, r; int color; } foo = {0.f, 0.f, 0.f, 0.f, 0};\n"; -const char* compile_regular_function = "int foo(int x, int y) { return x*x + y*y; }\n"; - -const char* link_static_var_access = // use with compile_static_var -"extern constant float foo;\n" -"float access_foo() { return foo; }\n"; - -const char* link_static_struct_access = // use with compile_static_struct -"extern constant struct bar{float x, y, z, r; int color; } foo;\n" -"struct bar access_foo() {return foo; }\n"; - -const char* link_static_function_access = // use with compile_static_function -"extern int foo(int, int);\n" -"int access_foo() { int blah = foo(3, 4); return blah + 5; }\n"; - -int test_large_single_compile(cl_context context, cl_device_id deviceID, unsigned int numLines) +const char *composite_kernel_template = " CopyBuffer%d(src, dst);\n"; + +const char *composite_kernel_extern_template = "extern __kernel void\n" + "CopyBuffer%d(\n" + " __global float* src,\n" + " __global float* dst );\n"; + +const char *another_simple_kernel = "extern __kernel void\n" + "CopyBuffer(\n" + " __global float* src,\n" + " __global float* dst );\n" + "__kernel void\n" + "AnotherCopyBuffer(\n" + " __global float* src,\n" + " __global float* dst )\n" + "{\n" + " CopyBuffer(src, dst);\n" + "}\n"; + +const char *simple_header = "extern __kernel void\n" + "CopyBuffer(\n" + " __global float* src,\n" + " __global float* dst );\n"; + +const char *simple_header_name = "simple_header.h"; + +const char *another_simple_kernel_with_header = "#include \"simple_header.h\"\n" + "__kernel void\n" + "AnotherCopyBuffer(\n" + " __global float* src,\n" + " __global float* dst )\n" + "{\n" + " CopyBuffer(src, dst);\n" + "}\n"; + +const char *header_name_templates[4] = { "simple_header%d.h", + "foo/simple_header%d.h", + "foo/bar/simple_header%d.h", + "foo/bar/baz/simple_header%d.h" }; + +const char *include_header_name_templates[4] = { + "#include \"simple_header%d.h\"\n", "#include \"foo/simple_header%d.h\"\n", + "#include \"foo/bar/simple_header%d.h\"\n", + "#include \"foo/bar/baz/simple_header%d.h\"\n" +}; + +const char *compile_extern_var = "extern constant float foo;\n"; +const char *compile_extern_struct = "extern constant struct bar bart;\n"; +const char *compile_extern_function = "extern int baz(int, int);\n"; + +const char *compile_static_var = "static constant float foo = 2.78;\n"; +const char *compile_static_struct = "static constant struct bar {float x, y, " + "z, r; int color; } foo = {3.14159};\n"; +const char *compile_static_function = + "static int foo(int x, int y) { return x*x + y*y; }\n"; + +const char *compile_regular_var = "constant float foo = 4.0f;\n"; +const char *compile_regular_struct = + "constant struct bar {float x, y, z, r; int color; } foo = {0.f, 0.f, 0.f, " + "0.f, 0};\n"; +const char *compile_regular_function = + "int foo(int x, int y) { return x*x + y*y; }\n"; + +const char *link_static_var_access = // use with compile_static_var + "extern constant float foo;\n" + "float access_foo() { return foo; }\n"; + +const char *link_static_struct_access = // use with compile_static_struct + "extern constant struct bar{float x, y, z, r; int color; } foo;\n" + "struct bar access_foo() {return foo; }\n"; + +const char *link_static_function_access = // use with compile_static_function + "extern int foo(int, int);\n" + "int access_foo() { int blah = foo(3, 4); return blah + 5; }\n"; + +int test_large_single_compile(cl_context context, cl_device_id deviceID, + unsigned int numLines) { int error; cl_program program; @@ -166,96 +165,113 @@ int test_large_single_compile(cl_context context, cl_device_id deviceID, unsigne MTdata d; /* First, allocate the array for our line pointers */ - lines = (const char **)malloc( numLines * sizeof( const char * ) ); - if (lines == NULL) { - log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", numLines, __FILE__, __LINE__); + lines = (const char **)malloc(numLines * sizeof(const char *)); + if (lines == NULL) + { + log_error( + "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", + numLines, __FILE__, __LINE__); return -1; } /* First and last lines are easy */ - lines[ 0 ] = sample_kernel_start; - lines[ numLines - 1 ] = sample_kernel_end; + lines[0] = sample_kernel_start; + lines[numLines - 1] = sample_kernel_end; - numChoices = sizeof( sample_kernel_lines ) / sizeof( sample_kernel_lines[ 0 ] ); + numChoices = sizeof(sample_kernel_lines) / sizeof(sample_kernel_lines[0]); /* Fill the rest with random lines to hopefully prevent much optimization */ - d = init_genrand( gRandomSeed ); - for( i = 1; i < numLines - 1; i++ ) + d = init_genrand(gRandomSeed); + for (i = 1; i < numLines - 1; i++) { - lines[ i ] = sample_kernel_lines[ genrand_int32(d) % numChoices ]; + lines[i] = sample_kernel_lines[genrand_int32(d) % numChoices]; } - free_mtdata(d); d = NULL; + free_mtdata(d); + d = NULL; /* Try to create a program with these lines */ - error = create_single_kernel_helper_create_program(context, &program, numLines, lines); - if( program == NULL || error != CL_SUCCESS ) - { - log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); - free( lines ); + error = create_single_kernel_helper_create_program(context, &program, + numLines, lines); + if (program == NULL || error != CL_SUCCESS) + { + log_error("ERROR: Unable to create long test program with %d lines! " + "(%s in %s:%d)", + numLines, IGetErrorString(error), __FILE__, __LINE__); + free(lines); if (program != NULL) { - error = clReleaseProgram( program ); - test_error( error, "Unable to release a program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release a program object"); } return -1; } /* Build it */ - error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL ); - test_error( error, "Unable to build a long program" ); + error = clBuildProgram(program, 1, &deviceID, NULL, NULL, NULL); + test_error(error, "Unable to build a long program"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release a program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release a program object"); - free( lines ); + free(lines); return 0; } -int test_large_compile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_large_compile(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - unsigned int toTest[] = { 64, 128, 256, 512, 1024, 2048, 4096, 0 }; //8192, 16384, 32768, 0 }; + unsigned int toTest[] = { + 64, 128, 256, 512, 1024, 2048, 4096, 0 + }; // 8192, 16384, 32768, 0 }; unsigned int i; - log_info( "Testing large compiles...this might take awhile...\n" ); + log_info("Testing large compiles...this might take awhile...\n"); - for( i = 0; toTest[ i ] != 0; i++ ) + for (i = 0; toTest[i] != 0; i++) { - log_info( " %d...\n", toTest[ i ] ); + log_info(" %d...\n", toTest[i]); #if defined(_WIN32) clock_t start = clock(); -#elif defined(__linux__) || defined(__APPLE__) - timeval time1, time2; - gettimeofday(&time1, NULL); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); #endif - if( test_large_single_compile( context, deviceID, toTest[ i ] ) != 0 ) + if (test_large_single_compile(context, deviceID, toTest[i]) != 0) { - log_error( "ERROR: long program test failed for %d lines! (in %s:%d)\n", toTest[ i ], __FILE__, __LINE__); + log_error( + "ERROR: long program test failed for %d lines! (in %s:%d)\n", + toTest[i], __FILE__, __LINE__); return -1; } #if defined(_WIN32) clock_t end = clock(); - log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); -#elif defined(__linux__) || defined(__APPLE__) - gettimeofday(&time2, NULL); - log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); + log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false, + "clock() time in secs", "%d lines", toTest[i]); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf((float)(float)(time2.tv_sec - time1.tv_sec) + + 1.0e-6 * (time2.tv_usec - time1.tv_usec), + false, "wall time in secs", "%d lines", toTest[i]); #endif } return 0; } -static int verifyCopyBuffer(cl_context context, cl_command_queue queue, cl_kernel kernel); +static int verifyCopyBuffer(cl_context context, cl_command_queue queue, + cl_kernel kernel); #if defined(__APPLE__) || defined(__linux) #define _strdup strdup #endif -int test_large_multi_file_library(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +int test_large_multi_file_library(cl_context context, cl_device_id deviceID, + cl_command_queue queue, unsigned int numLines) { int error; cl_program program; @@ -264,164 +280,194 @@ int test_large_multi_file_library(cl_context context, cl_device_id deviceID, cl_ unsigned int i; char buffer[MAX_LINE_SIZE_IN_PROGRAM]; - simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program)); - if (simple_kernels == NULL) { - log_error( "ERROR: Unable to allocate kernels array with %d kernels! (in %s:%d)\n", numLines, __FILE__, __LINE__); + simple_kernels = (cl_program *)malloc(numLines * sizeof(cl_program)); + if (simple_kernels == NULL) + { + log_error("ERROR: Unable to allocate kernels array with %d kernels! " + "(in %s:%d)\n", + numLines, __FILE__, __LINE__); return -1; } /* First, allocate the array for our line pointers */ - lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) ); - if (lines == NULL) { + lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *)); + if (lines == NULL) + { free(simple_kernels); - log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", + (2 * numLines + 2), __FILE__, __LINE__); return -1; } - for( i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, composite_kernel_extern_template, i); lines[i] = _strdup(buffer); } /* First and last lines are easy */ - lines[ numLines ] = composite_kernel_start; - lines[ 2* numLines + 1] = composite_kernel_end; + lines[numLines] = composite_kernel_start; + lines[2 * numLines + 1] = composite_kernel_end; /* Fill the rest with templated kernels */ - for( i = numLines + 1; i < 2* numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { sprintf(buffer, composite_kernel_template, i - numLines - 1); - lines[ i ] = _strdup(buffer); + lines[i] = _strdup(buffer); } /* Try to create a program with these lines */ - error = create_single_kernel_helper_create_program(context, &program, 2 * numLines + 2, lines); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, + 2 * numLines + 2, lines); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); - free( simple_kernels ); - for( i = 0; i < numLines; i++) + log_error("ERROR: Unable to create long test program with %d lines! " + "(%s) (in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); + free(simple_kernels); + for (i = 0; i < numLines; i++) { - free( (void*)lines[i] ); - free( (void*)lines[i+numLines+1] ); + free((void *)lines[i]); + free((void *)lines[i + numLines + 1]); } - free( lines ); + free(lines); if (program != NULL) { - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); } return -1; } /* Compile it */ - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); /* Create and compile templated kernels */ - for( i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, simple_kernel_template, i); - const char* kernel_source = _strdup(buffer); - simple_kernels[i] = clCreateProgramWithSource( context, 1, &kernel_source, NULL, &error ); - if( simple_kernels[i] == NULL || error != CL_SUCCESS ) + const char *kernel_source = _strdup(buffer); + simple_kernels[i] = + clCreateProgramWithSource(context, 1, &kernel_source, NULL, &error); + if (simple_kernels[i] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d " + "lines! (%s) (in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, + NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); - free((void*)kernel_source); + free((void *)kernel_source); } /* Create library out of compiled templated kernels */ - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", numLines, simple_kernels, NULL, NULL, &error); - test_error( error, "Unable to create a multi-line library" ); + cl_program my_newly_minted_library = + clLinkProgram(context, 1, &deviceID, "-create-library", numLines, + simple_kernels, NULL, NULL, &error); + test_error(error, "Unable to create a multi-line library"); - /* Link the program that calls the kernels and the library that contains them */ + /* Link the program that calls the kernels and the library that contains + * them */ cl_program programs[2] = { program, my_newly_minted_library }; - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, programs, NULL, NULL, &error); - test_error( error, "Unable to link a program with a library" ); + cl_program my_newly_linked_program = clLinkProgram( + context, 1, &deviceID, NULL, 2, programs, NULL, NULL, &error); + test_error(error, "Unable to link a program with a library"); // Create the composite kernel - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); - test_error( error, "Unable to create a composite kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error(error, "Unable to create a composite kernel"); // Run the composite kernel and verify the results error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - for( i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { - free( (void*)lines[i] ); - free( (void*)lines[i+numLines+1] ); + free((void *)lines[i]); + free((void *)lines[i + numLines + 1]); } - free( lines ); + free(lines); - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { - error = clReleaseProgram( simple_kernels[i] ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(simple_kernels[i]); + test_error(error, "Unable to release program object"); } - free( simple_kernels ); + free(simple_kernels); - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_multi_file_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_multi_file_libraries(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - unsigned int toTest[] = { 2, 4, 8, 16, 32, 64, 128, 256, 0 }; // 512, 1024, 2048, 4096, 8192, 16384, 32768, 0 }; + unsigned int toTest[] = { + 2, 4, 8, 16, 32, 64, 128, 256, 0 + }; // 512, 1024, 2048, 4096, 8192, 16384, 32768, 0 }; unsigned int i; - log_info( "Testing multi-file libraries ...this might take awhile...\n" ); + log_info("Testing multi-file libraries ...this might take awhile...\n"); - for( i = 0; toTest[ i ] != 0; i++ ) + for (i = 0; toTest[i] != 0; i++) { - log_info( " %d...\n", toTest[ i ] ); + log_info(" %d...\n", toTest[i]); #if defined(_WIN32) clock_t start = clock(); -#elif defined(__linux__) || defined(__APPLE__) - timeval time1, time2; - gettimeofday(&time1, NULL); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); #endif - if( test_large_multi_file_library( context, deviceID, queue, toTest[ i ] ) != 0 ) + if (test_large_multi_file_library(context, deviceID, queue, toTest[i]) + != 0) { - log_error( "ERROR: multi-file library program test failed for %d lines! (in %s:%d)\n\n", toTest[ i ], __FILE__, __LINE__ ); + log_error("ERROR: multi-file library program test failed for %d " + "lines! (in %s:%d)\n\n", + toTest[i], __FILE__, __LINE__); return -1; } #if defined(_WIN32) clock_t end = clock(); - log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); -#elif defined(__linux__) || defined(__APPLE__) - gettimeofday(&time2, NULL); - log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); + log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false, + "clock() time in secs", "%d lines", toTest[i]); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf((float)(float)(time2.tv_sec - time1.tv_sec) + + 1.0e-6 * (time2.tv_usec - time1.tv_usec), + false, "wall time in secs", "%d lines", toTest[i]); #endif } return 0; } -int test_large_multiple_embedded_headers(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +int test_large_multiple_embedded_headers(cl_context context, + cl_device_id deviceID, + cl_command_queue queue, + unsigned int numLines) { int error; cl_program program; @@ -432,29 +478,41 @@ int test_large_multiple_embedded_headers(cl_context context, cl_device_id device unsigned int i; char buffer[MAX_LINE_SIZE_IN_PROGRAM]; - simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program)); - if (simple_kernels == NULL) { - log_error( "ERROR: Unable to allocate simple_kernels array with %d lines! (in %s:%d)\n", numLines, __FILE__, __LINE__ ); + simple_kernels = (cl_program *)malloc(numLines * sizeof(cl_program)); + if (simple_kernels == NULL) + { + log_error("ERROR: Unable to allocate simple_kernels array with %d " + "lines! (in %s:%d)\n", + numLines, __FILE__, __LINE__); return -1; } - headers = (cl_program*)malloc(numLines*sizeof(cl_program)); - if (headers == NULL) { - log_error( "ERROR: Unable to allocate headers array with %d lines! (in %s:%d)\n", numLines, __FILE__, __LINE__ ); + headers = (cl_program *)malloc(numLines * sizeof(cl_program)); + if (headers == NULL) + { + log_error("ERROR: Unable to allocate headers array with %d lines! (in " + "%s:%d)\n", + numLines, __FILE__, __LINE__); return -1; } /* First, allocate the array for our line pointers */ - header_names = (const char**)malloc( numLines*sizeof( const char * ) ); - if (header_names == NULL) { - log_error( "ERROR: Unable to allocate header_names array with %d lines! (in %s:%d)\n", numLines, __FILE__, __LINE__ ); + header_names = (const char **)malloc(numLines * sizeof(const char *)); + if (header_names == NULL) + { + log_error("ERROR: Unable to allocate header_names array with %d lines! " + "(in %s:%d)\n", + numLines, __FILE__, __LINE__); return -1; } - lines = (const char **)malloc( (2*numLines + 2)*sizeof( const char * ) ); - if (lines == NULL) { - log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__ ); + lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *)); + if (lines == NULL) + { + log_error( + "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", + (2 * numLines + 2), __FILE__, __LINE__); return -1; } - for( i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, include_header_name_templates[i % 4], i); lines[i] = _strdup(buffer); @@ -462,154 +520,178 @@ int test_large_multiple_embedded_headers(cl_context context, cl_device_id device header_names[i] = _strdup(buffer); sprintf(buffer, composite_kernel_extern_template, i); - const char* line = _strdup(buffer); - error = create_single_kernel_helper_create_program(context, &headers[i], 1, &line); - if( headers[i] == NULL || error != CL_SUCCESS ) + const char *line = buffer; + error = create_single_kernel_helper_create_program(context, &headers[i], + 1, &line); + if (headers[i] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__); + log_error("ERROR: Unable to create a simple header program! (%s in " + "%s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } } /* First and last lines are easy */ - lines[ numLines ] = composite_kernel_start; - lines[ 2* numLines + 1 ] = composite_kernel_end; + lines[numLines] = composite_kernel_start; + lines[2 * numLines + 1] = composite_kernel_end; /* Fill the rest with templated kernels */ - for( i = numLines + 1; i < 2* numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { sprintf(buffer, composite_kernel_template, i - numLines - 1); - lines[ i ] = _strdup(buffer); + lines[i] = _strdup(buffer); } /* Try to create a program with these lines */ - error = create_single_kernel_helper_create_program(context, &program, 2 * numLines + 2, lines); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, + 2 * numLines + 2, lines); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d lines! " + "(%s) (in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(program, 1, &deviceID, NULL, numLines, headers, header_names, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, numLines, headers, + header_names, NULL, NULL); + test_error(error, "Unable to compile a simple program"); /* Create and compile templated kernels */ - for( i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, simple_kernel_template, i); - const char* kernel_source = _strdup(buffer); - error = create_single_kernel_helper_create_program(context, &simple_kernels[i], 1, &kernel_source); - if( simple_kernels[i] == NULL || error != CL_SUCCESS ) + const char *kernel_source = _strdup(buffer); + error = create_single_kernel_helper_create_program( + context, &simple_kernels[i], 1, &kernel_source); + if (simple_kernels[i] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d " + "lines! (%s) (in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, + NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); - free((void*)kernel_source); + free((void *)kernel_source); } /* Create library out of compiled templated kernels */ - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", numLines, simple_kernels, NULL, NULL, &error); - test_error( error, "Unable to create a multi-line library" ); + cl_program my_newly_minted_library = + clLinkProgram(context, 1, &deviceID, "-create-library", numLines, + simple_kernels, NULL, NULL, &error); + test_error(error, "Unable to create a multi-line library"); - /* Link the program that calls the kernels and the library that contains them */ + /* Link the program that calls the kernels and the library that contains + * them */ cl_program programs[2] = { program, my_newly_minted_library }; - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, programs, NULL, NULL, &error); - test_error( error, "Unable to link a program with a library" ); + cl_program my_newly_linked_program = clLinkProgram( + context, 1, &deviceID, NULL, 2, programs, NULL, NULL, &error); + test_error(error, "Unable to link a program with a library"); // Create the composite kernel - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); - test_error( error, "Unable to create a composite kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error(error, "Unable to create a composite kernel"); // Run the composite kernel and verify the results error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - for( i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { - free( (void*)lines[i] ); - free( (void*)header_names[i] ); + free((void *)lines[i]); + free((void *)header_names[i]); } - for( i = numLines + 1; i < 2* numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { - free( (void*)lines[i] ); + free((void *)lines[i]); } - free( lines ); - free( header_names ); + free(lines); + free(header_names); - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { - error = clReleaseProgram( simple_kernels[i] ); - test_error( error, "Unable to release program object" ); - error = clReleaseProgram( headers[i] ); - test_error( error, "Unable to release header program object" ); + error = clReleaseProgram(simple_kernels[i]); + test_error(error, "Unable to release program object"); + error = clReleaseProgram(headers[i]); + test_error(error, "Unable to release header program object"); } - free( simple_kernels ); - free( headers ); + free(simple_kernels); + free(headers); - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_multiple_embedded_headers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_multiple_embedded_headers(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - unsigned int toTest[] = { 2, 4, 8, 16, 32, 64, 128, 256, 0 }; // 512, 1024, 2048, 4096, 8192, 16384, 32768, 0 }; + unsigned int toTest[] = { + 2, 4, 8, 16, 32, 64, 128, 256, 0 + }; // 512, 1024, 2048, 4096, 8192, 16384, 32768, 0 }; unsigned int i; - log_info( "Testing multiple embedded headers ...this might take awhile...\n" ); + log_info( + "Testing multiple embedded headers ...this might take awhile...\n"); - for( i = 0; toTest[ i ] != 0; i++ ) + for (i = 0; toTest[i] != 0; i++) { - log_info( " %d...\n", toTest[ i ] ); + log_info(" %d...\n", toTest[i]); #if defined(_WIN32) clock_t start = clock(); -#elif defined(__linux__) || defined(__APPLE__) - timeval time1, time2; - gettimeofday(&time1, NULL); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); #endif - if( test_large_multiple_embedded_headers( context, deviceID, queue, toTest[ i ] ) != 0 ) + if (test_large_multiple_embedded_headers(context, deviceID, queue, + toTest[i]) + != 0) { - log_error( "ERROR: multiple embedded headers program test failed for %d lines! (in %s:%d)\n", toTest[ i ], __FILE__, __LINE__ ); + log_error("ERROR: multiple embedded headers program test failed " + "for %d lines! (in %s:%d)\n", + toTest[i], __FILE__, __LINE__); return -1; } #if defined(_WIN32) clock_t end = clock(); - log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); -#elif defined(__linux__) || defined(__APPLE__) - gettimeofday(&time2, NULL); - log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); + log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false, + "clock() time in secs", "%d lines", toTest[i]); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf((float)(float)(time2.tv_sec - time1.tv_sec) + + 1.0e-6 * (time2.tv_usec - time1.tv_usec), + false, "wall time in secs", "%d lines", toTest[i]); #endif } return 0; } -double logbase(double a, double base) -{ - return log(a) / log(base); -} +double logbase(double a, double base) { return log(a) / log(base); } -int test_large_multiple_libraries(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +int test_large_multiple_libraries(cl_context context, cl_device_id deviceID, + cl_command_queue queue, unsigned int numLines) { int error; cl_program *simple_kernels; @@ -617,164 +699,202 @@ int test_large_multiple_libraries(cl_context context, cl_device_id deviceID, cl_ unsigned int i; char buffer[MAX_LINE_SIZE_IN_PROGRAM]; /* I want to create (log2(N)+1)/2 libraries */ - unsigned int level = (unsigned int)(logbase(numLines, 2.0) + 1.000001)/2; + unsigned int level = (unsigned int)(logbase(numLines, 2.0) + 1.000001) / 2; unsigned int numLibraries = (unsigned int)pow(2.0, level - 1.0); - unsigned int numFilesInLib = numLines/numLibraries; - cl_program *my_program_and_libraries = (cl_program*)malloc((1+numLibraries)*sizeof(cl_program)); - if (my_program_and_libraries == NULL) { - log_error( "ERROR: Unable to allocate program array with %d programs! (in %s:%d)\n", (1+numLibraries), __FILE__, __LINE__); + unsigned int numFilesInLib = numLines / numLibraries; + cl_program *my_program_and_libraries = + (cl_program *)malloc((1 + numLibraries) * sizeof(cl_program)); + if (my_program_and_libraries == NULL) + { + log_error("ERROR: Unable to allocate program array with %d programs! " + "(in %s:%d)\n", + (1 + numLibraries), __FILE__, __LINE__); return -1; } - log_info("level - %d, numLibraries - %d, numFilesInLib - %d\n", level, numLibraries, numFilesInLib); + log_info("level - %d, numLibraries - %d, numFilesInLib - %d\n", level, + numLibraries, numFilesInLib); - simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program)); - if (simple_kernels == NULL) { - log_error( "ERROR: Unable to allocate kernels array with %d kernels! (in %s:%d)\n", numLines, __FILE__, __LINE__); + simple_kernels = (cl_program *)malloc(numLines * sizeof(cl_program)); + if (simple_kernels == NULL) + { + log_error("ERROR: Unable to allocate kernels array with %d kernels! " + "(in %s:%d)\n", + numLines, __FILE__, __LINE__); return -1; } /* First, allocate the array for our line pointers */ - lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) ); - if (lines == NULL) { - log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__); + lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *)); + if (lines == NULL) + { + log_error( + "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", + (2 * numLines + 2), __FILE__, __LINE__); return -1; } - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, composite_kernel_extern_template, i); lines[i] = _strdup(buffer); } /* First and last lines are easy */ - lines[ numLines ] = composite_kernel_start; - lines[ 2*numLines + 1] = composite_kernel_end; + lines[numLines] = composite_kernel_start; + lines[2 * numLines + 1] = composite_kernel_end; /* Fill the rest with templated kernels */ - for(i = numLines + 1; i < 2*numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { sprintf(buffer, composite_kernel_template, i - numLines - 1); - lines[ i ] = _strdup(buffer); + lines[i] = _strdup(buffer); } /* Try to create a program with these lines */ - error = create_single_kernel_helper_create_program(context, &my_program_and_libraries[0], 2 * numLines + 2, lines); - if( my_program_and_libraries[0] == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &my_program_and_libraries[0], 2 * numLines + 2, lines); + if (my_program_and_libraries[0] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d lines! " + "(%s in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(my_program_and_libraries[0], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(my_program_and_libraries[0], 1, &deviceID, NULL, 0, + NULL, NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); /* Create and compile templated kernels */ - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, simple_kernel_template, i); - const char* kernel_source = _strdup(buffer); - error = create_single_kernel_helper_create_program(context, &simple_kernels[i], 1, &kernel_source); - if( simple_kernels[i] == NULL || error != CL_SUCCESS ) + const char *kernel_source = _strdup(buffer); + error = create_single_kernel_helper_create_program( + context, &simple_kernels[i], 1, &kernel_source); + if (simple_kernels[i] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d " + "lines! (%s in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, + NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); - free((void*)kernel_source); + free((void *)kernel_source); } /* Create library out of compiled templated kernels */ - for(i = 0; i < numLibraries; i++) { - my_program_and_libraries[i+1] = clLinkProgram(context, 1, &deviceID, "-create-library", numFilesInLib, simple_kernels+i*numFilesInLib, NULL, NULL, &error); - test_error( error, "Unable to create a multi-line library" ); + for (i = 0; i < numLibraries; i++) + { + my_program_and_libraries[i + 1] = clLinkProgram( + context, 1, &deviceID, "-create-library", numFilesInLib, + simple_kernels + i * numFilesInLib, NULL, NULL, &error); + test_error(error, "Unable to create a multi-line library"); } - /* Link the program that calls the kernels and the library that contains them */ - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, numLibraries+1, my_program_and_libraries, NULL, NULL, &error); - test_error( error, "Unable to link a program with a library" ); + /* Link the program that calls the kernels and the library that contains + * them */ + cl_program my_newly_linked_program = + clLinkProgram(context, 1, &deviceID, NULL, numLibraries + 1, + my_program_and_libraries, NULL, NULL, &error); + test_error(error, "Unable to link a program with a library"); // Create the composite kernel - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); - test_error( error, "Unable to create a composite kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error(error, "Unable to create a composite kernel"); // Run the composite kernel and verify the results error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - for(i = 0; i <= numLibraries; i++) { - error = clReleaseProgram( my_program_and_libraries[i] ); - test_error( error, "Unable to release program object" ); + for (i = 0; i <= numLibraries; i++) + { + error = clReleaseProgram(my_program_and_libraries[i]); + test_error(error, "Unable to release program object"); } - free( my_program_and_libraries ); - for(i = 0; i < numLines; i++) + free(my_program_and_libraries); + for (i = 0; i < numLines; i++) { - free( (void*)lines[i] ); + free((void *)lines[i]); } - for(i = numLines + 1; i < 2*numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { - free( (void*)lines[i] ); + free((void *)lines[i]); } - free( lines ); + free(lines); - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { - error = clReleaseProgram( simple_kernels[i] ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(simple_kernels[i]); + test_error(error, "Unable to release program object"); } - free( simple_kernels ); + free(simple_kernels); - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_multiple_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_multiple_libraries(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - unsigned int toTest[] = { 2, 8, 32, 128, 256, 0 }; // 512, 2048, 8192, 32768, 0 }; + unsigned int toTest[] = { + 2, 8, 32, 128, 256, 0 + }; // 512, 2048, 8192, 32768, 0 }; unsigned int i; - log_info( "Testing multiple libraries ...this might take awhile...\n" ); + log_info("Testing multiple libraries ...this might take awhile...\n"); - for( i = 0; toTest[ i ] != 0; i++ ) + for (i = 0; toTest[i] != 0; i++) { - log_info( " %d...\n", toTest[ i ] ); + log_info(" %d...\n", toTest[i]); #if defined(_WIN32) clock_t start = clock(); -#elif defined(__linux__) || defined(__APPLE__) - timeval time1, time2; - gettimeofday(&time1, NULL); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); #endif - if( test_large_multiple_libraries( context, deviceID, queue, toTest[ i ] ) != 0 ) + if (test_large_multiple_libraries(context, deviceID, queue, toTest[i]) + != 0) { - log_error( "ERROR: multiple library program test failed for %d lines! (in %s:%d)\n\n", toTest[ i ], __FILE__, __LINE__ ); + log_error("ERROR: multiple library program test failed for %d " + "lines! (in %s:%d)\n\n", + toTest[i], __FILE__, __LINE__); return -1; } #if defined(_WIN32) clock_t end = clock(); - log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); -#elif defined(__linux__) || defined(__APPLE__) - gettimeofday(&time2, NULL); - log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); + log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false, + "clock() time in secs", "%d lines", toTest[i]); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf((float)(float)(time2.tv_sec - time1.tv_sec) + + 1.0e-6 * (time2.tv_usec - time1.tv_usec), + false, "wall time in secs", "%d lines", toTest[i]); #endif } return 0; } -int test_large_multiple_files_multiple_libraries(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +int test_large_multiple_files_multiple_libraries(cl_context context, + cl_device_id deviceID, + cl_command_queue queue, + unsigned int numLines) { int error; cl_program *simple_kernels; @@ -782,915 +902,1173 @@ int test_large_multiple_files_multiple_libraries(cl_context context, cl_device_i unsigned int i; char buffer[MAX_LINE_SIZE_IN_PROGRAM]; /* I want to create (log2(N)+1)/4 libraries */ - unsigned int level = (unsigned int)(logbase(numLines, 2.0) + 1.000001)/2; + unsigned int level = (unsigned int)(logbase(numLines, 2.0) + 1.000001) / 2; unsigned int numLibraries = (unsigned int)pow(2.0, level - 2.0); - unsigned int numFilesInLib = numLines/(2*numLibraries); - cl_program *my_programs_and_libraries = (cl_program*)malloc((1+numLibraries+numLibraries*numFilesInLib)*sizeof(cl_program)); - if (my_programs_and_libraries == NULL) { - log_error( "ERROR: Unable to allocate program array with %d programs! (in %s:%d)\n", (1+numLibraries+numLibraries*numFilesInLib), __FILE__, __LINE__ ); + unsigned int numFilesInLib = numLines / (2 * numLibraries); + cl_program *my_programs_and_libraries = (cl_program *)malloc( + (1 + numLibraries + numLibraries * numFilesInLib) * sizeof(cl_program)); + if (my_programs_and_libraries == NULL) + { + log_error("ERROR: Unable to allocate program array with %d programs! " + "(in %s:%d)\n", + (1 + numLibraries + numLibraries * numFilesInLib), __FILE__, + __LINE__); return -1; } - log_info("level - %d, numLibraries - %d, numFilesInLib - %d\n", level, numLibraries, numFilesInLib); + log_info("level - %d, numLibraries - %d, numFilesInLib - %d\n", level, + numLibraries, numFilesInLib); - simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program)); - if (simple_kernels == NULL) { - log_error( "ERROR: Unable to allocate kernels array with %d kernels! (in %s:%d)\n", numLines, __FILE__, __LINE__ ); + simple_kernels = (cl_program *)malloc(numLines * sizeof(cl_program)); + if (simple_kernels == NULL) + { + log_error("ERROR: Unable to allocate kernels array with %d kernels! " + "(in %s:%d)\n", + numLines, __FILE__, __LINE__); return -1; } /* First, allocate the array for our line pointers */ - lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) ); - if (lines == NULL) { - log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__ ); + lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *)); + if (lines == NULL) + { + log_error( + "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", + (2 * numLines + 2), __FILE__, __LINE__); return -1; } - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, composite_kernel_extern_template, i); lines[i] = _strdup(buffer); } /* First and last lines are easy */ - lines[ numLines ] = composite_kernel_start; - lines[ 2*numLines + 1] = composite_kernel_end; + lines[numLines] = composite_kernel_start; + lines[2 * numLines + 1] = composite_kernel_end; /* Fill the rest with templated kernels */ - for(i = numLines + 1; i < 2*numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { sprintf(buffer, composite_kernel_template, i - numLines - 1); - lines[ i ] = _strdup(buffer); + lines[i] = _strdup(buffer); } /* Try to create a program with these lines */ - error = create_single_kernel_helper_create_program(context, &my_programs_and_libraries[0], 2 * numLines + 2, lines); - if( my_programs_and_libraries[0] == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &my_programs_and_libraries[0], 2 * numLines + 2, lines); + if (my_programs_and_libraries[0] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d lines! " + "(%s in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(my_programs_and_libraries[0], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(my_programs_and_libraries[0], 1, &deviceID, NULL, + 0, NULL, NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); /* Create and compile templated kernels */ - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, simple_kernel_template, i); - const char* kernel_source = _strdup(buffer); - error = create_single_kernel_helper_create_program(context, &simple_kernels[i], 1, &kernel_source); - if( simple_kernels[i] == NULL || error != CL_SUCCESS ) + const char *kernel_source = _strdup(buffer); + error = create_single_kernel_helper_create_program( + context, &simple_kernels[i], 1, &kernel_source); + if (simple_kernels[i] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d " + "lines! (%s in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, + NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); - free((void*)kernel_source); + free((void *)kernel_source); } /* Copy already compiled kernels */ - for( i = 0; i < numLibraries*numFilesInLib; i++) { - my_programs_and_libraries[i+1] = simple_kernels[i]; + for (i = 0; i < numLibraries * numFilesInLib; i++) + { + my_programs_and_libraries[i + 1] = simple_kernels[i]; } /* Create library out of compiled templated kernels */ - for( i = 0; i < numLibraries; i++) { - my_programs_and_libraries[i+1+numLibraries*numFilesInLib] = clLinkProgram(context, 1, &deviceID, "-create-library", numFilesInLib, simple_kernels+(i*numFilesInLib+numLibraries*numFilesInLib), NULL, NULL, &error); - test_error( error, "Unable to create a multi-line library" ); + for (i = 0; i < numLibraries; i++) + { + my_programs_and_libraries[i + 1 + numLibraries * numFilesInLib] = + clLinkProgram( + context, 1, &deviceID, "-create-library", numFilesInLib, + simple_kernels + + (i * numFilesInLib + numLibraries * numFilesInLib), + NULL, NULL, &error); + test_error(error, "Unable to create a multi-line library"); } - /* Link the program that calls the kernels and the library that contains them */ - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, numLibraries+1+numLibraries*numFilesInLib, my_programs_and_libraries, NULL, NULL, &error); - test_error( error, "Unable to link a program with a library" ); + /* Link the program that calls the kernels and the library that contains + * them */ + cl_program my_newly_linked_program = + clLinkProgram(context, 1, &deviceID, NULL, + numLibraries + 1 + numLibraries * numFilesInLib, + my_programs_and_libraries, NULL, NULL, &error); + test_error(error, "Unable to link a program with a library"); // Create the composite kernel - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); - test_error( error, "Unable to create a composite kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error(error, "Unable to create a composite kernel"); // Run the composite kernel and verify the results error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - for(i = 0; i < numLibraries+1+numLibraries*numFilesInLib; i++) { - error = clReleaseProgram( my_programs_and_libraries[i] ); - test_error( error, "Unable to release program object" ); + for (i = 0; i < numLibraries + 1 + numLibraries * numFilesInLib; i++) + { + error = clReleaseProgram(my_programs_and_libraries[i]); + test_error(error, "Unable to release program object"); } - free( my_programs_and_libraries ); + free(my_programs_and_libraries); - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { - free( (void*)lines[i] ); + free((void *)lines[i]); } - for(i = numLines + 1; i < 2*numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { - free( (void*)lines[i] ); + free((void *)lines[i]); } - free( lines ); + free(lines); - for(i = numLibraries*numFilesInLib; i < numLines; i++) + for (i = numLibraries * numFilesInLib; i < numLines; i++) { - error = clReleaseProgram( simple_kernels[i] ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(simple_kernels[i]); + test_error(error, "Unable to release program object"); } - free( simple_kernels ); + free(simple_kernels); - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_multiple_files_multiple_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_multiple_files_multiple_libraries(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { - unsigned int toTest[] = { 8, 32, 128, 256, 0 }; // 512, 2048, 8192, 32768, 0 }; + unsigned int toTest[] = { 8, 32, 128, 256, + 0 }; // 512, 2048, 8192, 32768, 0 }; unsigned int i; - log_info( "Testing multiple files and multiple libraries ...this might take awhile...\n" ); + log_info("Testing multiple files and multiple libraries ...this might take " + "awhile...\n"); - for( i = 0; toTest[ i ] != 0; i++ ) + for (i = 0; toTest[i] != 0; i++) { - log_info( " %d...\n", toTest[ i ] ); + log_info(" %d...\n", toTest[i]); #if defined(_WIN32) clock_t start = clock(); -#elif defined(__linux__) || defined(__APPLE__) - timeval time1, time2; - gettimeofday(&time1, NULL); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); #endif - if( test_large_multiple_files_multiple_libraries( context, deviceID, queue, toTest[ i ] ) != 0 ) + if (test_large_multiple_files_multiple_libraries(context, deviceID, + queue, toTest[i]) + != 0) { - log_error( "ERROR: multiple files, multiple libraries program test failed for %d lines! (in %s:%d)\n\n", toTest[ i ], __FILE__, __LINE__ ); + log_error("ERROR: multiple files, multiple libraries program test " + "failed for %d lines! (in %s:%d)\n\n", + toTest[i], __FILE__, __LINE__); return -1; } #if defined(_WIN32) clock_t end = clock(); - log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); -#elif defined(__linux__) || defined(__APPLE__) - gettimeofday(&time2, NULL); - log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); + log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false, + "clock() time in secs", "%d lines", toTest[i]); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf((float)(float)(time2.tv_sec - time1.tv_sec) + + 1.0e-6 * (time2.tv_usec - time1.tv_usec), + false, "wall time in secs", "%d lines", toTest[i]); #endif } return 0; } -int test_large_multiple_files(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +int test_large_multiple_files(cl_context context, cl_device_id deviceID, + cl_command_queue queue, unsigned int numLines) { int error; const char **lines; unsigned int i; char buffer[MAX_LINE_SIZE_IN_PROGRAM]; - cl_program *my_programs = (cl_program*)malloc((1+numLines)*sizeof(cl_program)); + cl_program *my_programs = + (cl_program *)malloc((1 + numLines) * sizeof(cl_program)); - if (my_programs == NULL) { - log_error( "ERROR: Unable to allocate my_programs array with %d programs! (in %s:%d)\n", (1+numLines), __FILE__, __LINE__); + if (my_programs == NULL) + { + log_error("ERROR: Unable to allocate my_programs array with %d " + "programs! (in %s:%d)\n", + (1 + numLines), __FILE__, __LINE__); return -1; } /* First, allocate the array for our line pointers */ - lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) ); - if (lines == NULL) { - log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__); + lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *)); + if (lines == NULL) + { + log_error( + "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", + (2 * numLines + 2), __FILE__, __LINE__); return -1; } - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, composite_kernel_extern_template, i); lines[i] = _strdup(buffer); } /* First and last lines are easy */ - lines[ numLines ] = composite_kernel_start; - lines[ 2* numLines + 1] = composite_kernel_end; + lines[numLines] = composite_kernel_start; + lines[2 * numLines + 1] = composite_kernel_end; /* Fill the rest with templated kernels */ - for(i = numLines + 1; i < 2*numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { sprintf(buffer, composite_kernel_template, i - numLines - 1); - lines[ i ] = _strdup(buffer); + lines[i] = _strdup(buffer); } /* Try to create a program with these lines */ - error = create_single_kernel_helper_create_program(context, &my_programs[0], 2 * numLines + 2, lines); - if( my_programs[0] == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &my_programs[0], + 2 * numLines + 2, lines); + if (my_programs[0] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d lines! " + "(%s in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(my_programs[0], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(my_programs[0], 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); /* Create and compile templated kernels */ - for( i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, simple_kernel_template, i); - const char* kernel_source = _strdup(buffer); - error = create_single_kernel_helper_create_program(context, &my_programs[i + 1], 1, &kernel_source); - if( my_programs[i+1] == NULL || error != CL_SUCCESS ) + const char *kernel_source = _strdup(buffer); + error = create_single_kernel_helper_create_program( + context, &my_programs[i + 1], 1, &kernel_source); + if (my_programs[i + 1] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d " + "lines! (%s in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(my_programs[i+1], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(my_programs[i + 1], 1, &deviceID, NULL, 0, + NULL, NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); - free((void*)kernel_source); + free((void *)kernel_source); } - /* Link the program that calls the kernels and the library that contains them */ - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1+numLines, my_programs, NULL, NULL, &error); - test_error( error, "Unable to link a program with a library" ); + /* Link the program that calls the kernels and the library that contains + * them */ + cl_program my_newly_linked_program = + clLinkProgram(context, 1, &deviceID, NULL, 1 + numLines, my_programs, + NULL, NULL, &error); + test_error(error, "Unable to link a program with a library"); // Create the composite kernel - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); - test_error( error, "Unable to create a composite kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error(error, "Unable to create a composite kernel"); // Run the composite kernel and verify the results error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - for(i = 0; i < 1+numLines; i++) { - error = clReleaseProgram( my_programs[i] ); - test_error( error, "Unable to release program object" ); + for (i = 0; i < 1 + numLines; i++) + { + error = clReleaseProgram(my_programs[i]); + test_error(error, "Unable to release program object"); } - free( my_programs ); - for(i = 0; i < numLines; i++) + free(my_programs); + for (i = 0; i < numLines; i++) { - free( (void*)lines[i] ); + free((void *)lines[i]); } - for(i = numLines + 1; i < 2*numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { - free( (void*)lines[i] ); + free((void *)lines[i]); } - free( lines ); + free(lines); - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_multiple_files(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_multiple_files(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - unsigned int toTest[] = { 8, 32, 128, 256, 0 }; // 512, 2048, 8192, 32768, 0 }; + unsigned int toTest[] = { 8, 32, 128, 256, + 0 }; // 512, 2048, 8192, 32768, 0 }; unsigned int i; - log_info( "Testing multiple files compilation and linking into a single executable ...this might take awhile...\n" ); + log_info("Testing multiple files compilation and linking into a single " + "executable ...this might take awhile...\n"); - for( i = 0; toTest[ i ] != 0; i++ ) + for (i = 0; toTest[i] != 0; i++) { - log_info( " %d...\n", toTest[ i ] ); + log_info(" %d...\n", toTest[i]); #if defined(_WIN32) clock_t start = clock(); -#elif defined(__linux__) || defined(__APPLE__) - timeval time1, time2; - gettimeofday(&time1, NULL); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); #endif - if( test_large_multiple_files( context, deviceID, queue, toTest[ i ] ) != 0 ) + if (test_large_multiple_files(context, deviceID, queue, toTest[i]) != 0) { - log_error( "ERROR: multiple files program test failed for %d lines! (in %s:%d)\n\n", toTest[ i ], __FILE__, __LINE__ ); + log_error("ERROR: multiple files program test failed for %d lines! " + "(in %s:%d)\n\n", + toTest[i], __FILE__, __LINE__); return -1; } #if defined(_WIN32) clock_t end = clock(); - log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); -#elif defined(__linux__) || defined(__APPLE__) - gettimeofday(&time2, NULL); - log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); + log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false, + "clock() time in secs", "%d lines", toTest[i]); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf((float)(float)(time2.tv_sec - time1.tv_sec) + + 1.0e-6 * (time2.tv_usec - time1.tv_usec), + false, "wall time in secs", "%d lines", toTest[i]); #endif } return 0; } -int test_simple_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_compile_only(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program; log_info("Testing a simple compilation only...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); return 0; } -int test_simple_static_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_static_compile_only(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program; log_info("Testing a simple static compilations only...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &compile_static_var); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &compile_static_var); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple static variable test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a simple static variable test " + "program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } log_info("Compiling a static variable...\n"); - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple static variable program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple static variable program"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = create_single_kernel_helper_create_program(context, &program, 1, &compile_static_struct); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &compile_static_struct); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple static struct test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a simple static struct test " + "program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } log_info("Compiling a static struct...\n"); - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple static variable program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple static variable program"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = create_single_kernel_helper_create_program(context, &program, 1, &compile_static_function); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &program, 1, &compile_static_function); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple static function test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a simple static function test " + "program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } log_info("Compiling a static function...\n"); - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple static function program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple static function program"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); return 0; } -int test_simple_extern_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_extern_compile_only(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program; log_info("Testing a simple extern compilations only...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_header); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_header); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple extern kernel test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a simple extern kernel test " + "program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } log_info("Compiling an extern kernel...\n"); - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple extern kernel program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple extern kernel program"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = create_single_kernel_helper_create_program(context, &program, 1, &compile_extern_var); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &compile_extern_var); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple extern variable test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a simple extern variable test " + "program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } log_info("Compiling an extern variable...\n"); - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple extern variable program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple extern variable program"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = create_single_kernel_helper_create_program(context, &program, 1, &compile_extern_struct); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &compile_extern_struct); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple extern struct test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a simple extern struct test " + "program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } log_info("Compiling an extern struct...\n"); - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple extern variable program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple extern variable program"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = create_single_kernel_helper_create_program(context, &program, 1, &compile_extern_function); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &program, 1, &compile_extern_function); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple extern function test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a simple extern function test " + "program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } log_info("Compiling an extern function...\n"); - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple extern function program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple extern function program"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); return 0; } -struct simple_user_data { - const char* m_message; - cl_event m_event; +struct simple_user_data +{ + const char *m_message; + cl_event m_event; }; -const char* once_upon_a_midnight_dreary = "Once upon a midnight dreary!"; +const char *once_upon_a_midnight_dreary = "Once upon a midnight dreary!"; -static void CL_CALLBACK simple_compile_callback(cl_program program, void* user_data) +static void CL_CALLBACK simple_compile_callback(cl_program program, + void *user_data) { - simple_user_data* simple_compile_user_data = (simple_user_data*)user_data; - log_info("in the simple_compile_callback: program %p just completed compiling with '%s'\n", program, simple_compile_user_data->m_message); - if (strcmp(once_upon_a_midnight_dreary, simple_compile_user_data->m_message) != 0) + simple_user_data *simple_compile_user_data = (simple_user_data *)user_data; + log_info("in the simple_compile_callback: program %p just completed " + "compiling with '%s'\n", + program, simple_compile_user_data->m_message); + if (strcmp(once_upon_a_midnight_dreary, simple_compile_user_data->m_message) + != 0) { - log_error("ERROR: in the simple_compile_callback: Expected '%s' and got %s (in %s:%d)!\n", once_upon_a_midnight_dreary, simple_compile_user_data->m_message, __FILE__, __LINE__); + log_error("ERROR: in the simple_compile_callback: Expected '%s' and " + "got %s (in %s:%d)!\n", + once_upon_a_midnight_dreary, + simple_compile_user_data->m_message, __FILE__, __LINE__); } int error; - log_info("in the simple_compile_callback: program %p just completed compiling with '%p'\n", program, simple_compile_user_data->m_event); + log_info("in the simple_compile_callback: program %p just completed " + "compiling with '%p'\n", + program, simple_compile_user_data->m_event); - error = clSetUserEventStatus(simple_compile_user_data->m_event, CL_COMPLETE); + error = + clSetUserEventStatus(simple_compile_user_data->m_event, CL_COMPLETE); if (error != CL_SUCCESS) { - log_error( "ERROR: in the simple_compile_callback: Unable to set user event status to CL_COMPLETE! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: in the simple_compile_callback: Unable to set user " + "event status to CL_COMPLETE! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); exit(-1); } - log_info("in the simple_compile_callback: Successfully signaled compile_program_completion_event!\n"); + log_info("in the simple_compile_callback: Successfully signaled " + "compile_program_completion_event!\n"); } -int test_simple_compile_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_compile_with_callback(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program; cl_event compile_program_completion_event; log_info("Testing a simple compilation with callback...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } compile_program_completion_event = clCreateUserEvent(context, &error); - test_error( error, "Unable to create a user event"); + test_error(error, "Unable to create a user event"); - simple_user_data simple_compile_user_data = {once_upon_a_midnight_dreary, compile_program_completion_event}; + simple_user_data simple_compile_user_data = { + once_upon_a_midnight_dreary, compile_program_completion_event + }; - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, simple_compile_callback, (void*)&simple_compile_user_data); - test_error( error, "Unable to compile a simple program with a callback" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, + simple_compile_callback, + (void *)&simple_compile_user_data); + test_error(error, "Unable to compile a simple program with a callback"); error = clWaitForEvents(1, &compile_program_completion_event); - test_error( error, "clWaitForEvents failed when waiting on compile_program_completion_event"); + test_error(error, + "clWaitForEvents failed when waiting on " + "compile_program_completion_event"); /* All done! */ error = clReleaseEvent(compile_program_completion_event); - test_error( error, "Unable to release event object" ); + test_error(error, "Unable to release event object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); return 0; } -int test_simple_embedded_header_compile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_embedded_header_compile(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program, header; log_info("Testing a simple embedded header compile only...\n"); - program = clCreateProgramWithSource(context, 1, &another_simple_kernel_with_header, NULL, &error); - if( program == NULL || error != CL_SUCCESS ) + program = clCreateProgramWithSource( + context, 1, &another_simple_kernel_with_header, NULL, &error); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - header = clCreateProgramWithSource(context, 1, &simple_header, NULL, &error); - if( header == NULL || error != CL_SUCCESS ) + header = + clCreateProgramWithSource(context, 1, &simple_header, NULL, &error); + if (header == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, &simple_header_name, NULL, NULL); - test_error( error, "Unable to compile a simple program with embedded header" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, + &simple_header_name, NULL, NULL); + test_error(error, + "Unable to compile a simple program with embedded header"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( header ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(header); + test_error(error, "Unable to release program object"); return 0; } -int test_simple_link_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_link_only(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program; log_info("Testing a simple linking only...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error); - test_error( error, "Unable to link a simple program" ); + cl_program my_newly_linked_program = clLinkProgram( + context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error); + test_error(error, "Unable to link a simple program"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_two_file_regular_variable_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_two_file_regular_variable_access(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program, second_program, my_newly_linked_program; - const char* sources[2] = {simple_kernel, compile_regular_var}; // here we want to avoid linking error due to lack of kernels - log_info("Compiling and linking two program objects, where one tries to access regular variable from another...\n"); - error = create_single_kernel_helper_create_program(context, &program, 2, sources); - if( program == NULL || error != CL_SUCCESS ) + const char *sources[2] = { + simple_kernel, compile_regular_var + }; // here we want to avoid linking error due to lack of kernels + log_info("Compiling and linking two program objects, where one tries to " + "access regular variable from another...\n"); + error = create_single_kernel_helper_create_program(context, &program, 2, + sources); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a test program with regular variable! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a test program with regular " + "variable! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program with regular function" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, + "Unable to compile a simple program with regular function"); - error = create_single_kernel_helper_create_program(context, &second_program, 1, &link_static_var_access); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &second_program, 1, &link_static_var_access); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a test program that tries to access a regular variable! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a test program that tries to access " + "a regular variable! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a program that tries to access a regular variable" ); + error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error( + error, + "Unable to compile a program that tries to access a regular variable"); cl_program two_programs[2] = { program, second_program }; - my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, two_programs, NULL, NULL, &error); - test_error( error, "clLinkProgram: Expected a different error code while linking a program that tries to access a regular variable" ); + my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, + two_programs, NULL, NULL, &error); + test_error(error, + "clLinkProgram: Expected a different error code while linking a " + "program that tries to access a regular variable"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( second_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(second_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_two_file_regular_struct_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_two_file_regular_struct_access(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program, second_program, my_newly_linked_program; - const char* sources[2] = {simple_kernel, compile_regular_struct}; // here we want to avoid linking error due to lack of kernels - log_info("Compiling and linking two program objects, where one tries to access regular struct from another...\n"); - error = create_single_kernel_helper_create_program(context, &program, 2, sources); - if( program == NULL || error != CL_SUCCESS ) + const char *sources[2] = { + simple_kernel, compile_regular_struct + }; // here we want to avoid linking error due to lack of kernels + log_info("Compiling and linking two program objects, where one tries to " + "access regular struct from another...\n"); + error = create_single_kernel_helper_create_program(context, &program, 2, + sources); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a test program with regular struct! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a test program with regular struct! " + "(%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program with regular struct" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program with regular struct"); - error = create_single_kernel_helper_create_program(context, &second_program, 1, &link_static_struct_access); - if( second_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &second_program, 1, &link_static_struct_access); + if (second_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a test program that tries to access a regular struct! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a test program that tries to access " + "a regular struct! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a program that tries to access a regular struct" ); + error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error( + error, + "Unable to compile a program that tries to access a regular struct"); cl_program two_programs[2] = { program, second_program }; - my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, two_programs, NULL, NULL, &error); - test_error( error, "clLinkProgram: Expected a different error code while linking a program that tries to access a regular struct" ); + my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, + two_programs, NULL, NULL, &error); + test_error(error, + "clLinkProgram: Expected a different error code while linking a " + "program that tries to access a regular struct"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( second_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(second_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_two_file_regular_function_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_two_file_regular_function_access(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program, second_program, my_newly_linked_program; - const char* sources[2] = {simple_kernel, compile_regular_function}; // here we want to avoid linking error due to lack of kernels - log_info("Compiling and linking two program objects, where one tries to access regular function from another...\n"); - error = create_single_kernel_helper_create_program(context, &program, 2, sources); - if( program == NULL || error != CL_SUCCESS ) + const char *sources[2] = { + simple_kernel, compile_regular_function + }; // here we want to avoid linking error due to lack of kernels + log_info("Compiling and linking two program objects, where one tries to " + "access regular function from another...\n"); + error = create_single_kernel_helper_create_program(context, &program, 2, + sources); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a test program with regular function! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a test program with regular " + "function! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program with regular function" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, + "Unable to compile a simple program with regular function"); - error = create_single_kernel_helper_create_program(context, &second_program, 1, &link_static_function_access); - if( second_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &second_program, 1, &link_static_function_access); + if (second_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a test program that tries to access a regular function! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create a test program that tries to access " + "a regular function! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a program that tries to access a regular function" ); + error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error( + error, + "Unable to compile a program that tries to access a regular function"); cl_program two_programs[2] = { program, second_program }; - my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, two_programs, NULL, NULL, &error); - test_error( error, "clLinkProgram: Expected a different error code while linking a program that tries to access a regular function" ); + my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, + two_programs, NULL, NULL, &error); + test_error(error, + "clLinkProgram: Expected a different error code while linking a " + "program that tries to access a regular function"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( second_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(second_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_simple_embedded_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_embedded_header_link(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program, header, simple_program; log_info("Testing a simple embedded header link...\n"); - program = clCreateProgramWithSource(context, 1, &another_simple_kernel_with_header, NULL, &error); - if( program == NULL || error != CL_SUCCESS ) + program = clCreateProgramWithSource( + context, 1, &another_simple_kernel_with_header, NULL, &error); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - header = clCreateProgramWithSource(context, 1, &simple_header, NULL, &error); - if( header == NULL || error != CL_SUCCESS ) + header = + clCreateProgramWithSource(context, 1, &simple_header, NULL, &error); + if (header == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, &simple_header_name, NULL, NULL); - test_error( error, "Unable to compile a simple program with embedded header" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, + &simple_header_name, NULL, NULL); + test_error(error, + "Unable to compile a simple program with embedded header"); - error = create_single_kernel_helper_create_program(context, &simple_program, 1, &simple_kernel); - if( simple_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &simple_program, + 1, &simple_kernel); + if (simple_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); cl_program two_programs[2] = { program, simple_program }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); - test_error( error, "Unable to create an executable from two binaries, one compiled with embedded header" ); + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); + test_error(error, + "Unable to create an executable from two binaries, one compiled " + "with embedded header"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( header ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(header); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( simple_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(simple_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); return 0; } -const char* when_i_pondered_weak_and_weary = "When I pondered weak and weary!"; +const char *when_i_pondered_weak_and_weary = "When I pondered weak and weary!"; -static void CL_CALLBACK simple_link_callback(cl_program program, void* user_data) +static void CL_CALLBACK simple_link_callback(cl_program program, + void *user_data) { - simple_user_data* simple_link_user_data = (simple_user_data*)user_data; - log_info("in the simple_link_callback: program %p just completed linking with '%s'\n", program, (const char*)simple_link_user_data->m_message); - if (strcmp(when_i_pondered_weak_and_weary, simple_link_user_data->m_message) != 0) + simple_user_data *simple_link_user_data = (simple_user_data *)user_data; + log_info("in the simple_link_callback: program %p just completed linking " + "with '%s'\n", + program, (const char *)simple_link_user_data->m_message); + if (strcmp(when_i_pondered_weak_and_weary, simple_link_user_data->m_message) + != 0) { - log_error("ERROR: in the simple_compile_callback: Expected '%s' and got %s! (in %s:%d)\n", when_i_pondered_weak_and_weary, simple_link_user_data->m_message, __FILE__, __LINE__); + log_error("ERROR: in the simple_compile_callback: Expected '%s' and " + "got %s! (in %s:%d)\n", + when_i_pondered_weak_and_weary, + simple_link_user_data->m_message, __FILE__, __LINE__); } int error; - log_info("in the simple_link_callback: program %p just completed linking with '%p'\n", program, simple_link_user_data->m_event); + log_info("in the simple_link_callback: program %p just completed linking " + "with '%p'\n", + program, simple_link_user_data->m_event); error = clSetUserEventStatus(simple_link_user_data->m_event, CL_COMPLETE); if (error != CL_SUCCESS) { - log_error( "ERROR: simple_link_callback: Unable to set user event status to CL_COMPLETE! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: simple_link_callback: Unable to set user event " + "status to CL_COMPLETE! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); exit(-1); } - log_info("in the simple_link_callback: Successfully signaled link_program_completion_event event!\n"); + log_info("in the simple_link_callback: Successfully signaled " + "link_program_completion_event event!\n"); } -int test_simple_link_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_link_with_callback(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program; cl_event link_program_completion_event; log_info("Testing a simple linking with callback...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); link_program_completion_event = clCreateUserEvent(context, &error); - test_error( error, "Unable to create a user event"); + test_error(error, "Unable to create a user event"); - simple_user_data simple_link_user_data = {when_i_pondered_weak_and_weary, link_program_completion_event}; + simple_user_data simple_link_user_data = { when_i_pondered_weak_and_weary, + link_program_completion_event }; - cl_program my_linked_library = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, simple_link_callback, (void*)&simple_link_user_data, &error); - test_error( error, "Unable to link a simple program" ); + cl_program my_linked_library = clLinkProgram( + context, 1, &deviceID, NULL, 1, &program, simple_link_callback, + (void *)&simple_link_user_data, &error); + test_error(error, "Unable to link a simple program"); error = clWaitForEvents(1, &link_program_completion_event); - test_error( error, "clWaitForEvents failed when waiting on link_program_completion_event"); + test_error( + error, + "clWaitForEvents failed when waiting on link_program_completion_event"); /* All done! */ error = clReleaseEvent(link_program_completion_event); - test_error( error, "Unable to release event object" ); + test_error(error, "Unable to release event object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_linked_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_linked_library); + test_error(error, "Unable to release program object"); return 0; } -static void initBuffer(float* & srcBuffer, unsigned int cnDimension) +static void initBuffer(float *&srcBuffer, unsigned int cnDimension) { float num = 0.0f; - for( unsigned int i = 0; i < cnDimension; i++ ) + for (unsigned int i = 0; i < cnDimension; i++) { - if( ( i % 10 ) == 0 ) + if ((i % 10) == 0) { num = 0.0f; } - srcBuffer[ i ] = num; + srcBuffer[i] = num; num = num + 1.0f; } } -static int verifyCopyBuffer(cl_context context, cl_command_queue queue, cl_kernel kernel) +static int verifyCopyBuffer(cl_context context, cl_command_queue queue, + cl_kernel kernel) { int error, result = CL_SUCCESS; const size_t cnDimension = 32; // Allocate source buffer - float * srcBuffer = (float*)malloc(cnDimension * sizeof(float)); - float * dstBuffer = (float*)malloc(cnDimension * sizeof(float)); + float *srcBuffer = (float *)malloc(cnDimension * sizeof(float)); + float *dstBuffer = (float *)malloc(cnDimension * sizeof(float)); - if (srcBuffer == NULL) { - log_error( "ERROR: Unable to allocate srcBuffer float array with %lu floats! (in %s:%d)\n", cnDimension, __FILE__, __LINE__); + if (srcBuffer == NULL) + { + log_error("ERROR: Unable to allocate srcBuffer float array with %lu " + "floats! (in %s:%d)\n", + cnDimension, __FILE__, __LINE__); return -1; } - if (dstBuffer == NULL) { - log_error( "ERROR: Unable to allocate dstBuffer float array with %lu floats! (in %s:%d)\n", cnDimension, __FILE__, __LINE__); + if (dstBuffer == NULL) + { + log_error("ERROR: Unable to allocate dstBuffer float array with %lu " + "floats! (in %s:%d)\n", + cnDimension, __FILE__, __LINE__); return -1; } - if( srcBuffer && dstBuffer ) + if (srcBuffer && dstBuffer) { // initialize host memory - initBuffer(srcBuffer, cnDimension ); + initBuffer(srcBuffer, cnDimension); // Allocate device memory - cl_mem deviceMemSrc = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - cnDimension * sizeof( cl_float ), srcBuffer, &error); - test_error( error, "Unable to create a source memory buffer" ); + cl_mem deviceMemSrc = + clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + cnDimension * sizeof(cl_float), srcBuffer, &error); + test_error(error, "Unable to create a source memory buffer"); - cl_mem deviceMemDst = clCreateBuffer(context, CL_MEM_WRITE_ONLY, - cnDimension * sizeof( cl_float ), 0, &error); - test_error( error, "Unable to create a destination memory buffer" ); + cl_mem deviceMemDst = + clCreateBuffer(context, CL_MEM_WRITE_ONLY, + cnDimension * sizeof(cl_float), 0, &error); + test_error(error, "Unable to create a destination memory buffer"); // Set kernel args // Set parameter 0 to be the source buffer - error = clSetKernelArg(kernel, 0, sizeof( cl_mem ), ( void * )&deviceMemSrc ); - test_error( error, "Unable to set the first kernel argument" ); + error = + clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&deviceMemSrc); + test_error(error, "Unable to set the first kernel argument"); // Set parameter 1 to be the destination buffer - error = clSetKernelArg(kernel, 1, sizeof( cl_mem ), ( void * )&deviceMemDst ); - test_error( error, "Unable to set the second kernel argument" ); + error = + clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&deviceMemDst); + test_error(error, "Unable to set the second kernel argument"); // Execute kernel - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, - &cnDimension, 0, 0, NULL, NULL ); - test_error( error, "Unable to enqueue kernel" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &cnDimension, 0, + 0, NULL, NULL); + test_error(error, "Unable to enqueue kernel"); - error = clFlush( queue ); - test_error( error, "Unable to flush the queue" ); + error = clFlush(queue); + test_error(error, "Unable to flush the queue"); // copy results from device back to host - error = clEnqueueReadBuffer(queue, deviceMemDst, CL_TRUE, 0, cnDimension * sizeof( cl_float ), - dstBuffer, 0, NULL, NULL ); - test_error( error, "Unable to read the destination buffer" ); + error = clEnqueueReadBuffer(queue, deviceMemDst, CL_TRUE, 0, + cnDimension * sizeof(cl_float), dstBuffer, + 0, NULL, NULL); + test_error(error, "Unable to read the destination buffer"); - error = clFlush( queue ); - test_error( error, "Unable to flush the queue" ); + error = clFlush(queue); + test_error(error, "Unable to flush the queue"); // Compare the source and destination buffers - const int* pSrc = (int*)srcBuffer; - const int* pDst = (int*)dstBuffer; + const int *pSrc = (int *)srcBuffer; + const int *pDst = (int *)dstBuffer; int mismatch = 0; - for( size_t i = 0; i < cnDimension; i++ ) + for (size_t i = 0; i < cnDimension; i++) { - if( pSrc[i] != pDst[i] ) + if (pSrc[i] != pDst[i]) { - if( mismatch < 4 ) + if (mismatch < 4) { - log_info("Offset %08lX: Expected %08X, Got %08X\n", i * 4, pSrc[i], pDst[i] ); + log_info("Offset %08lX: Expected %08X, Got %08X\n", i * 4, + pSrc[i], pDst[i]); } else { @@ -1700,9 +2078,9 @@ static int verifyCopyBuffer(cl_context context, cl_command_queue queue, cl_kerne } } - if( mismatch ) + if (mismatch) { - log_info("*** %d mismatches found, TEST FAILS! ***\n", mismatch ); + log_info("*** %d mismatches found, TEST FAILS! ***\n", mismatch); result = -1; } else @@ -1710,806 +2088,989 @@ static int verifyCopyBuffer(cl_context context, cl_command_queue queue, cl_kerne log_info("Buffers match, test passes.\n"); } - free( srcBuffer ); + free(srcBuffer); srcBuffer = NULL; - free( dstBuffer ); + free(dstBuffer); dstBuffer = NULL; - if( deviceMemSrc ) + if (deviceMemSrc) { - error = clReleaseMemObject( deviceMemSrc ); - test_error( error, "Unable to release memory object" ); + error = clReleaseMemObject(deviceMemSrc); + test_error(error, "Unable to release memory object"); } - if( deviceMemDst ) + if (deviceMemDst) { - error = clReleaseMemObject( deviceMemDst ); - test_error( error, "Unable to release memory object" ); + error = clReleaseMemObject(deviceMemDst); + test_error(error, "Unable to release memory object"); } } return result; } -int test_execute_after_simple_compile_and_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_simple_compile_and_link(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program; log_info("Testing execution after a simple compile and link...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error); - test_error( error, "Unable to link a simple program" ); + cl_program my_newly_linked_program = clLinkProgram( + context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error); + test_error(error, "Unable to link a simple program"); - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_execute_after_simple_compile_and_link_no_device_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_simple_compile_and_link_no_device_info( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements) { int error; cl_program program; - log_info("Testing execution after a simple compile and link with no device information provided...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + log_info("Testing execution after a simple compile and link with no device " + "information provided...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } error = clCompileProgram(program, 0, NULL, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + test_error(error, "Unable to compile a simple program"); - cl_program my_newly_linked_program = clLinkProgram(context, 0, NULL, NULL, 1, &program, NULL, NULL, &error); - test_error( error, "Unable to link a simple program" ); + cl_program my_newly_linked_program = + clLinkProgram(context, 0, NULL, NULL, 1, &program, NULL, NULL, &error); + test_error(error, "Unable to link a simple program"); - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_execute_after_simple_compile_and_link_with_defines(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_simple_compile_and_link_with_defines( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements) { int error; cl_program program; - log_info("Testing execution after a simple compile and link with defines...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel_with_defines, "-DFIRST=5 -DSECOND=37"); - if( program == NULL || error != CL_SUCCESS ) + log_info( + "Testing execution after a simple compile and link with defines...\n"); + error = create_single_kernel_helper_create_program( + context, &program, 1, &simple_kernel_with_defines, + "-DFIRST=5 -DSECOND=37"); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, "-DFIRST=5 -DSECOND=37", 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, "-DFIRST=5 -DSECOND=37", 0, + NULL, NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error); - test_error( error, "Unable to link a simple program" ); + cl_program my_newly_linked_program = clLinkProgram( + context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error); + test_error(error, "Unable to link a simple program"); - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_execute_after_serialize_reload_object(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_serialize_reload_object(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program; - size_t binarySize; + size_t binarySize; unsigned char *binary; - log_info("Testing execution after serialization and reloading of the object...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + log_info("Testing execution after serialization and reloading of the " + "object...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); // Get the size of the resulting binary (only one device) - error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); - test_error( error, "Unable to get binary size" ); + error = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, + sizeof(binarySize), &binarySize, NULL); + test_error(error, "Unable to get binary size"); // Sanity check - if( binarySize == 0 ) + if (binarySize == 0) { - log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Binary size of program is zero (in %s:%d)\n", + __FILE__, __LINE__); return -1; } // Create a buffer and get the actual binary - binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); - if (binary == NULL) { - log_error( "ERROR: Unable to allocate binary character array with %lu characters! (in %s:%d)\n", binarySize, __FILE__, __LINE__ ); + binary = (unsigned char *)malloc(sizeof(unsigned char) * binarySize); + if (binary == NULL) + { + log_error("ERROR: Unable to allocate binary character array with %lu " + "characters! (in %s:%d)\n", + binarySize, __FILE__, __LINE__); return -1; } - unsigned char *buffers[ 1 ] = { binary }; - cl_int loadErrors[ 1 ]; + unsigned char *buffers[1] = { binary }; + cl_int loadErrors[1]; // Do another sanity check here first size_t size; - error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size ); - test_error( error, "Unable to get expected size of binaries array" ); - if( size != sizeof( buffers ) ) + error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, 0, NULL, &size); + test_error(error, "Unable to get expected size of binaries array"); + if (size != sizeof(buffers)) { - log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ ); + log_error("ERROR: Expected size of binaries array in clGetProgramInfo " + "is incorrect (should be %d, got %d) (in %s:%d)\n", + (int)sizeof(buffers), (int)size, __FILE__, __LINE__); free(binary); return -1; } - error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); - test_error( error, "Unable to get program binary" ); + error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(buffers), + &buffers, NULL); + test_error(error, "Unable to get program binary"); // use clCreateProgramWithBinary - cl_program program_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error); - test_error( error, "Unable to create program with binary" ); + cl_program program_with_binary = clCreateProgramWithBinary( + context, 1, &deviceID, &binarySize, (const unsigned char **)buffers, + loadErrors, &error); + test_error(error, "Unable to create program with binary"); - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program_with_binary, NULL, NULL, &error); - test_error( error, "Unable to link a simple program" ); + cl_program my_newly_linked_program = + clLinkProgram(context, 1, &deviceID, NULL, 1, &program_with_binary, + NULL, NULL, &error); + test_error(error, "Unable to link a simple program"); - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( program_with_binary ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program_with_binary); + test_error(error, "Unable to release program object"); free(binary); return 0; } -int test_execute_after_serialize_reload_library(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_serialize_reload_library(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program, another_program; - size_t binarySize; + size_t binarySize; unsigned char *binary; - log_info("Testing execution after linking a binary with a simple library...\n"); + log_info( + "Testing execution after linking a binary with a simple library...\n"); // we will test creation of a simple library from one file - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, NULL, NULL, &error); - test_error( error, "Unable to create a simple library" ); + cl_program my_newly_minted_library = + clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, + NULL, NULL, &error); + test_error(error, "Unable to create a simple library"); // Get the size of the resulting library (only one device) - error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); - test_error( error, "Unable to get binary size" ); + error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARY_SIZES, + sizeof(binarySize), &binarySize, NULL); + test_error(error, "Unable to get binary size"); // Sanity check - if( binarySize == 0 ) + if (binarySize == 0) { - log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Binary size of program is zero (in %s:%d)\n", + __FILE__, __LINE__); return -1; } // Create a buffer and get the actual binary - binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); - if (binary == NULL) { - log_error( "ERROR: Unable to allocate binary character array with %lu characters (in %s:%d)!", binarySize, __FILE__, __LINE__); + binary = (unsigned char *)malloc(sizeof(unsigned char) * binarySize); + if (binary == NULL) + { + log_error("ERROR: Unable to allocate binary character array with %lu " + "characters (in %s:%d)!", + binarySize, __FILE__, __LINE__); return -1; } - unsigned char *buffers[ 1 ] = { binary }; - cl_int loadErrors[ 1 ]; + unsigned char *buffers[1] = { binary }; + cl_int loadErrors[1]; // Do another sanity check here first size_t size; - error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARIES, 0, NULL, &size ); - test_error( error, "Unable to get expected size of binaries array" ); - if( size != sizeof( buffers ) ) - { - log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ ); + error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARIES, 0, + NULL, &size); + test_error(error, "Unable to get expected size of binaries array"); + if (size != sizeof(buffers)) + { + log_error("ERROR: Expected size of binaries array in clGetProgramInfo " + "is incorrect (should be %d, got %d) (in %s:%d)\n", + (int)sizeof(buffers), (int)size, __FILE__, __LINE__); free(binary); return -1; } - error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); - test_error( error, "Unable to get program binary" ); + error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARIES, + sizeof(buffers), &buffers, NULL); + test_error(error, "Unable to get program binary"); // use clCreateProgramWithBinary - cl_program library_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error); - test_error( error, "Unable to create program with binary" ); + cl_program library_with_binary = clCreateProgramWithBinary( + context, 1, &deviceID, &binarySize, (const unsigned char **)buffers, + loadErrors, &error); + test_error(error, "Unable to create program with binary"); - error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); - if( another_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &another_program, 1, &another_simple_kernel); + if (another_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); - cl_program program_and_archive[2] = { another_program, library_with_binary }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); - test_error( error, "Unable to create an executable from a binary and a library" ); + cl_program program_and_archive[2] = { another_program, + library_with_binary }; + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); + test_error(error, + "Unable to create an executable from a binary and a library"); - cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(fully_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; - cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); - test_error( error, "Unable to create another simple kernel" ); + cl_kernel another_kernel = + clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); + test_error(error, "Unable to create another simple kernel"); error = verifyCopyBuffer(context, queue, another_kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseKernel( another_kernel ); - test_error( error, "Unable to release another kernel object" ); + error = clReleaseKernel(another_kernel); + test_error(error, "Unable to release another kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( another_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(another_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( library_with_binary ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(library_with_binary); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); free(binary); return 0; } -static void CL_CALLBACK program_compile_completion_callback(cl_program program, void* user_data) +static void CL_CALLBACK program_compile_completion_callback(cl_program program, + void *user_data) { int error; cl_event compile_program_completion_event = (cl_event)user_data; - log_info("in the program_compile_completion_callback: program %p just completed compiling with '%p'\n", program, compile_program_completion_event); + log_info("in the program_compile_completion_callback: program %p just " + "completed compiling with '%p'\n", + program, compile_program_completion_event); error = clSetUserEventStatus(compile_program_completion_event, CL_COMPLETE); if (error != CL_SUCCESS) { - log_error( "ERROR: in the program_compile_completion_callback: Unable to set user event status to CL_COMPLETE! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: in the program_compile_completion_callback: Unable " + "to set user event status to CL_COMPLETE! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); exit(-1); } - log_info("in the program_compile_completion_callback: Successfully signaled compile_program_completion_event event!\n"); + log_info("in the program_compile_completion_callback: Successfully " + "signaled compile_program_completion_event event!\n"); } -static void CL_CALLBACK program_link_completion_callback(cl_program program, void* user_data) +static void CL_CALLBACK program_link_completion_callback(cl_program program, + void *user_data) { int error; cl_event link_program_completion_event = (cl_event)user_data; - log_info("in the program_link_completion_callback: program %p just completed linking with '%p'\n", program, link_program_completion_event); + log_info("in the program_link_completion_callback: program %p just " + "completed linking with '%p'\n", + program, link_program_completion_event); error = clSetUserEventStatus(link_program_completion_event, CL_COMPLETE); if (error != CL_SUCCESS) { - log_error( "ERROR: in the program_link_completion_callback: Unable to set user event status to CL_COMPLETE! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: in the program_link_completion_callback: Unable to " + "set user event status to CL_COMPLETE! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); exit(-1); } - log_info("in the program_link_completion_callback: Successfully signaled link_program_completion_event event!\n"); + log_info("in the program_link_completion_callback: Successfully signaled " + "link_program_completion_event event!\n"); } -int test_execute_after_simple_compile_and_link_with_callbacks(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_simple_compile_and_link_with_callbacks( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements) { int error; cl_program program; cl_event compile_program_completion_event, link_program_completion_event; - log_info("Testing execution after a simple compile and link with callbacks...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + log_info("Testing execution after a simple compile and link with " + "callbacks...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } compile_program_completion_event = clCreateUserEvent(context, &error); - test_error( error, "Unable to create a user event"); + test_error(error, "Unable to create a user event"); error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, - program_compile_completion_callback, (void*)compile_program_completion_event); - test_error( error, "Unable to compile a simple program" ); + program_compile_completion_callback, + (void *)compile_program_completion_event); + test_error(error, "Unable to compile a simple program"); error = clWaitForEvents(1, &compile_program_completion_event); - test_error( error, "clWaitForEvents failed when waiting on compile_program_completion_event"); + test_error(error, + "clWaitForEvents failed when waiting on " + "compile_program_completion_event"); error = clReleaseEvent(compile_program_completion_event); - test_error( error, "Unable to release event object" ); + test_error(error, "Unable to release event object"); link_program_completion_event = clCreateUserEvent(context, &error); - test_error( error, "Unable to create a user event"); + test_error(error, "Unable to create a user event"); - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, - program_link_completion_callback, (void*)link_program_completion_event, &error); - test_error( error, "Unable to link a simple program" ); + cl_program my_newly_linked_program = + clLinkProgram(context, 1, &deviceID, NULL, 1, &program, + program_link_completion_callback, + (void *)link_program_completion_event, &error); + test_error(error, "Unable to link a simple program"); error = clWaitForEvents(1, &link_program_completion_event); - test_error( error, "clWaitForEvents failed when waiting on link_program_completion_event"); + test_error( + error, + "clWaitForEvents failed when waiting on link_program_completion_event"); error = clReleaseEvent(link_program_completion_event); - test_error( error, "Unable to release event object" ); + test_error(error, "Unable to release event object"); - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_simple_library_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_library_only(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program; log_info("Testing creation of a simple library...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, NULL, NULL, &error); - test_error( error, "Unable to create a simple library" ); + cl_program my_newly_minted_library = + clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, + NULL, NULL, &error); + test_error(error, "Unable to create a simple library"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); return 0; } -int test_simple_library_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_library_with_callback(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program; cl_event link_program_completion_event; log_info("Testing creation of a simple library with a callback...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); link_program_completion_event = clCreateUserEvent(context, &error); - test_error( error, "Unable to create a user event"); + test_error(error, "Unable to create a user event"); - simple_user_data simple_link_user_data = {when_i_pondered_weak_and_weary, link_program_completion_event}; + simple_user_data simple_link_user_data = { when_i_pondered_weak_and_weary, + link_program_completion_event }; - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, - simple_link_callback, (void*)&simple_link_user_data, &error); - test_error( error, "Unable to create a simple library" ); + cl_program my_newly_minted_library = clLinkProgram( + context, 1, &deviceID, "-create-library", 1, &program, + simple_link_callback, (void *)&simple_link_user_data, &error); + test_error(error, "Unable to create a simple library"); error = clWaitForEvents(1, &link_program_completion_event); - test_error( error, "clWaitForEvents failed when waiting on link_program_completion_event"); + test_error( + error, + "clWaitForEvents failed when waiting on link_program_completion_event"); /* All done! */ error = clReleaseEvent(link_program_completion_event); - test_error( error, "Unable to release event object" ); + test_error(error, "Unable to release event object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); return 0; } -int test_simple_library_with_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_simple_library_with_link(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program, another_program; log_info("Testing creation and linking with a simple library...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, NULL, NULL, &error); - test_error( error, "Unable to create a simple library" ); + cl_program my_newly_minted_library = + clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, + NULL, NULL, &error); + test_error(error, "Unable to create a simple library"); - error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); - if( another_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &another_program, 1, &another_simple_kernel); + if (another_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); - cl_program program_and_archive[2] = { another_program, my_newly_minted_library }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); - test_error( error, "Unable to create an executable from a binary and a library" ); + cl_program program_and_archive[2] = { another_program, + my_newly_minted_library }; + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); + test_error(error, + "Unable to create an executable from a binary and a library"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( another_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(another_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_execute_after_simple_library_with_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_simple_library_with_link(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program, another_program; - log_info("Testing execution after linking a binary with a simple library...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + log_info( + "Testing execution after linking a binary with a simple library...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, NULL, NULL, &error); - test_error( error, "Unable to create a simple library" ); + cl_program my_newly_minted_library = + clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, + NULL, NULL, &error); + test_error(error, "Unable to create a simple library"); - error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); - if( another_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &another_program, 1, &another_simple_kernel); + if (another_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); - cl_program program_and_archive[2] = { another_program, my_newly_minted_library }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); - test_error( error, "Unable to create an executable from a binary and a library" ); + cl_program program_and_archive[2] = { another_program, + my_newly_minted_library }; + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); + test_error(error, + "Unable to create an executable from a binary and a library"); - cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(fully_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; - cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); - test_error( error, "Unable to create another simple kernel" ); + cl_kernel another_kernel = + clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); + test_error(error, "Unable to create another simple kernel"); error = verifyCopyBuffer(context, queue, another_kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseKernel( another_kernel ); - test_error( error, "Unable to release another kernel object" ); + error = clReleaseKernel(another_kernel); + test_error(error, "Unable to release another kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( another_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(another_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_two_file_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_two_file_link(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program, another_program; log_info("Testing two file compiling and linking...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); - if( another_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &another_program, 1, &another_simple_kernel); + if (another_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); cl_program two_programs[2] = { program, another_program }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); - test_error( error, "Unable to create an executable from two binaries" ); + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); + test_error(error, "Unable to create an executable from two binaries"); /* All done! */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( another_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(another_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_execute_after_two_file_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_two_file_link(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_program program, another_program; - log_info("Testing two file compiling and linking and execution of two kernels afterwards ...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + log_info("Testing two file compiling and linking and execution of two " + "kernels afterwards ...\n"); + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); - if( another_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &another_program, 1, &another_simple_kernel); + if (another_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); cl_program two_programs[2] = { program, another_program }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); - test_error( error, "Unable to create an executable from two binaries" ); + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); + test_error(error, "Unable to create an executable from two binaries"); - cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(fully_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; - cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); - test_error( error, "Unable to create another simple kernel" ); + cl_kernel another_kernel = + clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); + test_error(error, "Unable to create another simple kernel"); error = verifyCopyBuffer(context, queue, another_kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseKernel( another_kernel ); - test_error( error, "Unable to release another kernel object" ); + error = clReleaseKernel(another_kernel); + test_error(error, "Unable to release another kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( another_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(another_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_execute_after_embedded_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_embedded_header_link(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program, header, simple_program; log_info("Testing execution after embedded header link...\n"); // we will test execution after compiling and linking with embedded headers - program = clCreateProgramWithSource(context, 1, &another_simple_kernel_with_header, NULL, &error); - if( program == NULL || error != CL_SUCCESS ) + program = clCreateProgramWithSource( + context, 1, &another_simple_kernel_with_header, NULL, &error); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - header = clCreateProgramWithSource(context, 1, &simple_header, NULL, &error); - if( header == NULL || error != CL_SUCCESS ) + header = + clCreateProgramWithSource(context, 1, &simple_header, NULL, &error); + if (header == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, &simple_header_name, NULL, NULL); - test_error( error, "Unable to compile a simple program with embedded header" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, + &simple_header_name, NULL, NULL); + test_error(error, + "Unable to compile a simple program with embedded header"); - simple_program = clCreateProgramWithSource(context, 1, &simple_kernel, NULL, &error); - if( simple_program == NULL || error != CL_SUCCESS ) + simple_program = + clCreateProgramWithSource(context, 1, &simple_kernel, NULL, &error); + if (simple_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); cl_program two_programs[2] = { program, simple_program }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); - test_error( error, "Unable to create an executable from two binaries, one compiled with embedded header" ); + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); + test_error(error, + "Unable to create an executable from two binaries, one compiled " + "with embedded header"); - cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(fully_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; - cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); - test_error( error, "Unable to create another simple kernel" ); + cl_kernel another_kernel = + clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); + test_error(error, "Unable to create another simple kernel"); error = verifyCopyBuffer(context, queue, another_kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseKernel( another_kernel ); - test_error( error, "Unable to release another kernel object" ); + error = clReleaseKernel(another_kernel); + test_error(error, "Unable to release another kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( header ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(header); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( simple_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(simple_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); return 0; } #if defined(__APPLE__) || defined(__linux) -#define _mkdir(x) mkdir(x,S_IRWXU) +#define _mkdir(x) mkdir(x, S_IRWXU) #define _chdir chdir #define _rmdir rmdir #define _unlink unlink @@ -2517,461 +3078,602 @@ int test_execute_after_embedded_header_link(cl_device_id deviceID, cl_context co #include <direct.h> #endif -int test_execute_after_included_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_execute_after_included_header_link(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_program program, simple_program; log_info("Testing execution after included header link...\n"); // we will test execution after compiling and linking with included headers - program = clCreateProgramWithSource(context, 1, &another_simple_kernel_with_header, NULL, &error); - if( program == NULL || error != CL_SUCCESS ) + program = clCreateProgramWithSource( + context, 1, &another_simple_kernel_with_header, NULL, &error); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } /* setup */ -#if (defined(__linux__) || defined(__APPLE__)) && (!defined( __ANDROID__ )) +#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__)) /* Some tests systems doesn't allow one to write in the test directory */ - if (_chdir("/tmp") != 0) { - log_error( "ERROR: Unable to remove directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ ); + if (_chdir("/tmp") != 0) + { + log_error("ERROR: Unable to remove directory foo/bar! (in %s:%d)\n", + __FILE__, __LINE__); return -1; } #endif - if (_mkdir("foo") != 0) { - log_error( "ERROR: Unable to create directory foo! (in %s:%d)\n", __FILE__, __LINE__ ); + if (_mkdir("foo") != 0) + { + log_error("ERROR: Unable to create directory foo! (in %s:%d)\n", + __FILE__, __LINE__); return -1; } - if (_mkdir("foo/bar") != 0) { - log_error( "ERROR: Unable to create directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ ); + if (_mkdir("foo/bar") != 0) + { + log_error("ERROR: Unable to create directory foo/bar! (in %s:%d)\n", + __FILE__, __LINE__); return -1; } - if (_chdir("foo/bar") != 0) { - log_error( "ERROR: Unable to change to directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ ); + if (_chdir("foo/bar") != 0) + { + log_error("ERROR: Unable to change to directory foo/bar! (in %s:%d)\n", + __FILE__, __LINE__); return -1; } - FILE* simple_header_file = fopen(simple_header_name, "w"); - if (simple_header_file == NULL) { - log_error( "ERROR: Unable to create simple header file %s! (in %s:%d)\n", simple_header_name, __FILE__, __LINE__ ); + FILE *simple_header_file = fopen(simple_header_name, "w"); + if (simple_header_file == NULL) + { + log_error("ERROR: Unable to create simple header file %s! (in %s:%d)\n", + simple_header_name, __FILE__, __LINE__); return -1; } - if (fprintf(simple_header_file, "%s", simple_header) < 0) { - log_error( "ERROR: Unable to write to simple header file %s! (in %s:%d)\n", simple_header_name, __FILE__, __LINE__); + if (fprintf(simple_header_file, "%s", simple_header) < 0) + { + log_error( + "ERROR: Unable to write to simple header file %s! (in %s:%d)\n", + simple_header_name, __FILE__, __LINE__); return -1; } - if (fclose(simple_header_file) != 0) { - log_error( "ERROR: Unable to close simple header file %s! (in %s:%d)\n", simple_header_name, __FILE__, __LINE__); + if (fclose(simple_header_file) != 0) + { + log_error("ERROR: Unable to close simple header file %s! (in %s:%d)\n", + simple_header_name, __FILE__, __LINE__); return -1; } - if (_chdir("../..") != 0) { - log_error( "ERROR: Unable to change to original working directory! (in %s:%d)\n", __FILE__, __LINE__); + if (_chdir("../..") != 0) + { + log_error("ERROR: Unable to change to original working directory! (in " + "%s:%d)\n", + __FILE__, __LINE__); return -1; } -#if (defined(__linux__) || defined(__APPLE__)) && (!defined( __ANDROID__ )) - error = clCompileProgram(program, 1, &deviceID, "-I/tmp/foo/bar", 0, NULL, NULL, NULL, NULL); +#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__)) + error = clCompileProgram(program, 1, &deviceID, "-I/tmp/foo/bar", 0, NULL, + NULL, NULL, NULL); #else - error = clCompileProgram(program, 1, &deviceID, "-Ifoo/bar", 0, NULL, NULL, NULL, NULL); + error = clCompileProgram(program, 1, &deviceID, "-Ifoo/bar", 0, NULL, NULL, + NULL, NULL); #endif - test_error( error, "Unable to compile a simple program with included header" ); + test_error(error, + "Unable to compile a simple program with included header"); /* cleanup */ - if (_chdir("foo/bar") != 0) { - log_error( "ERROR: Unable to change to directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ ); + if (_chdir("foo/bar") != 0) + { + log_error("ERROR: Unable to change to directory foo/bar! (in %s:%d)\n", + __FILE__, __LINE__); return -1; } - if (_unlink(simple_header_name) != 0) { - log_error( "ERROR: Unable to remove simple header file %s! (in %s:%d)\n", simple_header_name, __FILE__, __LINE__ ); + if (_unlink(simple_header_name) != 0) + { + log_error("ERROR: Unable to remove simple header file %s! (in %s:%d)\n", + simple_header_name, __FILE__, __LINE__); return -1; } - if (_chdir("../..") != 0) { - log_error( "ERROR: Unable to change to original working directory! (in %s:%d)\n", __FILE__, __LINE__ ); + if (_chdir("../..") != 0) + { + log_error("ERROR: Unable to change to original working directory! (in " + "%s:%d)\n", + __FILE__, __LINE__); return -1; } - if (_rmdir("foo/bar") != 0) { - log_error( "ERROR: Unable to remove directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ ); + if (_rmdir("foo/bar") != 0) + { + log_error("ERROR: Unable to remove directory foo/bar! (in %s:%d)\n", + __FILE__, __LINE__); return -1; } - if (_rmdir("foo") != 0) { - log_error( "ERROR: Unable to remove directory foo! (in %s:%d)\n", __FILE__, __LINE__ ); + if (_rmdir("foo") != 0) + { + log_error("ERROR: Unable to remove directory foo! (in %s:%d)\n", + __FILE__, __LINE__); return -1; } - simple_program = clCreateProgramWithSource(context, 1, &simple_kernel, NULL, &error); - if( simple_program == NULL || error != CL_SUCCESS ) + simple_program = + clCreateProgramWithSource(context, 1, &simple_kernel, NULL, &error); + if (simple_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); cl_program two_programs[2] = { program, simple_program }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); - test_error( error, "Unable to create an executable from two binaries, one compiled with embedded header" ); + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error); + test_error(error, + "Unable to create an executable from two binaries, one compiled " + "with embedded header"); - cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(fully_linked_program, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; - cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); - test_error( error, "Unable to create another simple kernel" ); + cl_kernel another_kernel = + clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error); + test_error(error, "Unable to create another simple kernel"); error = verifyCopyBuffer(context, queue, another_kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseKernel( another_kernel ); - test_error( error, "Unable to release another kernel object" ); + error = clReleaseKernel(another_kernel); + test_error(error, "Unable to release another kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( simple_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(simple_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_program_binary_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_program_binary_type(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; - cl_program program, another_program, program_with_binary, fully_linked_program_with_binary; + cl_program program, another_program, program_with_binary, + fully_linked_program_with_binary; cl_program_binary_type program_type = -1; size_t size; - size_t binarySize; + size_t binarySize; unsigned char *binary; log_info("Testing querying of program binary type...\n"); - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, + NULL); + test_error(error, "Unable to compile a simple program"); - error = clGetProgramBuildInfo (program, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); - test_error( error, "Unable to get program binary type" ); + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BINARY_TYPE, + sizeof(cl_program_binary_type), &program_type, + NULL); + test_error(error, "Unable to get program binary type"); if (program_type != CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) { - log_error( "ERROR: Expected program type of a just compiled program to be CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Expected program type of a just compiled program to " + "be CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT (in %s:%d)\n", + __FILE__, __LINE__); return -1; } program_type = -1; // Get the size of the resulting binary (only one device) - error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); - test_error( error, "Unable to get binary size" ); + error = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, + sizeof(binarySize), &binarySize, NULL); + test_error(error, "Unable to get binary size"); // Sanity check - if( binarySize == 0 ) + if (binarySize == 0) { - log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Binary size of program is zero (in %s:%d)\n", + __FILE__, __LINE__); return -1; } // Create a buffer and get the actual binary { - binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); - if (binary == NULL) { - log_error( "ERROR: Unable to allocate binary character array with %lu characters! (in %s:%d)\n", binarySize, __FILE__, __LINE__ ); + binary = (unsigned char *)malloc(sizeof(unsigned char) * binarySize); + if (binary == NULL) + { + log_error("ERROR: Unable to allocate binary character array with " + "%lu characters! (in %s:%d)\n", + binarySize, __FILE__, __LINE__); return -1; } - unsigned char *buffers[ 1 ] = { binary }; - cl_int loadErrors[ 1 ]; + unsigned char *buffers[1] = { binary }; + cl_int loadErrors[1]; // Do another sanity check here first size_t size; - error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size ); - test_error( error, "Unable to get expected size of binaries array" ); - if( size != sizeof( buffers ) ) + error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, 0, NULL, &size); + test_error(error, "Unable to get expected size of binaries array"); + if (size != sizeof(buffers)) { - log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ ); + log_error( + "ERROR: Expected size of binaries array in clGetProgramInfo is " + "incorrect (should be %d, got %d) (in %s:%d)\n", + (int)sizeof(buffers), (int)size, __FILE__, __LINE__); free(binary); return -1; } - error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); - test_error( error, "Unable to get program binary" ); + error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(buffers), + &buffers, NULL); + test_error(error, "Unable to get program binary"); // use clCreateProgramWithBinary - program_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error); - test_error( error, "Unable to create program with binary" ); - - error = clGetProgramBuildInfo (program_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); - test_error( error, "Unable to get program binary type" ); + program_with_binary = clCreateProgramWithBinary( + context, 1, &deviceID, &binarySize, (const unsigned char **)buffers, + loadErrors, &error); + test_error(error, "Unable to create program with binary"); + + error = clGetProgramBuildInfo( + program_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, + sizeof(cl_program_binary_type), &program_type, NULL); + test_error(error, "Unable to get program binary type"); if (program_type != CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT) { - log_error( "ERROR: Expected program type of a program created from compiled object to be CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Expected program type of a program created from " + "compiled object to be " + "CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT (in %s:%d)\n", + __FILE__, __LINE__); return -1; } program_type = -1; free(binary); } - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program_with_binary, NULL, NULL, &error); - test_error( error, "Unable to create a simple library" ); - error = clGetProgramBuildInfo (my_newly_minted_library, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); - test_error( error, "Unable to get program binary type" ); + cl_program my_newly_minted_library = + clLinkProgram(context, 1, &deviceID, "-create-library", 1, + &program_with_binary, NULL, NULL, &error); + test_error(error, "Unable to create a simple library"); + error = clGetProgramBuildInfo( + my_newly_minted_library, deviceID, CL_PROGRAM_BINARY_TYPE, + sizeof(cl_program_binary_type), &program_type, NULL); + test_error(error, "Unable to get program binary type"); if (program_type != CL_PROGRAM_BINARY_TYPE_LIBRARY) { - log_error( "ERROR: Expected program type of a just linked library to be CL_PROGRAM_BINARY_TYPE_LIBRARY (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Expected program type of a just linked library to be " + "CL_PROGRAM_BINARY_TYPE_LIBRARY (in %s:%d)\n", + __FILE__, __LINE__); return -1; } program_type = -1; // Get the size of the resulting library (only one device) - error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); - test_error( error, "Unable to get binary size" ); + error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARY_SIZES, + sizeof(binarySize), &binarySize, NULL); + test_error(error, "Unable to get binary size"); // Sanity check - if( binarySize == 0 ) + if (binarySize == 0) { - log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Binary size of program is zero (in %s:%d)\n", + __FILE__, __LINE__); return -1; } // Create a buffer and get the actual binary - binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); - if (binary == NULL) { - log_error( "ERROR: Unable to allocate binary character array with %lu characters! (in %s:%d)\n", binarySize, __FILE__, __LINE__); + binary = (unsigned char *)malloc(sizeof(unsigned char) * binarySize); + if (binary == NULL) + { + log_error("ERROR: Unable to allocate binary character array with %lu " + "characters! (in %s:%d)\n", + binarySize, __FILE__, __LINE__); return -1; } - unsigned char *buffers[ 1 ] = { binary }; - cl_int loadErrors[ 1 ]; + unsigned char *buffers[1] = { binary }; + cl_int loadErrors[1]; // Do another sanity check here first - error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARIES, 0, NULL, &size ); - test_error( error, "Unable to get expected size of binaries array" ); - if( size != sizeof( buffers ) ) - { - log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ ); + error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARIES, 0, + NULL, &size); + test_error(error, "Unable to get expected size of binaries array"); + if (size != sizeof(buffers)) + { + log_error("ERROR: Expected size of binaries array in clGetProgramInfo " + "is incorrect (should be %d, got %d) (in %s:%d)\n", + (int)sizeof(buffers), (int)size, __FILE__, __LINE__); free(binary); return -1; } - error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); - test_error( error, "Unable to get program binary" ); + error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARIES, + sizeof(buffers), &buffers, NULL); + test_error(error, "Unable to get program binary"); // use clCreateProgramWithBinary - cl_program library_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error); - test_error( error, "Unable to create program with binary" ); - error = clGetProgramBuildInfo (library_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); - test_error( error, "Unable to get program binary type" ); + cl_program library_with_binary = clCreateProgramWithBinary( + context, 1, &deviceID, &binarySize, (const unsigned char **)buffers, + loadErrors, &error); + test_error(error, "Unable to create program with binary"); + error = clGetProgramBuildInfo( + library_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, + sizeof(cl_program_binary_type), &program_type, NULL); + test_error(error, "Unable to get program binary type"); if (program_type != CL_PROGRAM_BINARY_TYPE_LIBRARY) { - log_error( "ERROR: Expected program type of a library loaded with binary to be CL_PROGRAM_BINARY_TYPE_LIBRARY (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Expected program type of a library loaded with " + "binary to be CL_PROGRAM_BINARY_TYPE_LIBRARY (in %s:%d)\n", + __FILE__, __LINE__); return -1; } program_type = -1; - free(binary); + free(binary); - error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel); - if( another_program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program( + context, &another_program, 1, &another_simple_kernel); + if (another_program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } - error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, + NULL, NULL); + test_error(error, "Unable to compile a simple program"); - cl_program program_and_archive[2] = { another_program, library_with_binary }; - cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); - test_error( error, "Unable to create an executable from a binary and a library" ); + cl_program program_and_archive[2] = { another_program, + library_with_binary }; + cl_program fully_linked_program = clLinkProgram( + context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error); + test_error(error, + "Unable to create an executable from a binary and a library"); - error = clGetProgramBuildInfo (fully_linked_program, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); - test_error( error, "Unable to get program binary type" ); + error = clGetProgramBuildInfo( + fully_linked_program, deviceID, CL_PROGRAM_BINARY_TYPE, + sizeof(cl_program_binary_type), &program_type, NULL); + test_error(error, "Unable to get program binary type"); if (program_type != CL_PROGRAM_BINARY_TYPE_EXECUTABLE) { - log_error( "ERROR: Expected program type of a newly build executable to be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Expected program type of a newly build executable to " + "be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", + __FILE__, __LINE__); return -1; } program_type = -1; // Get the size of the resulting binary (only one device) - error = clGetProgramInfo( fully_linked_program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL ); - test_error( error, "Unable to get binary size" ); + error = clGetProgramInfo(fully_linked_program, CL_PROGRAM_BINARY_SIZES, + sizeof(binarySize), &binarySize, NULL); + test_error(error, "Unable to get binary size"); // Sanity check - if( binarySize == 0 ) + if (binarySize == 0) { - log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Binary size of program is zero (in %s:%d)\n", + __FILE__, __LINE__); return -1; } // Create a buffer and get the actual binary { - binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize); - if (binary == NULL) { - log_error( "ERROR: Unable to allocate binary character array with %lu characters! (in %s:%d)\n", binarySize, __FILE__, __LINE__ ); + binary = (unsigned char *)malloc(sizeof(unsigned char) * binarySize); + if (binary == NULL) + { + log_error("ERROR: Unable to allocate binary character array with " + "%lu characters! (in %s:%d)\n", + binarySize, __FILE__, __LINE__); return -1; } - unsigned char *buffers[ 1 ] = { binary }; - cl_int loadErrors[ 1 ]; + unsigned char *buffers[1] = { binary }; + cl_int loadErrors[1]; // Do another sanity check here first size_t size; - error = clGetProgramInfo( fully_linked_program, CL_PROGRAM_BINARIES, 0, NULL, &size ); - test_error( error, "Unable to get expected size of binaries array" ); - if( size != sizeof( buffers ) ) + error = clGetProgramInfo(fully_linked_program, CL_PROGRAM_BINARIES, 0, + NULL, &size); + test_error(error, "Unable to get expected size of binaries array"); + if (size != sizeof(buffers)) { - log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ ); + log_error( + "ERROR: Expected size of binaries array in clGetProgramInfo is " + "incorrect (should be %d, got %d) (in %s:%d)\n", + (int)sizeof(buffers), (int)size, __FILE__, __LINE__); free(binary); return -1; } - error = clGetProgramInfo( fully_linked_program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL ); - test_error( error, "Unable to get program binary" ); + error = clGetProgramInfo(fully_linked_program, CL_PROGRAM_BINARIES, + sizeof(buffers), &buffers, NULL); + test_error(error, "Unable to get program binary"); // use clCreateProgramWithBinary - fully_linked_program_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error); - test_error( error, "Unable to create program with binary" ); - - error = clGetProgramBuildInfo (fully_linked_program_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); - test_error( error, "Unable to get program binary type" ); + fully_linked_program_with_binary = clCreateProgramWithBinary( + context, 1, &deviceID, &binarySize, (const unsigned char **)buffers, + loadErrors, &error); + test_error(error, "Unable to create program with binary"); + + error = clGetProgramBuildInfo( + fully_linked_program_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, + sizeof(cl_program_binary_type), &program_type, NULL); + test_error(error, "Unable to get program binary type"); if (program_type != CL_PROGRAM_BINARY_TYPE_EXECUTABLE) { - log_error( "ERROR: Expected program type of a program created from a fully linked executable binary to be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: Expected program type of a program created from " + "a fully linked executable binary to be " + "CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", + __FILE__, __LINE__); return -1; } program_type = -1; free(binary); } - error = clBuildProgram(fully_linked_program_with_binary, 1, &deviceID, NULL, NULL, NULL); - test_error( error, "Unable to build a simple program" ); + error = clBuildProgram(fully_linked_program_with_binary, 1, &deviceID, NULL, + NULL, NULL); + test_error(error, "Unable to build a simple program"); - cl_kernel kernel = clCreateKernel(fully_linked_program_with_binary, "CopyBuffer", &error); - test_error( error, "Unable to create a simple kernel" ); + cl_kernel kernel = + clCreateKernel(fully_linked_program_with_binary, "CopyBuffer", &error); + test_error(error, "Unable to create a simple kernel"); error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; - cl_kernel another_kernel = clCreateKernel(fully_linked_program_with_binary, "AnotherCopyBuffer", &error); - test_error( error, "Unable to create another simple kernel" ); + cl_kernel another_kernel = clCreateKernel(fully_linked_program_with_binary, + "AnotherCopyBuffer", &error); + test_error(error, "Unable to create another simple kernel"); error = verifyCopyBuffer(context, queue, another_kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseKernel( another_kernel ); - test_error( error, "Unable to release another kernel object" ); + error = clReleaseKernel(another_kernel); + test_error(error, "Unable to release another kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - /* Oh, one more thing. Steve Jobs and apparently Herb Sutter. The question is "Who is copying whom?" */ - error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel); - if( program == NULL || error != CL_SUCCESS ) + /* Oh, one more thing. Steve Jobs and apparently Herb Sutter. The question + * is "Who is copying whom?" */ + error = create_single_kernel_helper_create_program(context, &program, 1, + &simple_kernel); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ ); + log_error( + "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", + IGetErrorString(error), __FILE__, __LINE__); return -1; } error = clBuildProgram(program, 1, &deviceID, NULL, NULL, NULL); - test_error( error, "Unable to build a simple program" ); - error = clGetProgramBuildInfo (program, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL); - test_error( error, "Unable to get program binary type" ); + test_error(error, "Unable to build a simple program"); + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BINARY_TYPE, + sizeof(cl_program_binary_type), &program_type, + NULL); + test_error(error, "Unable to get program binary type"); if (program_type != CL_PROGRAM_BINARY_TYPE_EXECUTABLE) { - log_error( "ERROR: Expected program type of a program created from compiled object to be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", __FILE__, __LINE__ ); + log_error( + "ERROR: Expected program type of a program created from compiled " + "object to be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", + __FILE__, __LINE__); return -1; } program_type = -1; /* All's well that ends well. William Shakespeare */ - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( another_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(another_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( library_with_binary ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(library_with_binary); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( fully_linked_program_with_binary ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(fully_linked_program_with_binary); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( program_with_binary ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program_with_binary); + test_error(error, "Unable to release program object"); return 0; } -volatile int compileNotificationSent; +volatile int compileNotificationSent; -void CL_CALLBACK test_notify_compile_complete( cl_program program, void *userData ) +void CL_CALLBACK test_notify_compile_complete(cl_program program, + void *userData) { - if( userData == NULL || strcmp( (char *)userData, "compilation" ) != 0 ) + if (userData == NULL || strcmp((char *)userData, "compilation") != 0) { - log_error( "ERROR: User data passed in to compile notify function was not correct! (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: User data passed in to compile notify function was " + "not correct! (in %s:%d)\n", + __FILE__, __LINE__); compileNotificationSent = -1; } else compileNotificationSent = 1; - log_info( "\n <-- program successfully compiled\n" ); + log_info("\n <-- program successfully compiled\n"); } -volatile int libraryCreationNotificationSent; +volatile int libraryCreationNotificationSent; -void CL_CALLBACK test_notify_create_library_complete( cl_program program, void *userData ) +void CL_CALLBACK test_notify_create_library_complete(cl_program program, + void *userData) { - if( userData == NULL || strcmp( (char *)userData, "create library" ) != 0 ) + if (userData == NULL || strcmp((char *)userData, "create library") != 0) { - log_error( "ERROR: User data passed in to library creation notify function was not correct! (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: User data passed in to library creation notify " + "function was not correct! (in %s:%d)\n", + __FILE__, __LINE__); libraryCreationNotificationSent = -1; } else libraryCreationNotificationSent = 1; - log_info( "\n <-- library successfully created\n" ); + log_info("\n <-- library successfully created\n"); } -volatile int linkNotificationSent; +volatile int linkNotificationSent; -void CL_CALLBACK test_notify_link_complete( cl_program program, void *userData ) +void CL_CALLBACK test_notify_link_complete(cl_program program, void *userData) { - if( userData == NULL || strcmp( (char *)userData, "linking" ) != 0 ) + if (userData == NULL || strcmp((char *)userData, "linking") != 0) { - log_error( "ERROR: User data passed in to link notify function was not correct! (in %s:%d)\n", __FILE__, __LINE__ ); + log_error("ERROR: User data passed in to link notify function was not " + "correct! (in %s:%d)\n", + __FILE__, __LINE__); linkNotificationSent = -1; } else linkNotificationSent = 1; - log_info( "\n <-- program successfully linked\n" ); + log_info("\n <-- program successfully linked\n"); } -int test_large_compile_and_link_status_options_log(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines) +int test_large_compile_and_link_status_options_log(cl_context context, + cl_device_id deviceID, + cl_command_queue queue, + unsigned int numLines) { int error; cl_program program; - cl_program * simple_kernels; + cl_program *simple_kernels; const char **lines; unsigned int i; char buffer[MAX_LINE_SIZE_IN_PROGRAM]; @@ -2984,263 +3686,349 @@ int test_large_compile_and_link_status_options_log(cl_context context, cl_device cl_build_status status; size_t size_ret; - compileNotificationSent = libraryCreationNotificationSent = linkNotificationSent = 0; + compileNotificationSent = libraryCreationNotificationSent = + linkNotificationSent = 0; - simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program)); - if (simple_kernels == NULL) { - log_error( "ERROR: Unable to allocate kernels array with %d kernels! (in %s:%d)\n", numLines, __FILE__, __LINE__); + simple_kernels = (cl_program *)malloc(numLines * sizeof(cl_program)); + if (simple_kernels == NULL) + { + log_error("ERROR: Unable to allocate kernels array with %d kernels! " + "(in %s:%d)\n", + numLines, __FILE__, __LINE__); return -1; } /* First, allocate the array for our line pointers */ - lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) ); - if (lines == NULL) { - log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__); + lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *)); + if (lines == NULL) + { + log_error( + "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", + (2 * numLines + 2), __FILE__, __LINE__); return -1; } - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, composite_kernel_extern_template, i); lines[i] = _strdup(buffer); } /* First and last lines are easy */ - lines[ numLines ] = composite_kernel_start; - lines[ 2*numLines + 1] = composite_kernel_end; + lines[numLines] = composite_kernel_start; + lines[2 * numLines + 1] = composite_kernel_end; /* Fill the rest with templated kernels */ - for(i = numLines + 1; i < 2*numLines + 1; i++ ) + for (i = numLines + 1; i < 2 * numLines + 1; i++) { sprintf(buffer, composite_kernel_template, i - numLines - 1); - lines[ i ] = _strdup(buffer); + lines[i] = _strdup(buffer); } /* Try to create a program with these lines */ - error = create_single_kernel_helper_create_program(context, &program, 2 * numLines + 2, lines); - if( program == NULL || error != CL_SUCCESS ) + error = create_single_kernel_helper_create_program(context, &program, + 2 * numLines + 2, lines); + if (program == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d lines! " + "(%s) (in %s:%d)\n", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Lets check that the compilation status is CL_BUILD_NONE */ - error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Unable to get program compile status" ); + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Unable to get program compile status"); if (status != CL_BUILD_NONE) { - log_error( "ERROR: Expected compile status to be CL_BUILD_NONE prior to the beginning of the compilation! (status: %d in %s:%d)\n", (int)status, __FILE__, __LINE__ ); + log_error("ERROR: Expected compile status to be CL_BUILD_NONE prior to " + "the beginning of the compilation! (status: %d in %s:%d)\n", + (int)status, __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, test_notify_compile_complete, (void *)"compilation"); - test_error( error, "Unable to compile a simple program" ); + error = + clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, + test_notify_compile_complete, (void *)"compilation"); + test_error(error, "Unable to compile a simple program"); - /* Wait for compile to complete (just keep polling, since we're just a test */ - error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Unable to get program compile status" ); + /* Wait for compile to complete (just keep polling, since we're just a test + */ + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Unable to get program compile status"); - while( (int)status == CL_BUILD_IN_PROGRESS ) + while ((int)status == CL_BUILD_IN_PROGRESS) { - log_info( "\n -- still waiting for compile... (status is %d)", status ); - sleep( 1 ); - error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Unable to get program compile status" ); + log_info("\n -- still waiting for compile... (status is %d)", status); + sleep(1); + error = + clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Unable to get program compile status"); } - if( status != CL_BUILD_SUCCESS ) + if (status != CL_BUILD_SUCCESS) { - log_error( "ERROR: compile failed! (status: %d in %s:%d)\n", (int)status, __FILE__, __LINE__ ); + log_error("ERROR: compile failed! (status: %d in %s:%d)\n", (int)status, + __FILE__, __LINE__); return -1; } - error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret ); - test_error( error, "Device failed to return compile log size" ); + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0, + NULL, &size_ret); + test_error(error, "Device failed to return compile log size"); compile_log = (char *)malloc(size_ret); - error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_LOG, size_ret, compile_log, NULL ); - if (error != CL_SUCCESS){ - log_error("Device failed to return a compile log (in %s:%d)\n", __FILE__, __LINE__); + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, + size_ret, compile_log, NULL); + if (error != CL_SUCCESS) + { + log_error("Device failed to return a compile log (in %s:%d)\n", + __FILE__, __LINE__); test_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"); } log_info("BUILD LOG: %s\n", compile_log); free(compile_log); - error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret ); + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_OPTIONS, + 0, NULL, &size_ret); test_error(error, "Device failed to return compile options size"); compile_options = (char *)malloc(size_ret); - error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_OPTIONS, size_ret, compile_options, NULL ); - test_error(error, "Device failed to return compile options.\nclGetProgramBuildInfo CL_PROGRAM_BUILD_OPTIONS failed"); + error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_OPTIONS, + size_ret, compile_options, NULL); + test_error( + error, + "Device failed to return compile options.\nclGetProgramBuildInfo " + "CL_PROGRAM_BUILD_OPTIONS failed"); log_info("BUILD OPTIONS: %s\n", compile_options); free(compile_options); /* Create and compile templated kernels */ - for( i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { sprintf(buffer, simple_kernel_template, i); - const char* kernel_source = _strdup(buffer); - error = create_single_kernel_helper_create_program(context, &simple_kernels[i], 1, &kernel_source); - if( simple_kernels[i] == NULL || error != CL_SUCCESS ) + const char *kernel_source = _strdup(buffer); + error = create_single_kernel_helper_create_program( + context, &simple_kernels[i], 1, &kernel_source); + if (simple_kernels[i] == NULL || error != CL_SUCCESS) { - log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)", numLines, IGetErrorString( error ), __FILE__, __LINE__ ); + log_error("ERROR: Unable to create long test program with %d " + "lines! (%s in %s:%d)", + numLines, IGetErrorString(error), __FILE__, __LINE__); return -1; } /* Compile it */ - error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL); - test_error( error, "Unable to compile a simple program" ); + error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, + NULL, NULL, NULL); + test_error(error, "Unable to compile a simple program"); - free((void*)kernel_source); + free((void *)kernel_source); } /* Create library out of compiled templated kernels */ - cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", numLines, simple_kernels, test_notify_create_library_complete, (void *)"create library", &error); - test_error( error, "Unable to create a multi-line library" ); + cl_program my_newly_minted_library = clLinkProgram( + context, 1, &deviceID, "-create-library", numLines, simple_kernels, + test_notify_create_library_complete, (void *)"create library", &error); + test_error(error, "Unable to create a multi-line library"); - /* Wait for library creation to complete (just keep polling, since we're just a test */ - error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Unable to get library creation link status" ); + /* Wait for library creation to complete (just keep polling, since we're + * just a test */ + error = clGetProgramBuildInfo(my_newly_minted_library, deviceID, + CL_PROGRAM_BUILD_STATUS, sizeof(status), + &status, NULL); + test_error(error, "Unable to get library creation link status"); - while( (int)status == CL_BUILD_IN_PROGRESS ) + while ((int)status == CL_BUILD_IN_PROGRESS) { - log_info( "\n -- still waiting for library creation... (status is %d)", status ); - sleep( 1 ); - error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Unable to get library creation link status" ); + log_info("\n -- still waiting for library creation... (status is %d)", + status); + sleep(1); + error = clGetProgramBuildInfo(my_newly_minted_library, deviceID, + CL_PROGRAM_BUILD_STATUS, sizeof(status), + &status, NULL); + test_error(error, "Unable to get library creation link status"); } - if( status != CL_BUILD_SUCCESS ) + if (status != CL_BUILD_SUCCESS) { - log_error( "ERROR: library creation failed! (status: %d in %s:%d)\n", (int)status, __FILE__, __LINE__ ); + log_error("ERROR: library creation failed! (status: %d in %s:%d)\n", + (int)status, __FILE__, __LINE__); return -1; } - error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret ); - test_error( error, "Device failed to return a library creation log size" ); + error = clGetProgramBuildInfo(my_newly_minted_library, deviceID, + CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret); + test_error(error, "Device failed to return a library creation log size"); library_log = (char *)malloc(size_ret); - error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_LOG, size_ret, library_log, NULL ); - if (error != CL_SUCCESS) { - log_error("Device failed to return a library creation log (in %s:%d)\n", __FILE__, __LINE__); + error = clGetProgramBuildInfo(my_newly_minted_library, deviceID, + CL_PROGRAM_BUILD_LOG, size_ret, library_log, + NULL); + if (error != CL_SUCCESS) + { + log_error("Device failed to return a library creation log (in %s:%d)\n", + __FILE__, __LINE__); test_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"); } log_info("CREATE LIBRARY LOG: %s\n", library_log); free(library_log); - error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret ); + error = clGetProgramBuildInfo(my_newly_minted_library, deviceID, + CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret); test_error(error, "Device failed to return library creation options size"); library_options = (char *)malloc(size_ret); - error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_OPTIONS, size_ret, library_options, NULL ); - test_error(error, "Device failed to return library creation options.\nclGetProgramBuildInfo CL_PROGRAM_BUILD_OPTIONS failed"); + error = clGetProgramBuildInfo(my_newly_minted_library, deviceID, + CL_PROGRAM_BUILD_OPTIONS, size_ret, + library_options, NULL); + test_error( + error, + "Device failed to return library creation " + "options.\nclGetProgramBuildInfo CL_PROGRAM_BUILD_OPTIONS failed"); log_info("CREATE LIBRARY OPTIONS: %s\n", library_options); free(library_options); - /* Link the program that calls the kernels and the library that contains them */ + /* Link the program that calls the kernels and the library that contains + * them */ cl_program programs[2] = { program, my_newly_minted_library }; - cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, programs, test_notify_link_complete, (void *)"linking", &error); - test_error( error, "Unable to link a program with a library" ); + cl_program my_newly_linked_program = + clLinkProgram(context, 1, &deviceID, NULL, 2, programs, + test_notify_link_complete, (void *)"linking", &error); + test_error(error, "Unable to link a program with a library"); - /* Wait for linking to complete (just keep polling, since we're just a test */ - error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Unable to get program link status" ); + /* Wait for linking to complete (just keep polling, since we're just a test + */ + error = clGetProgramBuildInfo(my_newly_linked_program, deviceID, + CL_PROGRAM_BUILD_STATUS, sizeof(status), + &status, NULL); + test_error(error, "Unable to get program link status"); - while( (int)status == CL_BUILD_IN_PROGRESS ) + while ((int)status == CL_BUILD_IN_PROGRESS) { - log_info( "\n -- still waiting for program linking... (status is %d)", status ); - sleep( 1 ); - error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Unable to get program link status" ); + log_info("\n -- still waiting for program linking... (status is %d)", + status); + sleep(1); + error = clGetProgramBuildInfo(my_newly_linked_program, deviceID, + CL_PROGRAM_BUILD_STATUS, sizeof(status), + &status, NULL); + test_error(error, "Unable to get program link status"); } - if( status != CL_BUILD_SUCCESS ) + if (status != CL_BUILD_SUCCESS) { - log_error( "ERROR: program linking failed! (status: %d in %s:%d)\n", (int)status, __FILE__, __LINE__ ); + log_error("ERROR: program linking failed! (status: %d in %s:%d)\n", + (int)status, __FILE__, __LINE__); return -1; } - error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret ); - test_error( error, "Device failed to return a linking log size" ); + error = clGetProgramBuildInfo(my_newly_linked_program, deviceID, + CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret); + test_error(error, "Device failed to return a linking log size"); linking_log = (char *)malloc(size_ret); - error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_LOG, size_ret, linking_log, NULL ); - if (error != CL_SUCCESS){ - log_error("Device failed to return a linking log (in %s:%d).\n", __FILE__, __LINE__); + error = clGetProgramBuildInfo(my_newly_linked_program, deviceID, + CL_PROGRAM_BUILD_LOG, size_ret, linking_log, + NULL); + if (error != CL_SUCCESS) + { + log_error("Device failed to return a linking log (in %s:%d).\n", + __FILE__, __LINE__); test_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"); } log_info("BUILDING LOG: %s\n", linking_log); free(linking_log); - error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret ); + error = clGetProgramBuildInfo(my_newly_linked_program, deviceID, + CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret); test_error(error, "Device failed to return linking options size"); linking_options = (char *)malloc(size_ret); - error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_OPTIONS, size_ret, linking_options, NULL ); - test_error(error, "Device failed to return linking options.\nclGetProgramBuildInfo CL_PROGRAM_BUILD_OPTIONS failed"); + error = clGetProgramBuildInfo(my_newly_linked_program, deviceID, + CL_PROGRAM_BUILD_OPTIONS, size_ret, + linking_options, NULL); + test_error( + error, + "Device failed to return linking options.\nclGetProgramBuildInfo " + "CL_PROGRAM_BUILD_OPTIONS failed"); log_info("BUILDING OPTIONS: %s\n", linking_options); free(linking_options); // Create the composite kernel - cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); - test_error( error, "Unable to create a composite kernel" ); + cl_kernel kernel = + clCreateKernel(my_newly_linked_program, "CompositeKernel", &error); + test_error(error, "Unable to create a composite kernel"); // Run the composite kernel and verify the results error = verifyCopyBuffer(context, queue, kernel); - if (error != CL_SUCCESS) - return error; + if (error != CL_SUCCESS) return error; /* All done! */ - error = clReleaseKernel( kernel ); - test_error( error, "Unable to release kernel object" ); + error = clReleaseKernel(kernel); + test_error(error, "Unable to release kernel object"); - error = clReleaseProgram( program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(program); + test_error(error, "Unable to release program object"); - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { - free( (void*)lines[i] ); - free( (void*)lines[i+numLines+1] ); + free((void *)lines[i]); + free((void *)lines[i + numLines + 1]); } - free( lines ); + free(lines); - for(i = 0; i < numLines; i++) + for (i = 0; i < numLines; i++) { - error = clReleaseProgram( simple_kernels[i] ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(simple_kernels[i]); + test_error(error, "Unable to release program object"); } - free( simple_kernels ); + free(simple_kernels); - error = clReleaseProgram( my_newly_minted_library ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_minted_library); + test_error(error, "Unable to release program object"); - error = clReleaseProgram( my_newly_linked_program ); - test_error( error, "Unable to release program object" ); + error = clReleaseProgram(my_newly_linked_program); + test_error(error, "Unable to release program object"); return 0; } -int test_compile_and_link_status_options_log(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_compile_and_link_status_options_log(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { - unsigned int toTest[] = { 256, 0 }; //512, 1024, 8192, 16384, 32768, 0 }; + unsigned int toTest[] = { 256, 0 }; // 512, 1024, 8192, 16384, 32768, 0 }; unsigned int i; - log_info( "Testing Compile and Link Status, Options and Logging ...this might take awhile...\n" ); + log_info("Testing Compile and Link Status, Options and Logging ...this " + "might take awhile...\n"); - for( i = 0; toTest[ i ] != 0; i++ ) + for (i = 0; toTest[i] != 0; i++) { - log_info( " %d...\n", toTest[ i ] ); + log_info(" %d...\n", toTest[i]); #if defined(_WIN32) clock_t start = clock(); -#elif defined(__linux__) || defined(__APPLE__) - timeval time1, time2; - gettimeofday(&time1, NULL); +#elif defined(__linux__) || defined(__APPLE__) + timeval time1, time2; + gettimeofday(&time1, NULL); #endif - if( test_large_compile_and_link_status_options_log( context, deviceID, queue, toTest[ i ] ) != 0 ) + if (test_large_compile_and_link_status_options_log(context, deviceID, + queue, toTest[i]) + != 0) { - log_error( "ERROR: large program compilation, linking, status, options and logging test failed for %d lines! (in %s:%d)\n", toTest[ i ], __FILE__, __LINE__ ); + log_error( + "ERROR: large program compilation, linking, status, options " + "and logging test failed for %d lines! (in %s:%d)\n", + toTest[i], __FILE__, __LINE__); return -1; } #if defined(_WIN32) clock_t end = clock(); - log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] ); -#elif defined(__linux__) || defined(__APPLE__) - gettimeofday(&time2, NULL); - log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] ); + log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false, + "clock() time in secs", "%d lines", toTest[i]); +#elif defined(__linux__) || defined(__APPLE__) + gettimeofday(&time2, NULL); + log_perf((float)(float)(time2.tv_sec - time1.tv_sec) + + 1.0e-6 * (time2.tv_usec - time1.tv_usec), + false, "wall time in secs", "%d lines", toTest[i]); #endif } diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index d53af8dc..89626b79 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -76,6 +76,7 @@ const char *known_extensions[] = { "cl_khr_device_uuid", "cl_khr_pci_bus_info", "cl_khr_suggested_local_work_size", + "cl_khr_expect_assume", "cl_khr_spirv_linkonce_odr", "cl_khr_semaphore", "cl_khr_external_semaphore", diff --git a/test_conformance/computeinfo/main.cpp b/test_conformance/computeinfo/main.cpp index 9cecabea..b1d73af3 100644 --- a/test_conformance/computeinfo/main.cpp +++ b/test_conformance/computeinfo/main.cpp @@ -439,8 +439,8 @@ int getPlatformConfigInfo(cl_platform_id platform, config_info* info) err = clGetPlatformInfo(platform, info->opcode, config_size_set, &info->config.cl_name_version_single, &config_size_ret); + size_err = config_size_set != config_size_ret; } - size_err = config_size_set != config_size_ret; break; default: log_error("Unknown config type: %d\n", info->config_type); @@ -585,8 +585,8 @@ int getConfigInfo(cl_device_id device, config_info* info) err = clGetDeviceInfo(device, info->opcode, config_size_set, &info->config.cl_name_version_single, &config_size_ret); + size_err = config_size_set != config_size_ret; } - size_err = config_size_set != config_size_ret; break; default: log_error("Unknown config type: %d\n", info->config_type); diff --git a/test_conformance/conversions/CMakeLists.txt b/test_conformance/conversions/CMakeLists.txt index cc019b26..8ed3ba18 100644 --- a/test_conformance/conversions/CMakeLists.txt +++ b/test_conformance/conversions/CMakeLists.txt @@ -16,6 +16,6 @@ set_source_files_properties( COMPILE_FLAGS -march=i686) endif(NOT CMAKE_CL_64 AND NOT MSVC AND NOT ANDROID) -set_gnulike_module_compile_flags("-Wno-unused-but-set-variable") +set_gnulike_module_compile_flags("-Wno-unused-but-set-variable -Wno-sign-compare") include(../CMakeCommon.txt) diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp index dfb32279..1020638a 100644 --- a/test_conformance/conversions/basic_test_conversions.cpp +++ b/test_conformance/conversions/basic_test_conversions.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -15,2243 +15,1496 @@ // #include "harness/testHarness.h" #include "harness/compat.h" +#include "harness/ThreadPool.h" -#include "basic_test_conversions.h" -#include <limits.h> -#include <string.h> +#if defined(__APPLE__) +#include <sys/sysctl.h> +#include <mach/mach_time.h> +#endif -#include "harness/mt19937.h" +#if defined(__linux__) +#include <unistd.h> +#include <sys/syscall.h> +#include <linux/sysctl.h> +#endif +#if defined(__linux__) +#include <sys/param.h> +#include <libgen.h> +#endif -#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) -#include "fplib.h" +#if defined(__MINGW32__) +#include <sys/param.h> #endif -#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) -/* Rounding modes and saturation for use with qcom 64 bit to float conversion library */ - bool qcom_sat; - roundingMode qcom_rm; +#include <sstream> +#include <stdarg.h> +#if !defined(_WIN32) +#include <libgen.h> +#include <sys/mman.h> #endif +#include <time.h> -static inline cl_ulong random64( MTdata d ); +#include <algorithm> -#if defined (_WIN32) - #include <mmintrin.h> - #include <emmintrin.h> +#include <vector> +#include <type_traits> + +#include "basic_test_conversions.h" + +#if defined(_WIN32) +#include <mmintrin.h> +#include <emmintrin.h> #else // !_WIN32 -#if defined (__SSE__ ) - #include <xmmintrin.h> +#if defined(__SSE__) +#include <xmmintrin.h> #endif -#if defined (__SSE2__ ) - #include <emmintrin.h> +#if defined(__SSE2__) +#include <emmintrin.h> #endif #endif // _WIN32 -const char *gTypeNames[ kTypeCount ] = { - "uchar", "char", - "ushort", "short", - "uint", "int", - "float", "double", - "ulong", "long" - }; - -const char *gRoundingModeNames[ kRoundingModeCount ] = { - "", - "_rte", - "_rtp", - "_rtn", - "_rtz" - }; - -const char *gSaturationNames[ 2 ] = { "", "_sat" }; - -size_t gTypeSizes[ kTypeCount ] = { - sizeof( cl_uchar ), sizeof( cl_char ), - sizeof( cl_ushort ), sizeof( cl_short ), - sizeof( cl_uint ), sizeof( cl_int ), - sizeof( cl_float ), sizeof( cl_double ), - sizeof( cl_ulong ), sizeof( cl_long ), - }; - -long lrintf_clamped( float f ); -long lrintf_clamped( float f ) -{ - static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) }; +cl_context gContext = NULL; +cl_command_queue gQueue = NULL; +int gStartTestNumber = -1; +int gEndTestNumber = 0; +#if defined(__APPLE__) +int gTimeResults = 1; +#else +int gTimeResults = 0; +#endif +int gReportAverageTimes = 0; +void *gIn = NULL; +void *gRef = NULL; +void *gAllowZ = NULL; +void *gOut[kCallStyleCount] = { NULL }; +cl_mem gInBuffer; +cl_mem gOutBuffers[kCallStyleCount]; +size_t gComputeDevices = 0; +uint32_t gDeviceFrequency = 0; +int gWimpyMode = 0; +int gWimpyReductionFactor = 128; +int gSkipTesting = 0; +int gForceFTZ = 0; +int gIsRTZ = 0; +uint32_t gSimdSize = 1; +int gHasDouble = 0; +int gTestDouble = 1; +const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" }; +int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 }; +int gMinVectorSize = 0; +int gMaxVectorSize = sizeof(vectorSizes) / sizeof(vectorSizes[0]); +MTdata gMTdata; +const char **argList = NULL; +int argCount = 0; + + +double SubtractTime(uint64_t endTime, uint64_t startTime); + + +// clang-format off +// for readability sake keep this section unformatted + +std::vector<unsigned int> DataInitInfo::specialValuesUInt = { + uint32_t(INT_MIN), uint32_t(INT_MIN + 1), uint32_t(INT_MIN + 2), + uint32_t(-(1 << 30) - 3), uint32_t(-(1 << 30) - 2), uint32_t(-(1 << 30) - 1), uint32_t(-(1 << 30)), + uint32_t(-(1 << 30) + 1), uint32_t(-(1 << 30) + 2), uint32_t(-(1 << 30) + 3), + uint32_t(-(1 << 24) - 3), uint32_t(-(1 << 24) - 2),uint32_t(-(1 << 24) - 1), + uint32_t(-(1 << 24)), uint32_t(-(1 << 24) + 1), uint32_t(-(1 << 24) + 2), uint32_t(-(1 << 24) + 3), + uint32_t(-(1 << 23) - 3), uint32_t(-(1 << 23) - 2),uint32_t(-(1 << 23) - 1), + uint32_t(-(1 << 23)), uint32_t(-(1 << 23) + 1), uint32_t(-(1 << 23) + 2), uint32_t(-(1 << 23) + 3), + uint32_t(-(1 << 22) - 3), uint32_t(-(1 << 22) - 2),uint32_t(-(1 << 22) - 1), + uint32_t(-(1 << 22)), uint32_t(-(1 << 22) + 1), uint32_t(-(1 << 22) + 2), uint32_t(-(1 << 22) + 3), + uint32_t(-(1 << 21) - 3), uint32_t(-(1 << 21) - 2),uint32_t(-(1 << 21) - 1), + uint32_t(-(1 << 21)), uint32_t(-(1 << 21) + 1), uint32_t(-(1 << 21) + 2), uint32_t(-(1 << 21) + 3), + uint32_t(-(1 << 16) - 3), uint32_t(-(1 << 16) - 2),uint32_t(-(1 << 16) - 1), + uint32_t(-(1 << 16)), uint32_t(-(1 << 16) + 1), uint32_t(-(1 << 16) + 2), uint32_t(-(1 << 16) + 3), + uint32_t(-(1 << 15) - 3), uint32_t(-(1 << 15) - 2),uint32_t(-(1 << 15) - 1), + uint32_t(-(1 << 15)), uint32_t(-(1 << 15) + 1), uint32_t(-(1 << 15) + 2), uint32_t(-(1 << 15) + 3), + uint32_t(-(1 << 8) - 3), uint32_t(-(1 << 8) - 2),uint32_t(-(1 << 8) - 1), + uint32_t(-(1 << 8)), uint32_t(-(1 << 8) + 1), uint32_t(-(1 << 8) + 2), uint32_t(-(1 << 8) + 3), + uint32_t(-(1 << 7) - 3), uint32_t(-(1 << 7) - 2),uint32_t(-(1 << 7) - 1), + uint32_t(-(1 << 7)), uint32_t(-(1 << 7) + 1), uint32_t(-(1 << 7) + 2), uint32_t(-(1 << 7) + 3), + uint32_t(-4), uint32_t(-3), uint32_t(-2), uint32_t(-1), 0, 1, 2, 3, 4, + (1 << 7) - 3,(1 << 7) - 2,(1 << 7) - 1, (1 << 7), (1 << 7) + 1, (1 << 7) + 2, (1 << 7) + 3, + (1 << 8) - 3,(1 << 8) - 2,(1 << 8) - 1, (1 << 8), (1 << 8) + 1, (1 << 8) + 2, (1 << 8) + 3, + (1 << 15) - 3,(1 << 15) - 2,(1 << 15) - 1, (1 << 15), (1 << 15) + 1, (1 << 15) + 2, (1 << 15) + 3, + (1 << 16) - 3,(1 << 16) - 2,(1 << 16) - 1, (1 << 16), (1 << 16) + 1, (1 << 16) + 2, (1 << 16) + 3, + (1 << 21) - 3,(1 << 21) - 2,(1 << 21) - 1, (1 << 21), (1 << 21) + 1, (1 << 21) + 2, (1 << 21) + 3, + (1 << 22) - 3,(1 << 22) - 2,(1 << 22) - 1, (1 << 22), (1 << 22) + 1, (1 << 22) + 2, (1 << 22) + 3, + (1 << 23) - 3,(1 << 23) - 2,(1 << 23) - 1, (1 << 23), (1 << 23) + 1, (1 << 23) + 2, (1 << 23) + 3, + (1 << 24) - 3,(1 << 24) - 2,(1 << 24) - 1, (1 << 24), (1 << 24) + 1, (1 << 24) + 2, (1 << 24) + 3, + (1 << 30) - 3,(1 << 30) - 2,(1 << 30) - 1, (1 << 30), (1 << 30) + 1, (1 << 30) + 2, (1 << 30) + 3, + INT_MAX - 3, INT_MAX - 2, INT_MAX - 1, INT_MAX, // 0x80000000, 0x80000001 0x80000002 already covered above + UINT_MAX - 3, UINT_MAX - 2, UINT_MAX - 1, UINT_MAX +}; - if( f >= -(float) LONG_MIN ) - return LONG_MAX; +std::vector<float> DataInitInfo::specialValuesFloat = { + -NAN, -INFINITY, -FLT_MAX, + MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), + MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), + MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), + MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), + -1000.f, -100.f, -4.0f, -3.5f, -3.0f, + MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, + MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, + MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, + MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24), MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, + MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, + MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, + MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, + MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), + MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), + MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), + MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), + MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), + MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, +NAN, +INFINITY, +FLT_MAX, + MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), + MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), + MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), + MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), + +1000.f, +100.f, +4.0f, +3.5f, +3.0f, + MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23), +2.0f, + MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), + MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), + MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), + MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), + MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), + MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), + MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), + MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), + MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), + MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f +}; - if( f <= (float) LONG_MIN ) - return LONG_MIN; +// A table of more difficult cases to get right +std::vector<double> DataInitInfo::specialValuesDouble = { + -NAN, -INFINITY, -DBL_MAX, + MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.80000000000001p64, -0x180000000000001LL, 8), + MAKE_HEX_DOUBLE(-0x1.8p64, -0x18LL, 60), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp64, -0x17ffffffffffffLL, 12), + MAKE_HEX_DOUBLE(-0x1.80000000000001p63, -0x180000000000001LL, 7), MAKE_HEX_DOUBLE(-0x1.8p63, -0x18LL, 59), + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp63, -0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), + MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(-0x1.80000000000001p32, -0x180000000000001LL, -24), MAKE_HEX_DOUBLE(-0x1.8p32, -0x18LL, 28), + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp32, -0x17ffffffffffffLL, -20), MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), + MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), + MAKE_HEX_DOUBLE(-0x1.80000000000001p31, -0x180000000000001LL, -25), MAKE_HEX_DOUBLE(-0x1.8p31, -0x18LL, 27), + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp31, -0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), + MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), + -1000., -100., -4.0, -3.5, -3.0, + MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, + MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, + MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52), MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, + MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), + -DBL_MIN, + MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), + MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), + MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), + MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), + -0.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), + MAKE_HEX_DOUBLE(0x1.80000000000001p63, 0x180000000000001LL, 7), MAKE_HEX_DOUBLE(0x1.8p63, 0x18LL, 59), + MAKE_HEX_DOUBLE(0x1.7ffffffffffffp63, 0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), + MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(+0x1.80000000000001p32, +0x180000000000001LL, -24), MAKE_HEX_DOUBLE(+0x1.8p32, +0x18LL, 28), + MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp32, +0x17ffffffffffffLL, -20), MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), + MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), + MAKE_HEX_DOUBLE(+0x1.80000000000001p31, +0x180000000000001LL, -25), MAKE_HEX_DOUBLE(+0x1.8p31, +0x18LL, 27), + MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp31, +0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), + MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), + +1000., +100., +4.0, +3.5, +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, + MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), + +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52), MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), + +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), + +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), + +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), + +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), + MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), + MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), + MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, MAKE_HEX_DOUBLE(-0x1.ffffffffffffep62, -0x1ffffffffffffeLL, 10), + MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp62, -0x1ffffffffffffcLL, 10), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), + MAKE_HEX_DOUBLE(+0x1.ffffffffffffep62, +0x1ffffffffffffeLL, 10), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp62, +0x1ffffffffffffcLL, 10), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep51, -0x1ffffffffffffeLL, -1), + MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp51, -0x1ffffffffffffcLL, -1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp51, -0x1fffffffffffffLL, -1), + MAKE_HEX_DOUBLE(+0x1.ffffffffffffep51, +0x1ffffffffffffeLL, -1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp51, +0x1ffffffffffffcLL, -1), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp51, +0x1fffffffffffffLL, -1), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep52, -0x1ffffffffffffeLL, 0), + MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp52, -0x1ffffffffffffcLL, 0), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp52, -0x1fffffffffffffLL, 0), + MAKE_HEX_DOUBLE(+0x1.ffffffffffffep52, +0x1ffffffffffffeLL, 0), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp52, +0x1ffffffffffffcLL, 0), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp52, +0x1fffffffffffffLL, 0), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep53, -0x1ffffffffffffeLL, 1), + MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp53, -0x1ffffffffffffcLL, 1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp53, -0x1fffffffffffffLL, 1), + MAKE_HEX_DOUBLE(+0x1.ffffffffffffep53, +0x1ffffffffffffeLL, 1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp53, +0x1ffffffffffffcLL, 1), + MAKE_HEX_DOUBLE(+0x1.fffffffffffffp53, +0x1fffffffffffffLL, 1), MAKE_HEX_DOUBLE(-0x1.0000000000002p52, -0x10000000000002LL, 0), + MAKE_HEX_DOUBLE(-0x1.0000000000001p52, -0x10000000000001LL, 0), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52), + MAKE_HEX_DOUBLE(+0x1.0000000000002p52, +0x10000000000002LL, 0), MAKE_HEX_DOUBLE(+0x1.0000000000001p52, +0x10000000000001LL, 0), + MAKE_HEX_DOUBLE(+0x1.0p52, +0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0000000000002p53, -0x10000000000002LL, 1), + MAKE_HEX_DOUBLE(-0x1.0000000000001p53, -0x10000000000001LL, 1), MAKE_HEX_DOUBLE(-0x1.0p53, -0x1LL, 53), + MAKE_HEX_DOUBLE(+0x1.0000000000002p53, +0x10000000000002LL, 1), MAKE_HEX_DOUBLE(+0x1.0000000000001p53, +0x10000000000001LL, 1), + MAKE_HEX_DOUBLE(+0x1.0p53, +0x1LL, 53), MAKE_HEX_DOUBLE(-0x1.0000000000002p54, -0x10000000000002LL, 2), + MAKE_HEX_DOUBLE(-0x1.0000000000001p54, -0x10000000000001LL, 2), MAKE_HEX_DOUBLE(-0x1.0p54, -0x1LL, 54), + MAKE_HEX_DOUBLE(+0x1.0000000000002p54, +0x10000000000002LL, 2), MAKE_HEX_DOUBLE(+0x1.0000000000001p54, +0x10000000000001LL, 2), + MAKE_HEX_DOUBLE(+0x1.0p54, +0x1LL, 54), MAKE_HEX_DOUBLE(-0x1.fffffffefffffp62, -0x1fffffffefffffLL, 10), + MAKE_HEX_DOUBLE(-0x1.ffffffffp62, -0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(-0x1.ffffffff00001p62, -0x1ffffffff00001LL, 10), + MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30), + MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10), +}; +// clang-format on - // Round fractional values to integer in round towards nearest mode - if( fabsf(f) < MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23 ) ) - { - volatile float x = f; - float magicVal = magic[ f < 0 ]; - -#if defined( __SSE__ ) || defined (_WIN32) - // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly - __m128 v = _mm_set_ss( x ); - __m128 m = _mm_set_ss( magicVal ); - v = _mm_add_ss( v, m ); - v = _mm_sub_ss( v, m ); - _mm_store_ss( (float*) &x, v ); + +// Windows (since long double got deprecated) sets the x87 to 53-bit precision +// (that's x87 default state). This causes problems with the tests that +// convert long and ulong to float and double or otherwise deal with values +// that need more precision than 53-bit. So, set the x87 to 64-bit precision. +static inline void Force64BitFPUPrecision(void) +{ +#if __MINGW32__ + // The usual method is to use _controlfp as follows: + // #include <float.h> + // _controlfp(_PC_64, _MCW_PC); + // + // _controlfp is available on MinGW32 but not on MinGW64. Instead of having + // divergent code just use inline assembly which works for both. + unsigned short int orig_cw = 0; + unsigned short int new_cw = 0; + __asm__ __volatile__("fstcw %0" : "=m"(orig_cw)); + new_cw = orig_cw | 0x0300; // set precision to 64-bit + __asm__ __volatile__("fldcw %0" ::"m"(new_cw)); #else - x += magicVal; - x -= magicVal; + /* Implement for other platforms if needed */ #endif - f = x; +} + + +template <typename InType, typename OutType> +int CalcRefValsPat<InType, OutType>::check_result(void *test, uint32_t count, + int vectorSize) +{ + const cl_uchar *a = (const cl_uchar *)gAllowZ; + + if (std::is_integral<OutType>::value) + { // char/uchar/short/ushort/int/uint/long/ulong + const OutType *t = (const OutType *)test; + const OutType *c = (const OutType *)gRef; + for (uint32_t i = 0; i < count; i++) + if (t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (OutType)0)) + { + size_t s = sizeof(OutType) * 2; + std::stringstream sstr; + sstr << "\nError for vector size %d found at 0x%8.8x: *0x%" + << s << "." << s << "x vs 0x%" << s << "." << s << "x\n"; + vlog(sstr.str().c_str(), vectorSize, i, c[i], t[i]); + return i + 1; + } + } + else if (std::is_same<OutType, cl_float>::value) + { + // cast to integral - from original test + const cl_uint *t = (const cl_uint *)test; + const cl_uint *c = (const cl_uint *)gRef; + + for (uint32_t i = 0; i < count; i++) + if (t[i] != c[i] && + // Allow nan's to be binary different + !((t[i] & 0x7fffffffU) > 0x7f800000U + && (c[i] & 0x7fffffffU) > 0x7f800000U) + && !(a[i] != (cl_uchar)0 && t[i] == (c[i] & 0x80000000U))) + { + vlog( + "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n", + vectorSize, i, ((OutType *)gRef)[i], ((OutType *)test)[i]); + return i + 1; + } + } + else + { + const cl_ulong *t = (const cl_ulong *)test; + const cl_ulong *c = (const cl_ulong *)gRef; + + for (uint32_t i = 0; i < count; i++) + if (t[i] != c[i] && + // Allow nan's to be binary different + !((t[i] & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL + && (c[i] & 0x7fffffffffffffffULL) > 0x7f80000000000000ULL) + && !(a[i] != (cl_uchar)0 + && t[i] == (c[i] & 0x8000000000000000ULL))) + { + vlog( + "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n", + vectorSize, i, ((OutType *)gRef)[i], ((OutType *)test)[i]); + return i + 1; + } } - return (long) f; + return 0; } -long long llrintf_clamped( float f ); -long long llrintf_clamped( float f ) + +cl_uint RoundUpToNextPowerOfTwo(cl_uint x) { - static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) }; + if (0 == (x & (x - 1))) return x; - if( f >= -(float) LLONG_MIN ) - return LLONG_MAX; + while (x & (x - 1)) x &= x - 1; + + return x + x; +} - if( f <= (float) LLONG_MIN ) - return LLONG_MIN; - // Round fractional values to integer in round towards nearest mode - if( fabsf(f) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23) ) +cl_int CustomConversionsTest::Run() +{ + int startMinVectorSize = gMinVectorSize; + Type inType, outType; + RoundingMode round; + SaturationMode sat; + + for (int i = 0; i < argCount; i++) { - volatile float x = f; - float magicVal = magic[ f < 0 ]; -#if defined( __SSE__ ) || defined (_WIN32) - // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly - __m128 v = _mm_set_ss( x ); - __m128 m = _mm_set_ss( magicVal ); - v = _mm_add_ss( v, m ); - v = _mm_sub_ss( v, m ); - _mm_store_ss( (float*) &x, v ); -#else - x += magicVal; - x -= magicVal; -#endif - f = x; + if (conv_test::GetTestCase(argList[i], &outType, &inType, &sat, &round)) + { + vlog_error("\n\t\t**** ERROR: Unable to parse function name " + "%s. Skipping.... *****\n\n", + argList[i]); + continue; + } + + // skip double if we don't have it + if (!gTestDouble && (inType == kdouble || outType == kdouble)) + { + if (gHasDouble) + { + vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n", + gTypeNames[outType], gSaturationNames[sat], + gRoundingModeNames[round], gTypeNames[inType]); + vlog("\t\tcl_khr_fp64 enabled, but double testing turned " + "off.\n"); + } + continue; + } + + // skip longs on embedded + if (!gHasLong + && (inType == klong || outType == klong || inType == kulong + || outType == kulong)) + { + continue; + } + + // Skip the implicit converts if the rounding mode is not default or + // test is saturated + if (0 == startMinVectorSize) + { + if (sat || round != kDefaultRoundingMode) + gMinVectorSize = 1; + else + gMinVectorSize = 0; + } + + IterOverSelectedTypes iter(typeIterator, *this, inType, outType, round, + sat); + + iter.Run(); + + if (gFailCount) + { + vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n", + gTypeNames[outType], gSaturationNames[sat], + gRoundingModeNames[round], gTypeNames[inType]); + } } - return (long long) f; + return gFailCount; } -long lrint_clamped( double f ); -long lrint_clamped( double f ) + +ConversionsTest::ConversionsTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : context(context), device(device), queue(queue), num_elements(0), + typeIterator({ cl_uchar(0), cl_char(0), cl_ushort(0), cl_short(0), + cl_uint(0), cl_int(0), cl_float(0), cl_double(0), + cl_ulong(0), cl_long(0) }) +{} + + +cl_int ConversionsTest::Run() { - static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) }; + IterOverTypes iter(typeIterator, *this); - if( sizeof( long ) > 4 ) + iter.Run(); + + return gFailCount; +} + + +cl_int ConversionsTest::SetUp(int elements) +{ + num_elements = elements; + return CL_SUCCESS; +} + + +template <typename InType, typename OutType> +void ConversionsTest::TestTypesConversion(const Type &inType, + const Type &outType, int &testNumber, + int startMinVectorSize) +{ + SaturationMode sat; + RoundingMode round; + int error; + + // skip longs on embedded + if (!gHasLong + && (inType == klong || outType == klong || inType == kulong + || outType == kulong)) { - if( f >= -(double) LONG_MIN ) - return LONG_MAX; + return; } - else + + for (sat = (SaturationMode)0; sat < kSaturationModeCount; + sat = (SaturationMode)(sat + 1)) { - if( f >= LONG_MAX ) - return LONG_MAX; - } + // skip illegal saturated conversions to float type + if (kSaturated == sat && (outType == kfloat || outType == kdouble)) + { + continue; + } + + for (round = (RoundingMode)0; round < kRoundingModeCount; + round = (RoundingMode)(round + 1)) + { + if (++testNumber < gStartTestNumber) + { + continue; + } + else + { + if (gEndTestNumber > 0 && testNumber >= gEndTestNumber) return; + } - if( f <= (double) LONG_MIN ) - return LONG_MIN; + vlog("%d) Testing convert_%sn%s%s( %sn ):\n", testNumber, + gTypeNames[outType], gSaturationNames[sat], + gRoundingModeNames[round], gTypeNames[inType]); - // Round fractional values to integer in round towards nearest mode - if( fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52) ) - { - volatile double x = f; - double magicVal = magic[ f < 0 ]; -#if defined( __SSE2__ ) || defined (_MSC_VER) - // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly - __m128d v = _mm_set_sd( x ); - __m128d m = _mm_set_sd( magicVal ); - v = _mm_add_sd( v, m ); - v = _mm_sub_sd( v, m ); - _mm_store_sd( (double*) &x, v ); -#else - x += magicVal; - x -= magicVal; -#endif - f = x; - } + // skip double if we don't have it + if (!gTestDouble && (inType == kdouble || outType == kdouble)) + { + if (gHasDouble) + { + vlog_error("\t *** %d) convert_%sn%s%s( %sn ) " + "FAILED ** \n", + testNumber, gTypeNames[outType], + gSaturationNames[sat], gRoundingModeNames[round], + gTypeNames[inType]); + vlog("\t\tcl_khr_fp64 enabled, but double " + "testing turned off.\n"); + } + continue; + } + + // Skip the implicit converts if the rounding mode is + // not default or test is saturated + if (0 == startMinVectorSize) + { + if (sat || round != kDefaultRoundingMode) + gMinVectorSize = 1; + else + gMinVectorSize = 0; + } - return (long) f; + if ((error = DoTest<InType, OutType>(outType, inType, sat, round))) + { + vlog_error("\t *** %d) convert_%sn%s%s( %sn ) " + "FAILED ** \n", + testNumber, gTypeNames[outType], + gSaturationNames[sat], gRoundingModeNames[round], + gTypeNames[inType]); + } + } + } } -long long llrint_clamped( double f ); -long long llrint_clamped( double f ) + +template <typename InType, typename OutType> +int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat, + RoundingMode round) { - static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) }; +#ifdef __APPLE__ + cl_ulong wall_start = mach_absolute_time(); +#endif + + cl_uint threads = GetThreadCount(); - if( f >= -(double) LLONG_MIN ) - return LLONG_MAX; + DataInitInfo info = { 0, 0, outType, inType, sat, round, threads }; + DataInfoSpec<InType, OutType> init_info(info); + WriteInputBufferInfo writeInputBufferInfo; + int vectorSize; + int error = 0; + uint64_t i; - if( f <= (double) LLONG_MIN ) - return LLONG_MIN; + gTestCount++; + size_t blockCount = + BUFFER_SIZE / std::max(gTypeSizes[inType], gTypeSizes[outType]); + size_t step = blockCount; - // Round fractional values to integer in round towards nearest mode - if( fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52) ) + for (i = 0; i < threads; i++) { - volatile double x = f; - double magicVal = magic[ f < 0 ]; -#if defined( __SSE2__ ) || defined (_MSC_VER) - // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly - __m128d v = _mm_set_sd( x ); - __m128d m = _mm_set_sd( magicVal ); - v = _mm_add_sd( v, m ); - v = _mm_sub_sd( v, m ); - _mm_store_sd( (double*) &x, v ); -#else - x += magicVal; - x -= magicVal; -#endif - f = x; + init_info.mdv.emplace_back(MTdataHolder(gRandomSeed)); } - return (long long) f; -} - + writeInputBufferInfo.outType = outType; + writeInputBufferInfo.inType = inType; -/* - Names created as: + writeInputBufferInfo.calcInfo.resize(gMaxVectorSize); + for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) + { + writeInputBufferInfo.calcInfo[vectorSize].reset( + new CalcRefValsPat<InType, OutType>()); + writeInputBufferInfo.calcInfo[vectorSize]->program = + conv_test::MakeProgram( + outType, inType, sat, round, vectorSize, + &writeInputBufferInfo.calcInfo[vectorSize]->kernel); + if (NULL == writeInputBufferInfo.calcInfo[vectorSize]->program) + { + gFailCount++; + return -1; + } + if (NULL == writeInputBufferInfo.calcInfo[vectorSize]->kernel) + { + gFailCount++; + vlog_error("\t\tFAILED -- Failed to create kernel.\n"); + return -2; + } - #include <stdio.h> + writeInputBufferInfo.calcInfo[vectorSize]->parent = + &writeInputBufferInfo; + writeInputBufferInfo.calcInfo[vectorSize]->vectorSize = vectorSize; + writeInputBufferInfo.calcInfo[vectorSize]->result = -1; + } - const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" }; + if (gSkipTesting) return error; - int main( void ) + // Patch up rounding mode if default is RTZ + // We leave the part above in default rounding mode so that the right kernel + // is compiled. + if (std::is_same<OutType, cl_float>::value) { + if (round == kDefaultRoundingMode && gIsRTZ) + init_info.round = round = kRoundTowardZero; + } - int i,j; + // Figure out how many elements are in a work block + // we handle 64-bit types a bit differently. + uint64_t lastCase = (8 * gTypeSizes[inType] > 32) + ? 0x100000000ULL + : 1ULL << (8 * gTypeSizes[inType]); - for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ ) - for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ ) - { - if( j == i ) - continue; + if (!gWimpyMode && gIsEmbedded) + step = blockCount * EMBEDDED_REDUCTION_FACTOR; - vlog( "void %s2%s( void *, void *);\n", names[i], names[j] ); - } + if (gWimpyMode) step = (size_t)blockCount * (size_t)gWimpyReductionFactor; + vlog("Testing... "); + fflush(stdout); + for (i = 0; i < (uint64_t)lastCase; i += step) + { + if (0 == (i & ((lastCase >> 3) - 1))) + { + vlog("."); + fflush(stdout); + } - return 0; - } -*/ - -static float my_fabsf( float x ); -static double my_fabs( double x ); - - - -static void uchar2char( void *, void *); -static void uchar2ushort( void *, void *); -static void uchar2short( void *, void *); -static void uchar2uint( void *, void *); -static void uchar2int( void *, void *); -static void uchar2float( void *, void *); -static void uchar2double( void *, void *); -static void uchar2ulong( void *, void *); -static void uchar2long( void *, void *); -static void char2uchar( void *, void *); -static void char2ushort( void *, void *); -static void char2short( void *, void *); -static void char2uint( void *, void *); -static void char2int( void *, void *); -static void char2float( void *, void *); -static void char2double( void *, void *); -static void char2ulong( void *, void *); -static void char2long( void *, void *); -static void ushort2uchar( void *, void *); -static void ushort2char( void *, void *); -static void ushort2short( void *, void *); -static void ushort2uint( void *, void *); -static void ushort2int( void *, void *); -static void ushort2float( void *, void *); -static void ushort2double( void *, void *); -static void ushort2ulong( void *, void *); -static void ushort2long( void *, void *); -static void short2uchar( void *, void *); -static void short2char( void *, void *); -static void short2ushort( void *, void *); -static void short2uint( void *, void *); -static void short2int( void *, void *); -static void short2float( void *, void *); -static void short2double( void *, void *); -static void short2ulong( void *, void *); -static void short2long( void *, void *); -static void uint2uchar( void *, void *); -static void uint2char( void *, void *); -static void uint2ushort( void *, void *); -static void uint2short( void *, void *); -static void uint2int( void *, void *); -static void uint2float( void *, void *); -static void uint2double( void *, void *); -static void uint2ulong( void *, void *); -static void uint2long( void *, void *); -static void int2uchar( void *, void *); -static void int2char( void *, void *); -static void int2ushort( void *, void *); -static void int2short( void *, void *); -static void int2uint( void *, void *); -static void int2float( void *, void *); -static void int2double( void *, void *); -static void int2ulong( void *, void *); -static void int2long( void *, void *); -static void float2uchar( void *, void *); -static void float2char( void *, void *); -static void float2ushort( void *, void *); -static void float2short( void *, void *); -static void float2uint( void *, void *); -static void float2int( void *, void *); -static void float2double( void *, void *); -static void float2ulong( void *, void *); -static void float2long( void *, void *); -static void double2uchar( void *, void *); -static void double2char( void *, void *); -static void double2ushort( void *, void *); -static void double2short( void *, void *); -static void double2uint( void *, void *); -static void double2int( void *, void *); -static void double2float( void *, void *); -static void double2ulong( void *, void *); -static void double2long( void *, void *); -static void ulong2uchar( void *, void *); -static void ulong2char( void *, void *); -static void ulong2ushort( void *, void *); -static void ulong2short( void *, void *); -static void ulong2uint( void *, void *); -static void ulong2int( void *, void *); -static void ulong2float( void *, void *); -static void ulong2double( void *, void *); -static void ulong2long( void *, void *); -static void long2uchar( void *, void *); -static void long2char( void *, void *); -static void long2ushort( void *, void *); -static void long2short( void *, void *); -static void long2uint( void *, void *); -static void long2int( void *, void *); -static void long2float( void *, void *); -static void long2double( void *, void *); -static void long2ulong( void *, void *); - -/* - Conversion list created as - - #include <stdio.h> - - const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" }; - - int main( void ) - { + cl_uint count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i); + writeInputBufferInfo.count = count; - int i,j; + // Crate a user event to represent the status of the reference value + // computation completion + writeInputBufferInfo.calcReferenceValues = + clCreateUserEvent(gContext, &error); + if (error || NULL == writeInputBufferInfo.calcReferenceValues) + { + vlog_error("ERROR: Unable to create user event. (%d)\n", error); + gFailCount++; + return error; + } - for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ ) + // retain for consumption by MapOutputBufferComplete + for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; + vectorSize++) { - vlog( "{ " ); - for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ ) + if ((error = + clRetainEvent(writeInputBufferInfo.calcReferenceValues))) { - if( j == i ) - vlog( " NULL, " ); - else - { - char s[64]; - sprintf( s, "%s2%s,", names[j], names[i] ); - vlog( "%15s ", s ); - } + vlog_error("ERROR: Unable to retain user event. (%d)\n", error); + gFailCount++; + return error; } - vlog( "},\n" ); } - return 0; - } - - */ -/* -Convert gConversions[kTypeCount][kTypeCount] = { -{ NULL, char2uchar, ushort2uchar, short2uchar, uint2uchar, int2uchar, float2uchar, double2uchar, ulong2uchar, long2uchar, }, -{ uchar2char, NULL, ushort2char, short2char, uint2char, int2char, float2char, double2char, ulong2char, long2char, }, -{ uchar2ushort, char2ushort, NULL, short2ushort, uint2ushort, int2ushort, float2ushort, double2ushort, ulong2ushort, long2ushort, }, -{ uchar2short, char2short, ushort2short, NULL, uint2short, int2short, float2short, double2short, ulong2short, long2short, }, -{ uchar2uint, char2uint, ushort2uint, short2uint, NULL, int2uint, float2uint, double2uint, ulong2uint, long2uint, }, -{ uchar2int, char2int, ushort2int, short2int, uint2int, NULL, float2int, double2int, ulong2int, long2int, }, -{ uchar2float, char2float, ushort2float, short2float, uint2float, int2float, NULL, double2float, ulong2float, long2float, }, -{ uchar2double, char2double, ushort2double, short2double, uint2double, int2double, float2double, NULL, ulong2double, long2double, }, -{ uchar2ulong, char2ulong, ushort2ulong, short2ulong, uint2ulong, int2ulong, float2ulong, double2ulong, NULL, long2ulong, }, -{ uchar2long, char2long, ushort2long, short2long, uint2long, int2long, float2long, double2long, ulong2long, NULL, } }; -*/ - -static void uchar2char_sat( void *, void *); -static void uchar2ushort_sat( void *, void *); -static void uchar2short_sat( void *, void *); -static void uchar2uint_sat( void *, void *); -static void uchar2int_sat( void *, void *); -static void uchar2float_sat( void *, void *); -static void uchar2double_sat( void *, void *); -static void uchar2ulong_sat( void *, void *); -static void uchar2long_sat( void *, void *); -static void char2uchar_sat( void *, void *); -static void char2ushort_sat( void *, void *); -static void char2short_sat( void *, void *); -static void char2uint_sat( void *, void *); -static void char2int_sat( void *, void *); -static void char2float_sat( void *, void *); -static void char2double_sat( void *, void *); -static void char2ulong_sat( void *, void *); -static void char2long_sat( void *, void *); -static void ushort2uchar_sat( void *, void *); -static void ushort2char_sat( void *, void *); -static void ushort2short_sat( void *, void *); -static void ushort2uint_sat( void *, void *); -static void ushort2int_sat( void *, void *); -static void ushort2float_sat( void *, void *); -static void ushort2double_sat( void *, void *); -static void ushort2ulong_sat( void *, void *); -static void ushort2long_sat( void *, void *); -static void short2uchar_sat( void *, void *); -static void short2char_sat( void *, void *); -static void short2ushort_sat( void *, void *); -static void short2uint_sat( void *, void *); -static void short2int_sat( void *, void *); -static void short2float_sat( void *, void *); -static void short2double_sat( void *, void *); -static void short2ulong_sat( void *, void *); -static void short2long_sat( void *, void *); -static void uint2uchar_sat( void *, void *); -static void uint2char_sat( void *, void *); -static void uint2ushort_sat( void *, void *); -static void uint2short_sat( void *, void *); -static void uint2int_sat( void *, void *); -static void uint2float_sat( void *, void *); -static void uint2double_sat( void *, void *); -static void uint2ulong_sat( void *, void *); -static void uint2long_sat( void *, void *); -static void int2uchar_sat( void *, void *); -static void int2char_sat( void *, void *); -static void int2ushort_sat( void *, void *); -static void int2short_sat( void *, void *); -static void int2uint_sat( void *, void *); -static void int2float_sat( void *, void *); -static void int2double_sat( void *, void *); -static void int2ulong_sat( void *, void *); -static void int2long_sat( void *, void *); -static void float2uchar_sat( void *, void *); -static void float2char_sat( void *, void *); -static void float2ushort_sat( void *, void *); -static void float2short_sat( void *, void *); -static void float2uint_sat( void *, void *); -static void float2int_sat( void *, void *); -static void float2double_sat( void *, void *); -static void float2ulong_sat( void *, void *); -static void float2long_sat( void *, void *); -static void double2uchar_sat( void *, void *); -static void double2char_sat( void *, void *); -static void double2ushort_sat( void *, void *); -static void double2short_sat( void *, void *); -static void double2uint_sat( void *, void *); -static void double2int_sat( void *, void *); -static void double2float_sat( void *, void *); -static void double2ulong_sat( void *, void *); -static void double2long_sat( void *, void *); -static void ulong2uchar_sat( void *, void *); -static void ulong2char_sat( void *, void *); -static void ulong2ushort_sat( void *, void *); -static void ulong2short_sat( void *, void *); -static void ulong2uint_sat( void *, void *); -static void ulong2int_sat( void *, void *); -static void ulong2float_sat( void *, void *); -static void ulong2double_sat( void *, void *); -static void ulong2long_sat( void *, void *); -static void long2uchar_sat( void *, void *); -static void long2char_sat( void *, void *); -static void long2ushort_sat( void *, void *); -static void long2short_sat( void *, void *); -static void long2uint_sat( void *, void *); -static void long2int_sat( void *, void *); -static void long2float_sat( void *, void *); -static void long2double_sat( void *, void *); -static void long2ulong_sat( void *, void *); -/* - #include <stdio.h> - - const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" }; - - int main( void ) - { + // Crate a user event to represent when the callbacks are done verifying + // correctness + writeInputBufferInfo.doneBarrier = clCreateUserEvent(gContext, &error); + if (error || NULL == writeInputBufferInfo.doneBarrier) + { + vlog_error("ERROR: Unable to create user event for barrier. (%d)\n", + error); + gFailCount++; + return error; + } - int i,j; + // retain for use by the callback that calls this + if ((error = clRetainEvent(writeInputBufferInfo.doneBarrier))) + { + vlog_error("ERROR: Unable to retain user event doneBarrier. (%d)\n", + error); + gFailCount++; + return error; + } - for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ ) + // Call this in a multithreaded manner + cl_uint chunks = RoundUpToNextPowerOfTwo(threads) * 2; + init_info.start = i; + init_info.size = count / chunks; + if (init_info.size < 16384) { - vlog( "{ " ); - for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ ) + chunks = RoundUpToNextPowerOfTwo(threads); + init_info.size = count / chunks; + if (init_info.size < 16384) { - if( j == i ) - vlog( " NULL, " ); - else - { - char s[64]; - sprintf( s, "%s2%s_sat,", names[j], names[i] ); - vlog( "%18s ", s ); - } + init_info.size = count; + chunks = 1; } - vlog( "},\n" ); } - return 0; - } + ThreadPool_Do(conv_test::InitData, chunks, &init_info); -Convert gSaturatedConversions[kTypeCount][kTypeCount] = { -{ NULL, char2uchar_sat, ushort2uchar_sat, short2uchar_sat, uint2uchar_sat, int2uchar_sat, float2uchar_sat, double2uchar_sat, ulong2uchar_sat, long2uchar_sat, }, -{ uchar2char_sat, NULL, ushort2char_sat, short2char_sat, uint2char_sat, int2char_sat, float2char_sat, double2char_sat, ulong2char_sat, long2char_sat, }, -{ uchar2ushort_sat, char2ushort_sat, NULL, short2ushort_sat, uint2ushort_sat, int2ushort_sat, float2ushort_sat, double2ushort_sat, ulong2ushort_sat, long2ushort_sat, }, -{ uchar2short_sat, char2short_sat, ushort2short_sat, NULL, uint2short_sat, int2short_sat, float2short_sat, double2short_sat, ulong2short_sat, long2short_sat, }, -{ uchar2uint_sat, char2uint_sat, ushort2uint_sat, short2uint_sat, NULL, int2uint_sat, float2uint_sat, double2uint_sat, ulong2uint_sat, long2uint_sat, }, -{ uchar2int_sat, char2int_sat, ushort2int_sat, short2int_sat, uint2int_sat, NULL, float2int_sat, double2int_sat, ulong2int_sat, long2int_sat, }, -{ uchar2float_sat, char2float_sat, ushort2float_sat, short2float_sat, uint2float_sat, int2float_sat, NULL, double2float_sat, ulong2float_sat, long2float_sat, }, -{ uchar2double_sat, char2double_sat, ushort2double_sat, short2double_sat, uint2double_sat, int2double_sat, float2double_sat, NULL, ulong2double_sat, long2double_sat, }, -{ uchar2ulong_sat, char2ulong_sat, ushort2ulong_sat, short2ulong_sat, uint2ulong_sat, int2ulong_sat, float2ulong_sat, double2ulong_sat, NULL, long2ulong_sat, }, -{ uchar2long_sat, char2long_sat, ushort2long_sat, short2long_sat, uint2long_sat, int2long_sat, float2long_sat, double2long_sat, ulong2long_sat, NULL, } -}; -*/ + // Copy the results to the device + if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0, + count * gTypeSizes[inType], gIn, 0, + NULL, NULL))) + { + vlog_error("ERROR: clEnqueueWriteBuffer failed. (%d)\n", error); + gFailCount++; + return error; + } -/* - #include <stdio.h> + // Call completion callback for the write, which will enqueue the rest + // of the work. + conv_test::WriteInputBufferComplete((void *)&writeInputBufferInfo); - const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" }; - const char *types[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "llong" }; + // Make sure the work is actually running, so we don't deadlock + if ((error = clFlush(gQueue))) + { + vlog_error("clFlush failed with error %d\n", error); + gFailCount++; + return error; + } - int main( void ) - { + ThreadPool_Do(conv_test::PrepareReference, chunks, &init_info); + + // signal we are done calculating the reference results + if ((error = clSetUserEventStatus( + writeInputBufferInfo.calcReferenceValues, CL_COMPLETE))) + { + vlog_error( + "Error: Failed to set user event status to CL_COMPLETE: %d\n", + error); + gFailCount++; + return error; + } - int i,j; + // Wait for the event callbacks to finish verifying correctness. + if ((error = clWaitForEvents( + 1, (cl_event *)&writeInputBufferInfo.doneBarrier))) + { + vlog_error("Error: Failed to wait for barrier: %d\n", error); + gFailCount++; + return error; + } - for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ ) - for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ ) - { - if( j == i ) - continue; + if ((error = clReleaseEvent(writeInputBufferInfo.calcReferenceValues))) + { + vlog_error("Error: Failed to release calcReferenceValues: %d\n", + error); + gFailCount++; + return error; + } - switch( i ) + if ((error = clReleaseEvent(writeInputBufferInfo.doneBarrier))) + { + vlog_error("Error: Failed to release done barrier: %d\n", error); + gFailCount++; + return error; + } + + for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; + vectorSize++) + { + if ((error = writeInputBufferInfo.calcInfo[vectorSize]->result)) + { + switch (inType) { - case 6: //float - if( j == 7 ) - vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) ((%s*) in)[0]; }\n", names[i], names[i], names[j], types[j], types[i] ); - else - vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) my_rintf(((%s*) in)[0]); }\n", names[i], names[i], names[j], types[j], types[i] ); + case kuchar: + case kchar: + vlog("Input value: 0x%2.2x ", + ((unsigned char *)gIn)[error - 1]); + break; + case kushort: + case kshort: + vlog("Input value: 0x%4.4x ", + ((unsigned short *)gIn)[error - 1]); + break; + case kuint: + case kint: + vlog("Input value: 0x%8.8x ", + ((unsigned int *)gIn)[error - 1]); break; - case 7: //double - if( j == 6 ) - vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) ((%s*) in)[0]; }\n", names[i], names[i], names[j], types[j], types[i] ); - else - vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) rint(((%s*) in)[0]); }\n", names[i], names[i], names[j], types[j], types[i] ); + case kfloat: + vlog("Input value: %a ", ((float *)gIn)[error - 1]); + break; + case kulong: + case klong: + vlog("Input value: 0x%16.16llx ", + ((unsigned long long *)gIn)[error - 1]); + break; + case kdouble: + vlog("Input value: %a ", ((double *)gIn)[error - 1]); break; default: - vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) - ((%s*) in)[0]; }\n", names[i], names[i], names[j], types[j], types[i] ); + vlog_error("Internal error at %s: %d\n", __FILE__, + __LINE__); + abort(); break; } - } + // tell the user which conversion it was. + if (0 == vectorSize) + vlog(" (implicit scalar conversion from %s to %s)\n", + gTypeNames[inType], gTypeNames[outType]); + else + vlog(" (convert_%s%s%s%s( %s%s ))\n", gTypeNames[outType], + sizeNames[vectorSize], gSaturationNames[sat], + gRoundingModeNames[round], gTypeNames[inType], + sizeNames[vectorSize]); - return 0; + gFailCount++; + return error; + } + } } -*/ -float my_fabsf( float x ) -{ - union{ cl_uint u; float f; }u; - u.f = x; - u.u &= 0x7fffffff; - return u.f; -} + log_info("done.\n"); -double my_fabs( double x ) -{ - union{ cl_ulong u; double f; }u; - u.f = x; - u.u &= 0x7fffffffffffffffULL; - return u.f; -} + if (gTimeResults) + { + // Kick off tests for the various vector lengths + for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; + vectorSize++) + { + size_t workItemCount = blockCount / vectorSizes[vectorSize]; + if (vectorSizes[vectorSize] * gTypeSizes[outType] < 4) + workItemCount /= + 4 / (vectorSizes[vectorSize] * gTypeSizes[outType]); + + double sum = 0.0; + double bestTime = INFINITY; + cl_uint k; + for (k = 0; k < PERF_LOOP_COUNT; k++) + { + uint64_t startTime = conv_test::GetTime(); + if ((error = conv_test::RunKernel( + writeInputBufferInfo.calcInfo[vectorSize]->kernel, + gInBuffer, gOutBuffers[vectorSize], workItemCount))) + { + gFailCount++; + return error; + } -static float my_rintf( float f ); -static float my_rintf( float f ) -{ - static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) }; + // Make sure OpenCL is done + if ((error = clFinish(gQueue))) + { + vlog_error("Error %d at clFinish\n", error); + return error; + } - // Round fractional values to integer in round towards nearest mode - if( fabsf(f) < MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23 ) ) - { - volatile float x = f; - float magicVal = magic[ f < 0 ]; - -#if defined( __SSE__ ) - // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly - __m128 v = _mm_set_ss( x ); - __m128 m = _mm_set_ss( magicVal ); - v = _mm_add_ss( v, m ); - v = _mm_sub_ss( v, m ); - _mm_store_ss( (float*) &x, v ); -#else - x += magicVal; - x -= magicVal; -#endif - f = x; + uint64_t endTime = conv_test::GetTime(); + double time = SubtractTime(endTime, startTime); + sum += time; + if (time < bestTime) bestTime = time; + } + + if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT; + double clocksPerOp = bestTime * (double)gDeviceFrequency + * gComputeDevices * gSimdSize * 1e6 + / (workItemCount * vectorSizes[vectorSize]); + if (0 == vectorSize) + vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", + "implicit convert %s -> %s", gTypeNames[inType], + gTypeNames[outType]); + else + vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", + "convert_%s%s%s%s( %s%s )", gTypeNames[outType], + sizeNames[vectorSize], gSaturationNames[sat], + gRoundingModeNames[round], gTypeNames[inType], + sizeNames[vectorSize]); + } } - return f; -} + if (gWimpyMode) + vlog("\tWimp pass"); + else + vlog("\tpassed"); -static void uchar2char( void *out, void *in){ ((char*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2short( void *out, void *in){ ((short*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2int( void *out, void *in){ ((int*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2float( void *out, void *in) -{ - cl_uchar l = ((cl_uchar*) in)[0]; - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void uchar2double( void *out, void *in) -{ - cl_uchar l = ((cl_uchar*) in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void uchar2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uchar*) in)[0]; } -static void char2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_char*) in)[0]; } -static void char2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_char*) in)[0]; } -static void char2short( void *out, void *in){ ((short*) out)[0] = ((cl_char*) in)[0]; } -static void char2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_char*) in)[0]; } -static void char2int( void *out, void *in){ ((int*) out)[0] = ((cl_char*) in)[0]; } -static void char2float( void *out, void *in) -{ - cl_char l = ((cl_char*) in)[0]; - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void char2double( void *out, void *in) -{ - cl_char l = ((cl_char*) in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void char2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_char*) in)[0]; } -static void char2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_char*) in)[0]; } -static void ushort2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2char( void *out, void *in){ ((char*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2short( void *out, void *in){ ((short*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2int( void *out, void *in){ ((int*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2float( void *out, void *in) -{ - cl_ushort l = ((cl_ushort*) in)[0]; - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void ushort2double( void *out, void *in) -{ - cl_ushort l = ((cl_ushort*) in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void ushort2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ushort*) in)[0]; } -static void short2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_short*) in)[0]; } -static void short2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_short*) in)[0]; } -static void short2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_short*) in)[0]; } -static void short2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_short*) in)[0]; } -static void short2int( void *out, void *in){ ((cl_int*) out)[0] = ((cl_short*) in)[0]; } -static void short2float( void *out, void *in) -{ - cl_short l = ((cl_short*) in)[0]; - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void short2double( void *out, void *in) -{ - cl_short l = ((cl_short*) in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void short2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_short*) in)[0]; } -static void short2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_short*) in)[0]; } -static void uint2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2short( void *out, void *in){ ((short*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2int( void *out, void *in){ ((cl_int*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2float( void *out, void *in) -{ - // Use volatile to prevent optimization by Clang compiler - volatile cl_uint l = ((cl_uint *)in)[0]; - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void uint2double( void *out, void *in) -{ - cl_uint l = ((cl_uint*) in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void uint2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uint*) in)[0]; } -static void int2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_int*) in)[0]; } -static void int2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_int*) in)[0]; } -static void int2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_int*) in)[0]; } -static void int2short( void *out, void *in){ ((cl_short*) out)[0] = ((cl_int*) in)[0]; } -static void int2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_int*) in)[0]; } -static void int2float( void *out, void *in) -{ - // Use volatile to prevent optimization by Clang compiler - volatile cl_int l = ((cl_int *)in)[0]; - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void int2double( void *out, void *in) -{ - cl_int l = ((cl_int*) in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -} -static void int2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_int*) in)[0]; } -static void int2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_int*) in)[0]; } -static void float2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2char( void *out, void *in){ ((cl_char*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2short( void *out, void *in){ ((cl_short*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2uint( void *out, void *in){ ((cl_uint*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2int( void *out, void *in){ ((cl_int*) out)[0] = my_rintf(((cl_float*) in)[0]); } -static void float2double( void *out, void *in){ ((cl_double*) out)[0] = ((cl_float*) in)[0]; } -static void float2ulong( void *out, void *in) -{ -#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) - // VS2005 (at least) on x86 uses fistp to store the float as a 64-bit int. - // However, fistp stores it as a signed int, and some of the test values won't - // fit into a signed int. (These test values are >= 2^63.) The result on VS2005 - // is that these end up silently (at least by default settings) clamped to - // the max lowest ulong. - cl_float x = my_rintf(((cl_float *)in)[0]); - if (x >= 9223372036854775808.0f) { - x -= 9223372036854775808.0f; - ((cl_ulong*) out)[0] = x; - ((cl_ulong*) out)[0] += 9223372036854775808ULL; - } else { - ((cl_ulong*) out)[0] = x; - } -#else - ((cl_ulong*) out)[0] = my_rintf(((cl_float*) in)[0]); +#ifdef __APPLE__ + // record the run time + vlog("\t(%f s)", 1e-9 * (mach_absolute_time() - wall_start)); #endif -} + vlog("\n\n"); + fflush(stdout); -static void float2long( void *out, void *in){ ((cl_long*) out)[0] = llrint_clamped( ((cl_float*) in)[0] ); } -static void double2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = rint(((cl_double*) in)[0]); } -static void double2char( void *out, void *in){ ((cl_char*) out)[0] = rint(((cl_double*) in)[0]); } -static void double2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = rint(((cl_double*) in)[0]); } -static void double2short( void *out, void *in){ ((cl_short*) out)[0] = rint(((cl_double*) in)[0]); } -static void double2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) rint(((cl_double*) in)[0]); } -static void double2int( void *out, void *in){ ((cl_int*) out)[0] = (int) rint(((cl_double*) in)[0]); } -static void double2float( void *out, void *in){ ((cl_float*) out)[0] = (float) ((cl_double*) in)[0]; } -static void double2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = (cl_ulong) rint(((cl_double*) in)[0]); } -static void double2long( void *out, void *in){ ((cl_long*) out)[0] = (cl_long) rint(((cl_double*) in)[0]); } -static void ulong2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = (cl_uchar) ((cl_ulong*) in)[0]; } -static void ulong2char( void *out, void *in){ ((cl_char*) out)[0] = (cl_char) ((cl_ulong*) in)[0]; } -static void ulong2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = (cl_ushort) ((cl_ulong*) in)[0]; } -static void ulong2short( void *out, void *in){ ((cl_short*) out)[0] = (cl_short)((cl_ulong*) in)[0]; } -static void ulong2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) ((cl_ulong*) in)[0]; } -static void ulong2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_ulong*) in)[0]; } -static void ulong2float( void *out, void *in) -{ -#if defined(_MSC_VER) && defined(_M_X64) - cl_ulong l = ((cl_ulong*) in)[0]; - float result; - cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l; - _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl)); - ((float*) out)[0] = (l == 0 ? 0.0f : (((cl_long)l < 0) ? result * 2.0f : result)); -#else - cl_ulong l = ((cl_ulong*) in)[0]; -#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) - /* ARM VFP doesn't have hardware instruction for converting from 64-bit - * integer to float types, hence GCC ARM uses the floating-point emulation - * code despite which -mfloat-abi setting it is. But the emulation code in - * libgcc.a has only one rounding mode (round to nearest even in this case) - * and ignores the user rounding mode setting in hardware. - * As a result setting rounding modes in hardware won't give correct - * rounding results for type covert from 64-bit integer to float using GCC - * for ARM compiler so for testing different rounding modes, we need to use - * alternative reference function. ARM64 does have an instruction, however - * we cannot guarantee the compiler will use it. On all ARM architechures - * use emulation to calculate reference.*/ - ((float*) out)[0] = qcom_u64_2_f32(l, qcom_sat, qcom_rm); -#else - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -#endif -#endif + return error; } -static void ulong2double( void *out, void *in) -{ -#if defined(_MSC_VER) - cl_ulong l = ((cl_ulong*) in)[0]; - double result; - cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l; -#if defined(_M_X64) - _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), sl)); -#else - result = sl; +#if !defined(__APPLE__) +void memset_pattern4(void *dest, const void *src_pattern, size_t bytes); #endif - ((double*) out)[0] = (l == 0 ? 0.0 : (((cl_long)l < 0) ? result * 2.0 : result)); -#else - // Use volatile to prevent optimization by Clang compiler - volatile cl_ulong l = ((cl_ulong *)in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -#endif -} -static void ulong2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ulong*) in)[0]; } -static void long2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = (cl_uchar) ((cl_long*) in)[0]; } -static void long2char( void *out, void *in){ ((cl_char*) out)[0] = (cl_char) ((cl_long*) in)[0]; } -static void long2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = (cl_ushort) ((cl_long*) in)[0]; } -static void long2short( void *out, void *in){ ((cl_short*) out)[0] = (cl_short) ((cl_long*) in)[0]; } -static void long2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) ((cl_long*) in)[0]; } -static void long2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_long*) in)[0]; } -static void long2float( void *out, void *in) -{ -#if defined(_MSC_VER) && defined(_M_X64) - cl_long l = ((cl_long*) in)[0]; - float result; - _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), l)); - ((float*) out)[0] = (l == 0 ? 0.0f : result); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -#else - cl_long l = ((cl_long*) in)[0]; -#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) - /* ARM VFP doesn't have hardware instruction for converting from 64-bit - * integer to float types, hence GCC ARM uses the floating-point emulation - * code despite which -mfloat-abi setting it is. But the emulation code in - * libgcc.a has only one rounding mode (round to nearest even in this case) - * and ignores the user rounding mode setting in hardware. - * As a result setting rounding modes in hardware won't give correct - * rounding results for type covert from 64-bit integer to float using GCC - * for ARM compiler so for testing different rounding modes, we need to use - * alternative reference function. ARM64 does have an instruction, however - * we cannot guarantee the compiler will use it. On all ARM architechures - * use emulation to calculate reference.*/ - ((float*) out)[0] = (l == 0 ? 0.0f : qcom_s64_2_f32(l, qcom_sat, qcom_rm)); +#if defined(_MSC_VER) +/* function is defined in "compat.h" */ #else - ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 -#endif -#endif -} -static void long2double( void *out, void *in) +double SubtractTime(uint64_t endTime, uint64_t startTime) { -#if defined(_MSC_VER) && defined(_M_X64) - cl_long l = ((cl_long*) in)[0]; - double result; + uint64_t diff = endTime - startTime; + static double conversion = 0.0; - _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), l)); - ((double*) out)[0] = (l == 0 ? 0.0 : result); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 + if (0.0 == conversion) + { +#if defined(__APPLE__) + mach_timebase_info_data_t info = { 0, 0 }; + kern_return_t err = mach_timebase_info(&info); + if (0 == err) + conversion = 1e-9 * (double)info.numer / (double)info.denom; #else - cl_long l = ((cl_long*) in)[0]; - ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 + // This function consumes output from GetTime() above, and converts the + // time to secionds. +#warning need accurate ticks to seconds conversion factor here. Times are invalid. #endif -} -static void long2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_long*) in)[0]; } - -#define CLAMP( _lo, _x, _hi ) ( (_x) < (_lo) ? (_lo) : ((_x) > (_hi) ? (_hi) : (_x))) - -// Done by hand -static void uchar2char_sat( void *out, void *in){ cl_uchar c = ((cl_uchar*) in)[0]; ((cl_char*) out)[0] = c > 0x7f ? 0x7f : c; } -static void uchar2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2short_sat( void *out, void *in){ ((cl_short*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf( (cl_float) ((cl_uchar*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527> -static void uchar2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_uchar*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527> -static void uchar2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uchar*) in)[0]; } -static void uchar2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uchar*) in)[0]; } -static void char2uchar_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_uchar*) out)[0] = c < 0 ? 0 : c; } -static void char2ushort_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_ushort*) out)[0] = c < 0 ? 0 : c; } -static void char2short_sat( void *out, void *in){ ((cl_short*) out)[0] = ((cl_char*) in)[0]; } -static void char2uint_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_uint*) out)[0] = c < 0 ? 0 : c; } -static void char2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_char*) in)[0]; } -static void char2float_sat( void *out, void *in){ ((cl_float*) out)[0] = ((cl_char*) in)[0]; } -static void char2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_char*) in)[0]; } -static void char2ulong_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_ulong*) out)[0] = c < 0 ? 0 : c; } -static void char2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_char*) in)[0]; } -static void ushort2uchar_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_uchar*) out)[0] = u > 0xff ? 0xFF : u; } -static void ushort2char_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_char*) out)[0] = u > 0x7f ? 0x7F : u; } -static void ushort2short_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_short*) out)[0] = u > 0x7fff ? 0x7fFF : u; } -static void ushort2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf((cl_float)((cl_ushort*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527> -static void ushort2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_ushort*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527> -static void ushort2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_ushort*) in)[0]; } -static void ushort2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ushort*) in)[0]; } -static void short2uchar_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, s, CL_UCHAR_MAX ); } -static void short2char_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, s, CL_CHAR_MAX ); } -static void short2ushort_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_ushort*) out)[0] = s < 0 ? 0 : s; } -static void short2uint_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_uint*) out)[0] = s < 0 ? 0 : s; } -static void short2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_short*) in)[0]; } -static void short2float_sat( void *out, void *in){ ((cl_float*) out)[0] = ((cl_short*) in)[0]; } -static void short2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_short*) in)[0]; } -static void short2ulong_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_ulong*) out)[0] = s < 0 ? 0 : s; } -static void short2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_short*) in)[0]; } -static void uint2uchar_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX); } -static void uint2char_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_char*) out)[0] = CLAMP( 0, u, CL_CHAR_MAX ); } -static void uint2ushort_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX); } -static void uint2short_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_short*) out)[0] = CLAMP( 0, u, CL_SHRT_MAX); } -static void uint2int_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_int*) out)[0] = CLAMP( 0, u, CL_INT_MAX); } -static void uint2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf( (cl_float) ((cl_uint*) in)[0] ); } // my_fabs workaround for <rdar://problem/5965527> -static void uint2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_uint*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527> -static void uint2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uint*) in)[0]; } -static void uint2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uint*) in)[0]; } -static void int2uchar_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, i, CL_UCHAR_MAX); } -static void int2char_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, i, CL_CHAR_MAX); } -static void int2ushort_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, i, CL_USHRT_MAX); } -static void int2short_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, i, CL_SHRT_MAX); } -static void int2uint_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_uint*) out)[0] = CLAMP( 0, i, CL_INT_MAX); } -static void int2float_sat( void *out, void *in){ ((cl_float*) out)[0] = ((cl_int*) in)[0]; } -static void int2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_int*) in)[0]; } -static void int2ulong_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_ulong*) out)[0] = i < 0 ? 0 : i; } -static void int2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_int*) in)[0]; } -static void float2uchar_sat( void *out, void *in){ ((cl_uchar*) out)[0] = CLAMP( 0, lrintf_clamped(((cl_float*) in)[0]), CL_UCHAR_MAX ); } -static void float2char_sat( void *out, void *in){ ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_CHAR_MAX); } -static void float2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = CLAMP( 0, lrintf_clamped(((cl_float*) in)[0]), CL_USHRT_MAX ); } -static void float2short_sat( void *out, void *in){ ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_SHRT_MAX ); } -static void float2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, llrintf_clamped(((cl_float*) in)[0]), CL_UINT_MAX ); } -static void float2int_sat( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) CLAMP( CL_INT_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_INT_MAX ); } -static void float2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_float*) in)[0]; } -static void float2ulong_sat( void *out, void *in) -{ -#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) - // VS2005 (at least) on x86 uses fistp to store the float as a 64-bit int. - // However, fistp stores it as a signed int, and some of the test values won't - // fit into a signed int. (These test values are >= 2^63.) The result on VS2005 - // is that these end up silently (at least by default settings) clamped to - // the max lowest ulong. - cl_float x = my_rintf(((cl_float *)in)[0]); - if (x >= 18446744073709551616.0f) { // 2^64 - ((cl_ulong*) out)[0] = 0xFFFFFFFFFFFFFFFFULL; - } else if (x < 0) { - ((cl_ulong*) out)[0] = 0; - } else if (x >= 9223372036854775808.0f) { // 2^63 - x -= 9223372036854775808.0f; - ((cl_ulong*) out)[0] = x; - ((cl_ulong*) out)[0] += 9223372036854775808ULL; - } else { - ((cl_ulong*) out)[0] = x; } -#else - float f = my_rintf(((float*) in)[0]); ((cl_ulong*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) ? 0xFFFFFFFFFFFFFFFFULL : f < 0 ? 0 : (cl_ulong) f; -#endif + + // strictly speaking we should also be subtracting out timer latency here + return conversion * (double)diff; } -// The final cast used to be (cl_ulong) f, but on Linux (RHEL5 at least) -// if f = -1.0f, then (cl_ulong) f = 0xffffffff, which clearly isn't right. -// Switching it to (cl_long) f seems to fix that. -static void float2long_sat( void *out, void *in){ float f = my_rintf(((float*) in)[0]); ((cl_long*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) ? 0x7FFFFFFFFFFFFFFFULL : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) ? 0x8000000000000000LL : (cl_long) f; } -static void double2uchar_sat( void *out, void *in){ ((cl_uchar*) out)[0] = CLAMP( 0, lrint_clamped(((cl_double*) in)[0]), CL_UCHAR_MAX ); } -static void double2char_sat( void *out, void *in){ ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, lrint_clamped(((cl_double*) in)[0]), CL_CHAR_MAX); } -static void double2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = CLAMP( 0, lrint_clamped(((cl_double*) in)[0]), CL_USHRT_MAX ); } -static void double2short_sat( void *out, void *in){ ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, lrint_clamped(((cl_double*) in)[0]), CL_SHRT_MAX ); } -static void double2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, llrint_clamped(((cl_double*) in)[0]), CL_UINT_MAX ); } -static void double2int_sat( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) CLAMP( CL_INT_MIN, lrint_clamped(((cl_double*) in)[0]), CL_INT_MAX ); } -static void double2float_sat( void *out, void *in){ ((cl_float*) out)[0] = (cl_float) ((double*) in)[0]; } -static void double2ulong_sat( void *out, void *in){ double f = rint(((double*) in)[0]); ((cl_ulong*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) ? 0xFFFFFFFFFFFFFFFFULL : f < 0 ? 0 : (cl_ulong) f; } -static void double2long_sat( void *out, void *in){ double f = rint(((double*) in)[0]); ((cl_long*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) ? 0x7FFFFFFFFFFFFFFFULL : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) ? 0x8000000000000000LL : (cl_long) f; } -static void ulong2uchar_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX ); } -static void ulong2char_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_char*) out)[0] = CLAMP( 0, u, CL_CHAR_MAX ); } -static void ulong2ushort_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX ); } -static void ulong2short_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_short*) out)[0] = CLAMP( 0, u, CL_SHRT_MAX ); } -static void ulong2uint_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, u, CL_UINT_MAX ); } -static void ulong2int_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_int*) out)[0] = (cl_int) CLAMP( 0, u, CL_INT_MAX ); } -static void ulong2float_sat( void *out, void *in){ ((float*) out)[0] = my_fabsf((float) ((cl_ulong*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527> -static void ulong2double_sat( void *out, void *in){ ((double*) out)[0] = my_fabs( ((cl_ulong*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527> -static void ulong2long_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_long*) out)[0] = CLAMP( 0, u, CL_LONG_MAX ); } -static void long2uchar_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX ); } -static void long2char_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, u, CL_CHAR_MAX ); } -static void long2ushort_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX ); } -static void long2short_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, u, CL_SHRT_MAX ); } -static void long2uint_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, u, CL_UINT_MAX ); } -static void long2int_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_int*) out)[0] = (int) CLAMP( CL_INT_MIN, u, CL_INT_MAX ); } -static void long2float_sat( void *out, void *in){ ((float*) out)[0] = (float) ((cl_long*) in)[0]; } -static void long2double_sat( void *out, void *in){ ((double*) out)[0] = ((cl_long*) in)[0]; } -static void long2ulong_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_ulong*) out)[0] = CLAMP( 0, u, CL_LONG_MAX ); } - -/* -#include <stdio.h> - -char *ground[] = { "", - "_rte", - "_rtp", - "_rtn", - "_rtz" - }; - -const char *gTypeNames[ ] = { - "uchar", "char", - "ushort", "short", - "uint", "int", - "float", "double", - "ulong", "long" - }; - - -int main( void ) +#endif + +//////////////////////////////////////////////////////////////////////////////// + +static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count) { - int i, j; + cl_uint i; + for (i = 0; i < count; ++i) + allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0); +} - for( i = 0; i < sizeof( gTypeNames ) / sizeof( gTypeNames[0] ); i++ ) - for( j = 0; j < sizeof( ground ) / sizeof( ground[0] ); j++ ) - { - vlog( "float clampf_%s%s( float );\n", gTypeNames[i], ground[j] ); - vlog( "double clampd_%s%s( double );\n", gTypeNames[i], ground[j] ); - } - return 0; +void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &ptr); -} -*/ - - -float clampf_uchar( float ); -double clampd_uchar( double ); -float clampf_uchar_rte( float ); -double clampd_uchar_rte( double ); -float clampf_uchar_rtp( float ); -double clampd_uchar_rtp( double ); -float clampf_uchar_rtn( float ); -double clampd_uchar_rtn( double ); -float clampf_uchar_rtz( float ); -double clampd_uchar_rtz( double ); -float clampf_char( float ); -double clampd_char( double ); -float clampf_char_rte( float ); -double clampd_char_rte( double ); -float clampf_char_rtp( float ); -double clampd_char_rtp( double ); -float clampf_char_rtn( float ); -double clampd_char_rtn( double ); -float clampf_char_rtz( float ); -double clampd_char_rtz( double ); -float clampf_ushort( float ); -double clampd_ushort( double ); -float clampf_ushort_rte( float ); -double clampd_ushort_rte( double ); -float clampf_ushort_rtp( float ); -double clampd_ushort_rtp( double ); -float clampf_ushort_rtn( float ); -double clampd_ushort_rtn( double ); -float clampf_ushort_rtz( float ); -double clampd_ushort_rtz( double ); -float clampf_short( float ); -double clampd_short( double ); -float clampf_short_rte( float ); -double clampd_short_rte( double ); -float clampf_short_rtp( float ); -double clampd_short_rtp( double ); -float clampf_short_rtn( float ); -double clampd_short_rtn( double ); -float clampf_short_rtz( float ); -double clampd_short_rtz( double ); -float clampf_uint( float ); -double clampd_uint( double ); -float clampf_uint_rte( float ); -double clampd_uint_rte( double ); -float clampf_uint_rtp( float ); -double clampd_uint_rtp( double ); -float clampf_uint_rtn( float ); -double clampd_uint_rtn( double ); -float clampf_uint_rtz( float ); -double clampd_uint_rtz( double ); -float clampf_int( float ); -double clampd_int( double ); -float clampf_int_rte( float ); -double clampd_int_rte( double ); -float clampf_int_rtp( float ); -double clampd_int_rtp( double ); -float clampf_int_rtn( float ); -double clampd_int_rtn( double ); -float clampf_int_rtz( float ); -double clampd_int_rtz( double ); -float clampf_float( float ); -double clampd_float( double ); -float clampf_float_rte( float ); -double clampd_float_rte( double ); -float clampf_float_rtp( float ); -double clampd_float_rtp( double ); -float clampf_float_rtn( float ); -double clampd_float_rtn( double ); -float clampf_float_rtz( float ); -double clampd_float_rtz( double ); -float clampf_double( float ); -double clampd_double( double ); -float clampf_double_rte( float ); -double clampd_double_rte( double ); -float clampf_double_rtp( float ); -double clampd_double_rtp( double ); -float clampf_double_rtn( float ); -double clampd_double_rtn( double ); -float clampf_double_rtz( float ); -double clampd_double_rtz( double ); -float clampf_ulong( float ); -double clampd_ulong( double ); -float clampf_ulong_rte( float ); -double clampd_ulong_rte( double ); -float clampf_ulong_rtp( float ); -double clampd_ulong_rtp( double ); -float clampf_ulong_rtn( float ); -double clampd_ulong_rtn( double ); -float clampf_ulong_rtz( float ); -double clampd_ulong_rtz( double ); -float clampf_long( float ); -double clampd_long( double ); -float clampf_long_rte( float ); -double clampd_long_rte( double ); -float clampf_long_rtp( float ); -double clampd_long_rtp( double ); -float clampf_long_rtn( float ); -double clampd_long_rtn( double ); -float clampf_long_rtz( float ); -double clampd_long_rtz( double ); - -/* -#include <stdio.h> - -char *ground[] = { "", - "_rte", - "_rtp", - "_rtn", - "_rtz" - }; - -const char *gTypeNames[ ] = { - "uchar", "char", - "ushort", "short", - "uint", "int", - "float", "double", - "ulong", "long" - }; - - -int main( void ) -{ - int i, j; +void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, + void *data); - for( i = 0; i < sizeof( gTypeNames ) / sizeof( gTypeNames[0] ); i++ ) +// Note: May be called reentrantly +void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &info) +{ + cl_int status; + // CalcRefValsBase *info = (CalcRefValsBase *)data; + cl_event calcReferenceValues = info->parent->calcReferenceValues; + + // we know that the map is done, wait for the main thread to finish + // calculating the reference values + if ((status = + clSetEventCallback(calcReferenceValues, CL_COMPLETE, + CalcReferenceValuesComplete, (void *)&info))) { - vlog( "{\t" ); - for( j = 0; j < sizeof( ground ) / sizeof( ground[0] ); j++ ) - vlog( "clampf_%s%s,\t", gTypeNames[i], ground[j] ); + vlog_error("ERROR: clSetEventCallback failed in " + "MapResultValuesComplete with status: %d\n", + status); + gFailCount++; // not thread safe -- being lazy here + } - vlog( "\t},\n" ); + // this thread no longer needs its reference to info->calcReferenceValues, + // so release it + if ((status = clReleaseEvent(calcReferenceValues))) + { + vlog_error("ERROR: clReleaseEvent(info->calcReferenceValues) failed " + "with status: %d\n", + status); + gFailCount++; // not thread safe -- being lazy here } - return 0; + // no need to flush since we didn't enqueue anything + // e was already released by WriteInputBufferComplete. It should be + // destroyed automatically soon after we exit. } -*/ -clampf gClampFloat[ kTypeCount ][kRoundingModeCount] = { - { clampf_uchar, clampf_uchar_rte, clampf_uchar_rtp, clampf_uchar_rtn, clampf_uchar_rtz, }, - { clampf_char, clampf_char_rte, clampf_char_rtp, clampf_char_rtn, clampf_char_rtz, }, - { clampf_ushort, clampf_ushort_rte, clampf_ushort_rtp, clampf_ushort_rtn, clampf_ushort_rtz, }, - { clampf_short, clampf_short_rte, clampf_short_rtp, clampf_short_rtn, clampf_short_rtz, }, - { clampf_uint, clampf_uint_rte, clampf_uint_rtp, clampf_uint_rtn, clampf_uint_rtz, }, - { clampf_int, clampf_int_rte, clampf_int_rtp, clampf_int_rtn, clampf_int_rtz, }, - { clampf_float, clampf_float_rte, clampf_float_rtp, clampf_float_rtn, clampf_float_rtz, }, - { clampf_double, clampf_double_rte, clampf_double_rtp, clampf_double_rtn, clampf_double_rtz, }, - { clampf_ulong, clampf_ulong_rte, clampf_ulong_rtp, clampf_ulong_rtn, clampf_ulong_rtz, }, - { clampf_long, clampf_long_rte, clampf_long_rtp, clampf_long_rtn, clampf_long_rtz, } -}; - -clampd gClampDouble[ kTypeCount ][kRoundingModeCount] = { - { clampd_uchar, clampd_uchar_rte, clampd_uchar_rtp, clampd_uchar_rtn, clampd_uchar_rtz, }, - { clampd_char, clampd_char_rte, clampd_char_rtp, clampd_char_rtn, clampd_char_rtz, }, - { clampd_ushort, clampd_ushort_rte, clampd_ushort_rtp, clampd_ushort_rtn, clampd_ushort_rtz, }, - { clampd_short, clampd_short_rte, clampd_short_rtp, clampd_short_rtn, clampd_short_rtz, }, - { clampd_uint, clampd_uint_rte, clampd_uint_rtp, clampd_uint_rtn, clampd_uint_rtz, }, - { clampd_int, clampd_int_rte, clampd_int_rtp, clampd_int_rtn, clampd_int_rtz, }, - { clampd_float, clampd_float_rte, clampd_float_rtp, clampd_float_rtn, clampd_float_rtz, }, - { clampd_double, clampd_double_rte, clampd_double_rtp, clampd_double_rtn, clampd_double_rtz, }, - { clampd_ulong, clampd_ulong_rte, clampd_ulong_rtp, clampd_ulong_rtn, clampd_ulong_rtz, }, - { clampd_long, clampd_long_rte, clampd_long_rtp, clampd_long_rtn, clampd_long_rtz, } -}; -#if defined (_WIN32) -#define __attribute__(X) -#endif -static inline float fclamp( float lo, float v, float hi ) __attribute__ ((always_inline)); -static inline double dclamp( double lo, double v, double hi ) __attribute__ ((always_inline)); - -static inline float fclamp( float lo, float v, float hi ){ v = v < lo ? lo : v; return v < hi ? v : hi; } -static inline double dclamp( double lo, double v, double hi ){ v = v < lo ? lo : v; return v < hi ? v : hi; } - -// Clamp unsaturated inputs into range so we don't get test errors: -float clampf_uchar( float f ) { return fclamp( -0.5f, f, 255.5f - 128.0f * FLT_EPSILON ); } -double clampd_uchar( double f ) { return dclamp( -0.5, f, 255.5 - 128.0 * DBL_EPSILON ); } -float clampf_uchar_rte( float f ) { return fclamp( -0.5f, f, 255.5f - 128.0f * FLT_EPSILON ); } -double clampd_uchar_rte( double f ) { return dclamp( -0.5, f, 255.5 - 128.0 * DBL_EPSILON ); } -float clampf_uchar_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 255.0f ); } -double clampd_uchar_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 255.0 ); } -float clampf_uchar_rtn( float f ) { return fclamp( -0.0f, f, 256.0f - 128.0f * FLT_EPSILON); } -double clampd_uchar_rtn( double f ) { return dclamp( -0.0, f, 256.0 - 128.0 * DBL_EPSILON); } -float clampf_uchar_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 256.0f - 128.0f * FLT_EPSILON); } -double clampd_uchar_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 256.0 - 128.0f * DBL_EPSILON); } - -float clampf_char( float f ) { return fclamp( -128.5f, f, 127.5f - 64.f * FLT_EPSILON ); } -double clampd_char( double f ) { return dclamp( -128.5, f, 127.5 - 64. * DBL_EPSILON ); } -float clampf_char_rte( float f ) { return fclamp( -128.5f, f, 127.5f - 64.f * FLT_EPSILON ); } -double clampd_char_rte( double f ) { return dclamp( -128.5, f, 127.5 - 64. * DBL_EPSILON ); } -float clampf_char_rtp( float f ) { return fclamp( -129.0f + 128.f*FLT_EPSILON, f, 127.f ); } -double clampd_char_rtp( double f ) { return dclamp( -129.0 + 128.*DBL_EPSILON, f, 127. ); } -float clampf_char_rtn( float f ) { return fclamp( -128.0f, f, 128.f - 64.0f*FLT_EPSILON ); } -double clampd_char_rtn( double f ) { return dclamp( -128.0, f, 128. - 64.0*DBL_EPSILON ); } -float clampf_char_rtz( float f ) { return fclamp( -129.0f + 128.f*FLT_EPSILON, f, 128.f - 64.0f*FLT_EPSILON ); } -double clampd_char_rtz( double f ) { return dclamp( -129.0 + 128.*DBL_EPSILON, f, 128. - 64.0*DBL_EPSILON ); } - -float clampf_ushort( float f ) { return fclamp( -0.5f, f, 65535.5f - 32768.0f * FLT_EPSILON ); } -double clampd_ushort( double f ) { return dclamp( -0.5, f, 65535.5 - 32768.0 * DBL_EPSILON ); } -float clampf_ushort_rte( float f ) { return fclamp( -0.5f, f, 65535.5f - 32768.0f * FLT_EPSILON ); } -double clampd_ushort_rte( double f ) { return dclamp( -0.5, f, 65535.5 - 32768.0 * DBL_EPSILON ); } -float clampf_ushort_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 65535.0f ); } -double clampd_ushort_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 65535.0 ); } -float clampf_ushort_rtn( float f ) { return fclamp( -0.0f, f, 65536.0f - 32768.0f * FLT_EPSILON); } -double clampd_ushort_rtn( double f ) { return dclamp( -0.0, f, 65536.0 - 32768.0 * DBL_EPSILON); } -float clampf_ushort_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 65536.0f - 32768.0f * FLT_EPSILON); } -double clampd_ushort_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 65536.0 - 32768.0f * DBL_EPSILON); } - -float clampf_short( float f ) { return fclamp( -32768.5f, f, 32767.5f - 16384.f * FLT_EPSILON ); } -double clampd_short( double f ) { return dclamp( -32768.5, f, 32767.5 - 16384. * DBL_EPSILON ); } -float clampf_short_rte( float f ) { return fclamp( -32768.5f, f, 32767.5f - 16384.f * FLT_EPSILON ); } -double clampd_short_rte( double f ) { return dclamp( -32768.5, f, 32767.5 - 16384. * DBL_EPSILON ); } -float clampf_short_rtp( float f ) { return fclamp( -32769.0f + 32768.f*FLT_EPSILON, f, 32767.f ); } -double clampd_short_rtp( double f ) { return dclamp( -32769.0 + 32768.*DBL_EPSILON, f, 32767. ); } -float clampf_short_rtn( float f ) { return fclamp( -32768.0f, f, 32768.f - 16384.0f*FLT_EPSILON ); } -double clampd_short_rtn( double f ) { return dclamp( -32768.0, f, 32768. - 16384.0*DBL_EPSILON ); } -float clampf_short_rtz( float f ) { return fclamp( -32769.0f + 32768.f*FLT_EPSILON, f, 32768.f - 16384.0f*FLT_EPSILON ); } -double clampd_short_rtz( double f ) { return dclamp( -32769.0 + 32768.*DBL_EPSILON, f, 32768. - 16384.0*DBL_EPSILON ); } - -float clampf_uint( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) ); } -double clampd_uint( double f ) { return dclamp( -0.5, f, CL_UINT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * DBL_EPSILON ); } -float clampf_uint_rte( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) ); } -double clampd_uint_rte( double f ) { return dclamp( -0.5, f, CL_UINT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * DBL_EPSILON ); } -float clampf_uint_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) ); } -double clampd_uint_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, CL_UINT_MAX ); } -float clampf_uint_rtn( float f ) { return fclamp( -0.0f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)); } -double clampd_uint_rtn( double f ) { return dclamp( -0.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21) ); } -float clampf_uint_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)); } -double clampd_uint_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21)); } - -float clampf_int( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } -double clampd_int( double f ) { return dclamp( INT_MIN - 0.5, f, CL_INT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); } -float clampf_int_rte( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } -double clampd_int_rte( double f ) { return dclamp( INT_MIN - 0.5, f, CL_INT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); } -float clampf_int_rtp( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } -double clampd_int_rtp( double f ) { return dclamp( INT_MIN - 1.0 + DBL_EPSILON * MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31), f, CL_INT_MAX ); } -float clampf_int_rtn( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } -double clampd_int_rtn( double f ) { return dclamp( INT_MIN, f, CL_INT_MAX + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); } -float clampf_int_rtz( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); } -double clampd_int_rtz( double f ) { return dclamp( INT_MIN - 1.0 + DBL_EPSILON * MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31), f, CL_INT_MAX + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); } - -float clampf_float( float f ){ return f; } -double clampd_float( double f ){ return f; } -float clampf_float_rte( float f ){ return f; } -double clampd_float_rte( double f ){ return f; } -float clampf_float_rtp( float f ){ return f; } -double clampd_float_rtp( double f ){ return f; } -float clampf_float_rtn( float f ){ return f; } -double clampd_float_rtn( double f ){ return f; } -float clampf_float_rtz( float f ){ return f; } -double clampd_float_rtz( double f ){ return f; } - -float clampf_double( float f ){ return f; } -double clampd_double( double f ){ return f; } -float clampf_double_rte( float f ){ return f; } -double clampd_double_rte( double f ){ return f; } -float clampf_double_rtp( float f ){ return f; } -double clampd_double_rtp( double f ){ return f; } -float clampf_double_rtn( float f ){ return f; } -double clampd_double_rtn( double f ){ return f; } -float clampf_double_rtz( float f ){ return f; } -double clampd_double_rtz( double f ){ return f; } - -float clampf_ulong( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } -double clampd_ulong( double f ) { return dclamp( -0.5, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } -float clampf_ulong_rte( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } -double clampd_ulong_rte( double f ) { return dclamp( -0.5, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } -float clampf_ulong_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } -double clampd_ulong_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } -float clampf_ulong_rtn( float f ) { return fclamp( -0.0f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } -double clampd_ulong_rtn( double f ) { return dclamp( -0.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } -float clampf_ulong_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); } -double clampd_ulong_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); } - -float clampf_long( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } -double clampd_long( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } -float clampf_long_rte( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } -double clampd_long_rte( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } -float clampf_long_rtp( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } -double clampd_long_rtp( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } -float clampf_long_rtn( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } -double clampd_long_rtn( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } -float clampf_long_rtz( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); } -double clampd_long_rtz( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); } - -#pragma mark - - -int alwaysPass( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int alwaysFail( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_uchar( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_char( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_ushort( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_short( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_uint( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_int( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_ulong( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_long( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_float( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); -int check_double( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); - -void init_uchar( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_char( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_ushort( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_short( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_uint( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_int( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_float( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_double( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_ulong( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); -void init_long( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d ); - -InitDataFunc gInitFunctions[ kTypeCount ] = { - init_uchar, init_char, - init_ushort, init_short, - init_uint, init_int, - init_float, init_double, - init_ulong, init_long - }; - - -CheckResults gCheckResults[ kTypeCount ] = { - check_uchar, check_char, check_ushort, check_short, check_uint, - check_int, check_float, check_double, check_ulong, check_long - }; -#if !defined (__APPLE__) -#define UNUSED -#else -#define UNUSED __attribute__((unused)) -#endif +void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, + void *data) +{ + std::unique_ptr<CalcRefValsBase> &info = + *(std::unique_ptr<CalcRefValsBase> *)data; + + cl_uint vectorSize = info->vectorSize; + cl_uint count = info->parent->count; + Type outType = + info->parent->outType; // the data type of the conversion result + Type inType = info->parent->inType; // the data type of the conversion input + size_t j; + cl_int error; + cl_event doneBarrier = info->parent->doneBarrier; + + // report spurious error condition + if (CL_SUCCESS != status) + { + vlog_error("ERROR: CalcReferenceValuesComplete did not succeed! (%d)\n", + status); + gFailCount++; // lazy about thread safety here + return; + } -int alwaysPass( void UNUSED *out1, void UNUSED *out2, void UNUSED *allowZ, uint32_t UNUSED count, int UNUSED vectorSize){ return 0; } -int alwaysFail( void UNUSED *out1, void UNUSED *out2, void UNUSED *allowZ, uint32_t UNUSED count, int UNUSED vectorSize ){ return -1; } + // Now we know that both results have been mapped back from the device, and + // the main thread is done calculating the reference results. It is now time + // to check the results. -int check_uchar( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_uchar *t = (const cl_uchar*)test; - const cl_uchar *c = (const cl_uchar*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + // verify results + void *mapped = info->p; - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_uchar)0)) + // Patch up NaNs conversions to integer to zero -- these can be converted to + // any integer + if (outType != kfloat && outType != kdouble) + { + if (inType == kfloat) { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%2.2x vs 0x%2.2x\n", vectorSize, i, c[i], t[i] ); - return i + 1; + float *inp = (float *)gIn; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) + memset((char *)mapped + j * gTypeSizes[outType], 0, + gTypeSizes[outType]); + } } - - return 0; -} - -int check_char( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_char *t = (const cl_char*)test; - const cl_char *c = (const cl_char*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; - - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_char)0)) + if (inType == kdouble) { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%2.2x vs 0x%2.2x\n", vectorSize, i, c[i], t[i] ); - return i + 1; + double *inp = (double *)gIn; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) + memset((char *)mapped + j * gTypeSizes[outType], 0, + gTypeSizes[outType]); + } } + } + else if (inType == kfloat || inType == kdouble) + { // outtype and intype is float or double. NaN conversions for float <-> + // double can be any NaN + if (inType == kfloat && outType == kdouble) + { + float *inp = (float *)gIn; + double *outp = (double *)mapped; + for (j = 0; j < count; j++) + { + if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN; + } + } + if (inType == kdouble && outType == kfloat) + { + double *inp = (double *)gIn; + float *outp = (float *)mapped; + for (j = 0; j < count; j++) + { + if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN; + } + } + } - return 0; -} + if (memcmp(mapped, gRef, count * gTypeSizes[outType])) + info->result = + info->check_result(mapped, count, vectorSizes[vectorSize]); + else + info->result = 0; -int check_ushort( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_ushort *t = (const cl_ushort*)test; - const cl_ushort *c = (const cl_ushort*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + // Fill the output buffer with junk and release it + { + cl_uint pattern = 0xffffdead; + memset_pattern4(mapped, &pattern, count * gTypeSizes[outType]); + if ((error = clEnqueueUnmapMemObject(gQueue, gOutBuffers[vectorSize], + mapped, 0, NULL, NULL))) + { + vlog_error("ERROR: clEnqueueUnmapMemObject failed in " + "CalcReferenceValuesComplete (%d)\n", + error); + gFailCount++; + } + } - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_ushort)0)) + if (1 == ThreadPool_AtomicAdd(&info->parent->barrierCount, -1)) + { + if ((status = clSetUserEventStatus(doneBarrier, CL_COMPLETE))) { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%4.4x vs 0x%4.4x\n", vectorSize, i, c[i], t[i] ); - return i + 1; + vlog_error("ERROR: clSetUserEventStatus failed in " + "CalcReferenceValuesComplete (err: %d). We're probably " + "going to deadlock.\n", + status); + gFailCount++; + return; } - return 0; + if ((status = clReleaseEvent(doneBarrier))) + { + vlog_error("ERROR: clReleaseEvent failed in " + "CalcReferenceValuesComplete (err: %d).\n", + status); + gFailCount++; + return; + } + } + // e was already released by WriteInputBufferComplete. It should be + // destroyed automatically soon after all the calls to + // CalcReferenceValuesComplete exit. } -int check_short( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_short *t = (const cl_short*)test; - const cl_short *c = (const cl_short*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; +// - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_short)0)) - { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%4.4x vs 0x%4.4x\n", vectorSize, i, c[i], t[i] ); - return i + 1; - } +namespace conv_test { - return 0; -} +//////////////////////////////////////////////////////////////////////////////// -int check_uint( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) +cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p) { - const cl_uint *t = (const cl_uint*)test; - const cl_uint *c = (const cl_uint*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + DataInitBase *info = (DataInitBase *)p; - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_uint)0)) - { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%8.8x vs 0x%8.8x\n", vectorSize, i, c[i], t[i] ); - return i + 1; - } + info->init(job_id, thread_id); - return 0; + return CL_SUCCESS; } -int check_int( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) +//////////////////////////////////////////////////////////////////////////////// + +cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p) { - const cl_int *t = (const cl_int*)test; - const cl_int *c = (const cl_int*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + DataInitBase *info = (DataInitBase *)p; - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_int)0)) - { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%8.8x vs 0x%8.8x\n", vectorSize, i, c[i], t[i] ); - return i + 1; - } + cl_uint count = info->size; + Type inType = info->inType; + Type outType = info->outType; + RoundingMode round = info->round; + size_t j; - return 0; -} + Force64BitFPUPrecision(); -int check_ulong( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_ulong *t = (const cl_ulong*)test; - const cl_ulong *c = (const cl_ulong*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + void *s = (cl_uchar *)gIn + job_id * count * gTypeSizes[info->inType]; + void *a = (cl_uchar *)gAllowZ + job_id * count; + void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType]; - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_ulong)0)) + + if (outType != inType) + { + // create the reference while we wait +#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) + /* ARM VFP doesn't have hardware instruction for converting from 64-bit + * integer to float types, hence GCC ARM uses the floating-point + * emulation code despite which -mfloat-abi setting it is. But the + * emulation code in libgcc.a has only one rounding mode (round to + * nearest even in this case) and ignores the user rounding mode setting + * in hardware. As a result setting rounding modes in hardware won't + * give correct rounding results for type covert from 64-bit integer to + * float using GCC for ARM compiler so for testing different rounding + * modes, we need to use alternative reference function. ARM64 does have + * an instruction, however we cannot guarantee the compiler will use it. + * On all ARM architechures use emulation to calculate reference.*/ + switch (round) { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%16.16llx vs 0x%16.16llx\n", vectorSize, i, c[i], t[i] ); - return i + 1; + /* conversions to floating-point type use the current rounding mode. + * The only default floating-point rounding mode supported is round + * to nearest even i.e the current rounding mode will be _rte for + * floating-point types. */ + case kDefaultRoundingMode: qcom_rm = qcomRTE; break; + case kRoundToNearestEven: qcom_rm = qcomRTE; break; + case kRoundUp: qcom_rm = qcomRTP; break; + case kRoundDown: qcom_rm = qcomRTN; break; + case kRoundTowardZero: qcom_rm = qcomRTZ; break; + default: + vlog_error("ERROR: undefined rounding mode %d\n", round); + break; } + qcom_sat = info->sat; +#endif - return 0; -} + RoundingMode oldRound = set_round(round, outType); -int check_long( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_long *t = (const cl_long*)test; - const cl_long *c = (const cl_long*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; + if (info->sat) + info->conv_array_sat(d, s, count); + else + info->conv_array(d, s, count); - for( i = 0; i < count; i++ ) - if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_long)0)) + set_round(oldRound, outType); + + // Decide if we allow a zero result in addition to the correctly rounded + // one + memset(a, 0, count); + if (gForceFTZ) { - vlog( "\nError for vector size %d found at 0x%8.8x: *0x%16.16llx vs 0x%16.16llx\n", vectorSize, i, c[i], t[i] ); - return i + 1; + if (inType == kfloat || outType == kfloat) + setAllowZ((uint8_t *)a, (uint32_t *)s, count); } + } + else + { + // Copy the input to the reference + memcpy(d, s, info->size * gTypeSizes[inType]); + } - return 0; -} - -int check_float( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_uint *t = (const cl_uint*)test; - const cl_uint *c = (const cl_uint*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; - - for( i = 0; i < count; i++ ) - if (t[i] != c[i] && - // Allow nan's to be binary different - !((t[i] & 0x7fffffffU) > 0x7f800000U && - (c[i] & 0x7fffffffU) > 0x7f800000U) && - !(a[i] != (cl_uchar)0 && - t[i] == (c[i] & 0x80000000U))) { - vlog( "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n", - vectorSize, i, ((float*)correct)[i], ((float*)test)[i] ); - return i + 1; + // Patch up NaNs conversions to integer to zero -- these can be converted to + // any integer + if (info->outType != kfloat && info->outType != kdouble) + { + if (inType == kfloat) + { + float *inp = (float *)s; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) + memset((char *)d + j * gTypeSizes[outType], 0, + gTypeSizes[outType]); + } + } + if (inType == kdouble) + { + double *inp = (double *)s; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) + memset((char *)d + j * gTypeSizes[outType], 0, + gTypeSizes[outType]); + } + } + } + else if (inType == kfloat || inType == kdouble) + { // outtype and intype is float or double. NaN conversions for float <-> + // double can be any NaN + if (inType == kfloat && outType == kdouble) + { + float *inp = (float *)s; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) ((double *)d)[j] = NAN; + } } + if (inType == kdouble && outType == kfloat) + { + double *inp = (double *)s; + for (j = 0; j < count; j++) + { + if (isnan(inp[j])) ((float *)d)[j] = NAN; + } + } + } - return 0; + return CL_SUCCESS; } -int check_double( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize ) -{ - const cl_ulong *t = (const cl_ulong*)test; - const cl_ulong *c = (const cl_ulong*)correct; - const cl_uchar *a = (const cl_uchar*)allowZ; - uint32_t i; - - for( i = 0; i < count; i++ ) - if (t[i] != c[i] && - // Allow nan's to be binary different - !((t[i] & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL && - (c[i] & 0x7fffffffffffffffULL) > 0x7f80000000000000ULL) && - !(a[i] != (cl_uchar)0 && - t[i] == (c[i] & 0x8000000000000000ULL))) { - vlog( "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n", - vectorSize, i, ((double*)correct)[i], ((double*)test)[i] ); - return i + 1; - } +//////////////////////////////////////////////////////////////////////////////// +uint64_t GetTime(void) +{ +#if defined(__APPLE__) + return mach_absolute_time(); +#elif defined(_MSC_VER) + return ReadTime(); +#else + // mach_absolute_time is a high precision timer with precision < 1 + // microsecond. +#warning need accurate clock here. Times are invalid. return 0; +#endif } +//////////////////////////////////////////////////////////////////////////////// -void init_uchar( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata UNUSED d ) +// Note: not called reentrantly +void WriteInputBufferComplete(void *data) { - cl_uchar *o = (cl_uchar *)out; - int i; + cl_int status; + WriteInputBufferInfo *info = (WriteInputBufferInfo *)data; + cl_uint count = info->count; + int vectorSize; - for( i = 0; i < count; i++ ) - o[i] = start++; -} + info->barrierCount = gMaxVectorSize - gMinVectorSize; -void init_char( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata UNUSED d ) -{ - char *o = (char *)out; - int i; + // now that we know that the write buffer is complete, enqueue callbacks to + // wait for the main thread to finish calculating the reference results. + for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) + { + size_t workItemCount = + (count + vectorSizes[vectorSize] - 1) / (vectorSizes[vectorSize]); - for( i = 0; i < count; i++ ) - o[i] = start++; -} + if ((status = conv_test::RunKernel(info->calcInfo[vectorSize]->kernel, + gInBuffer, gOutBuffers[vectorSize], + workItemCount))) + { + gFailCount++; + return; + } -void init_ushort( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata UNUSED d ) -{ - cl_ushort *o = (cl_ushort *)out; - int i; + info->calcInfo[vectorSize]->p = clEnqueueMapBuffer( + gQueue, gOutBuffers[vectorSize], CL_TRUE, + CL_MAP_READ | CL_MAP_WRITE, 0, count * gTypeSizes[info->outType], 0, + NULL, NULL, &status); + { + if (status) + { + vlog_error("ERROR: WriteInputBufferComplete calback failed " + "with status: %d\n", + status); + gFailCount++; + return; + } + } + } - for( i = 0; i < count; i++ ) - o[i] = start++; -} + for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) + { + MapResultValuesComplete(info->calcInfo[vectorSize]); + } -void init_short( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, UNUSED Type destType, uint64_t start, int count, MTdata UNUSED d ) -{ - short *o = (short *)out; - int i; + // Make sure the work starts moving -- otherwise we may deadlock + if ((status = clFlush(gQueue))) + { + vlog_error( + "ERROR: WriteInputBufferComplete calback failed with status: %d\n", + status); + gFailCount++; + return; + } - for( i = 0; i < count; i++ ) - o[i] = start++; + // e was already released by the main thread. It should be destroyed + // automatically soon after we exit. } -void init_uint( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata d ) +//////////////////////////////////////////////////////////////////////////////// + +cl_program MakeProgram(Type outType, Type inType, SaturationMode sat, + RoundingMode round, int vectorSize, cl_kernel *outKernel) { - static const unsigned int specialValuesUInt[] = { - INT_MIN, INT_MIN + 1, INT_MIN + 2, - -(1<<30)-3,-(1<<30)-2,-(1<<30)-1, -(1<<30), -(1<<30)+1, -(1<<30)+2, -(1<<30)+3, - -(1<<24)-3,-(1<<24)-2,-(1<<24)-1, -(1<<24), -(1<<24)+1, -(1<<24)+2, -(1<<24)+3, - -(1<<23)-3,-(1<<23)-2,-(1<<23)-1, -(1<<23), -(1<<23)+1, -(1<<23)+2, -(1<<23)+3, - -(1<<22)-3,-(1<<22)-2,-(1<<22)-1, -(1<<22), -(1<<22)+1, -(1<<22)+2, -(1<<22)+3, - -(1<<21)-3,-(1<<21)-2,-(1<<21)-1, -(1<<21), -(1<<21)+1, -(1<<21)+2, -(1<<21)+3, - -(1<<16)-3,-(1<<16)-2,-(1<<16)-1, -(1<<16), -(1<<16)+1, -(1<<16)+2, -(1<<16)+3, - -(1<<15)-3,-(1<<15)-2,-(1<<15)-1, -(1<<15), -(1<<15)+1, -(1<<15)+2, -(1<<15)+3, - -(1<<8)-3,-(1<<8)-2,-(1<<8)-1, -(1<<8), -(1<<8)+1, -(1<<8)+2, -(1<<8)+3, - -(1<<7)-3,-(1<<7)-2,-(1<<7)-1, -(1<<7), -(1<<7)+1, -(1<<7)+2, -(1<<7)+3, - -4, -3, -2, -1, 0, 1, 2, 3, 4, - (1<<7)-3,(1<<7)-2,(1<<7)-1, (1<<7), (1<<7)+1, (1<<7)+2, (1<<7)+3, - (1<<8)-3,(1<<8)-2,(1<<8)-1, (1<<8), (1<<8)+1, (1<<8)+2, (1<<8)+3, - (1<<15)-3,(1<<15)-2,(1<<15)-1, (1<<15), (1<<15)+1, (1<<15)+2, (1<<15)+3, - (1<<16)-3,(1<<16)-2,(1<<16)-1, (1<<16), (1<<16)+1, (1<<16)+2, (1<<16)+3, - (1<<21)-3,(1<<21)-2,(1<<21)-1, (1<<21), (1<<21)+1, (1<<21)+2, (1<<21)+3, - (1<<22)-3,(1<<22)-2,(1<<22)-1, (1<<22), (1<<22)+1, (1<<22)+2, (1<<22)+3, - (1<<23)-3,(1<<23)-2,(1<<23)-1, (1<<23), (1<<23)+1, (1<<23)+2, (1<<23)+3, - (1<<24)-3,(1<<24)-2,(1<<24)-1, (1<<24), (1<<24)+1, (1<<24)+2, (1<<24)+3, - (1<<30)-3,(1<<30)-2,(1<<30)-1, (1<<30), (1<<30)+1, (1<<30)+2, (1<<30)+3, - INT_MAX-3, INT_MAX-2, INT_MAX-1, INT_MAX, // 0x80000000, 0x80000001 0x80000002 already covered above - UINT_MAX-3, UINT_MAX-2, UINT_MAX-1, UINT_MAX - }; - - cl_uint *o = (cl_uint *)out; - int i; + cl_program program; + char testName[256]; + int error = 0; - for( i = 0; i < count; i++) { - if( gIsEmbedded ) - o[i] = (cl_uint) genrand_int32(d); - else - o[i] = (cl_uint)i + start; - } + std::ostringstream source; + if (outType == kdouble || inType == kdouble) + source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - if( 0 == start ) + // Create the program. This is a bit complicated because we are trying to + // avoid byte and short stores. + if (0 == vectorSize) { - size_t tableSize = sizeof( specialValuesUInt ); - if( sizeof( cl_uint) * count < tableSize ) - tableSize = sizeof( cl_uint) * count; - memcpy( (char*)(o + i) - tableSize, specialValuesUInt, tableSize ); + // Create the type names. + char inName[32]; + char outName[32]; + strncpy(inName, gTypeNames[inType], sizeof(inName)); + strncpy(outName, gTypeNames[outType], sizeof(outName)); + sprintf(testName, "test_implicit_%s_%s", outName, inName); + + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " dest[i] = src[i];\n"; + source << "}\n"; + + vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType], + gTypeNames[outType]); + fflush(stdout); } -} + else + { + int vectorSizetmp = vectorSizes[vectorSize]; -void init_int( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata d ) -{ - static const unsigned int specialValuesInt[] = { - INT_MIN, INT_MIN + 1, INT_MIN + 2, - -(1<<30)-3,-(1<<30)-2,-(1<<30)-1, -(1<<30), -(1<<30)+1, -(1<<30)+2, -(1<<30)+3, - -(1<<24)-3,-(1<<24)-2,-(1<<24)-1, -(1<<24), -(1<<24)+1, -(1<<24)+2, -(1<<24)+3, - -(1<<23)-3,-(1<<23)-2,-(1<<23)-1, -(1<<23), -(1<<23)+1, -(1<<23)+2, -(1<<23)+3, - -(1<<22)-3,-(1<<22)-2,-(1<<22)-1, -(1<<22), -(1<<22)+1, -(1<<22)+2, -(1<<22)+3, - -(1<<21)-3,-(1<<21)-2,-(1<<21)-1, -(1<<21), -(1<<21)+1, -(1<<21)+2, -(1<<21)+3, - -(1<<16)-3,-(1<<16)-2,-(1<<16)-1, -(1<<16), -(1<<16)+1, -(1<<16)+2, -(1<<16)+3, - -(1<<15)-3,-(1<<15)-2,-(1<<15)-1, -(1<<15), -(1<<15)+1, -(1<<15)+2, -(1<<15)+3, - -(1<<8)-3,-(1<<8)-2,-(1<<8)-1, -(1<<8), -(1<<8)+1, -(1<<8)+2, -(1<<8)+3, - -(1<<7)-3,-(1<<7)-2,-(1<<7)-1, -(1<<7), -(1<<7)+1, -(1<<7)+2, -(1<<7)+3, - -4, -3, -2, -1, 0, 1, 2, 3, 4, - (1<<7)-3,(1<<7)-2,(1<<7)-1, (1<<7), (1<<7)+1, (1<<7)+2, (1<<7)+3, - (1<<8)-3,(1<<8)-2,(1<<8)-1, (1<<8), (1<<8)+1, (1<<8)+2, (1<<8)+3, - (1<<15)-3,(1<<15)-2,(1<<15)-1, (1<<15), (1<<15)+1, (1<<15)+2, (1<<15)+3, - (1<<16)-3,(1<<16)-2,(1<<16)-1, (1<<16), (1<<16)+1, (1<<16)+2, (1<<16)+3, - (1<<21)-3,(1<<21)-2,(1<<21)-1, (1<<21), (1<<21)+1, (1<<21)+2, (1<<21)+3, - (1<<22)-3,(1<<22)-2,(1<<22)-1, (1<<22), (1<<22)+1, (1<<22)+2, (1<<22)+3, - (1<<23)-3,(1<<23)-2,(1<<23)-1, (1<<23), (1<<23)+1, (1<<23)+2, (1<<23)+3, - (1<<24)-3,(1<<24)-2,(1<<24)-1, (1<<24), (1<<24)+1, (1<<24)+2, (1<<24)+3, - (1<<30)-3,(1<<30)-2,(1<<30)-1, (1<<30), (1<<30)+1, (1<<30)+2, (1<<30)+3, - INT_MAX-3, INT_MAX-2, INT_MAX-1, INT_MAX, // 0x80000000, 0x80000001 0x80000002 already covered above - UINT_MAX-3, UINT_MAX-2, UINT_MAX-1, UINT_MAX - }; - - int *o = (int *)out; - int i; + // Create the type names. + char convertString[128]; + char inName[32]; + char outName[32]; + switch (vectorSizetmp) + { + case 1: + strncpy(inName, gTypeNames[inType], sizeof(inName)); + strncpy(outName, gTypeNames[outType], sizeof(outName)); + snprintf(convertString, sizeof(convertString), "convert_%s%s%s", + outName, gSaturationNames[sat], + gRoundingModeNames[round]); + snprintf(testName, 256, "test_%s_%s", convertString, inName); + vlog("Building %s( %s ) test\n", convertString, inName); + break; + case 3: + strncpy(inName, gTypeNames[inType], sizeof(inName)); + strncpy(outName, gTypeNames[outType], sizeof(outName)); + snprintf(convertString, sizeof(convertString), + "convert_%s3%s%s", outName, gSaturationNames[sat], + gRoundingModeNames[round]); + snprintf(testName, 256, "test_%s_%s3", convertString, inName); + vlog("Building %s( %s3 ) test\n", convertString, inName); + break; + default: + snprintf(inName, sizeof(inName), "%s%d", gTypeNames[inType], + vectorSizetmp); + snprintf(outName, sizeof(outName), "%s%d", gTypeNames[outType], + vectorSizetmp); + snprintf(convertString, sizeof(convertString), "convert_%s%s%s", + outName, gSaturationNames[sat], + gRoundingModeNames[round]); + snprintf(testName, 256, "test_%s_%s", convertString, inName); + vlog("Building %s( %s ) test\n", convertString, inName); + break; + } + fflush(stdout); - for( i = 0; i < count; i++ ) { - if( gIsEmbedded ) { - o[i] = (int) genrand_int32(d); - } - else { - o[i] = (int) i + start; - } + if (vectorSizetmp == 3) + { + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " if( i + 1 < get_global_size(0))\n"; + source << " vstore3( " << convertString + << "( vload3( i, src)), i, dest );\n"; + source << " else\n"; + source << " {\n"; + source << " " << inName << "3 in;\n"; + source << " " << outName << "3 out;\n"; + source << " if( 0 == (i & 1) )\n"; + source << " in.y = src[3*i+1];\n"; + source << " in.x = src[3*i];\n"; + source << " out = " << convertString << "( in ); \n"; + source << " dest[3*i] = out.x;\n"; + source << " if( 0 == (i & 1) )\n"; + source << " dest[3*i+1] = out.y;\n"; + source << " }\n"; + source << "}\n"; + } + else + { + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " dest[i] = " << convertString << "( src[i] );\n"; + source << "}\n"; + } } + *outKernel = NULL; - if( 0 == start ) + const char *flags = NULL; + if (gForceFTZ) flags = "-cl-denorms-are-zero"; + + // build it + std::string sourceString = source.str(); + const char *programSource = sourceString.c_str(); + error = create_single_kernel_helper(gContext, &program, outKernel, 1, + &programSource, testName, flags); + if (error) { - size_t tableSize = sizeof( specialValuesInt ); - if( sizeof( int) * count < tableSize ) - tableSize = sizeof( int) * count; - memcpy( (char*)(o + i) - tableSize, specialValuesInt, tableSize ); + vlog_error("Failed to build kernel/program (err = %d).\n", error); + return NULL; } + + return program; } -void init_float( void *out, SaturationMode sat, RoundingMode round, Type destType, uint64_t start, int count, MTdata d ) +// + +int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, size_t blockCount) { - static const float specialValuesFloat[] = { - -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38), - MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f, - -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), - MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), - MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), - MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, - +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38), - MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f, - +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25), - MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27), - MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), - MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f - }; - - cl_uint *o = (cl_uint *)out; - int i; + // The global dimensions are just the blockCount to execute since we haven't + // set up multiple queues for multiple devices. + int error; - for( i = 0; i < count; i++ ) { - if( gIsEmbedded ) - o[i] = (cl_uint) genrand_int32(d); - else - o[i] = (cl_uint) i + start; - } + error = clSetKernelArg(kernel, 0, sizeof(inBuf), &inBuf); + error |= clSetKernelArg(kernel, 1, sizeof(outBuf), &outBuf); - if( 0 == start ) + if (error) { - size_t tableSize = sizeof( specialValuesFloat ); - if( sizeof( float) * count < tableSize ) - tableSize = sizeof( float) * count; - memcpy( (char*)(o + i) - tableSize, specialValuesFloat, tableSize ); + vlog_error("FAILED -- could not set kernel args (%d)\n", error); + return error; } - if( kUnsaturated == sat ) + if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &blockCount, + NULL, 0, NULL, NULL))) { - clampf func = gClampFloat[ destType ][round]; - float *f = (float *)out; - - for( i = 0; i < count; i++ ) - f[i] = func( f[i] ); + vlog_error("FAILED -- could not execute kernel (%d)\n", error); + return error; } -} - -// used to convert a bucket of bits into a search pattern through double -static inline double DoubleFromUInt32( uint32_t bits ); -static inline double DoubleFromUInt32( uint32_t bits ) -{ - union{ uint64_t u; double d;} u; - // split 0x89abcdef to 0x89abc00000000def - u.u = bits & 0xfffU; - u.u |= (uint64_t) (bits & ~0xfffU) << 32; - - // sign extend the leading bit of def segment as sign bit so that the middle region consists of either all 1s or 0s - u.u -= (bits & 0x800U) << 1; - - // return result - return u.d; + return 0; } -// A table of more difficult cases to get right -static const double specialValuesDouble[] = { - -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.80000000000001p64, -0x180000000000001LL, 8), - MAKE_HEX_DOUBLE(-0x1.8p64, -0x18LL, 60), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp64, -0x17ffffffffffffLL, 12), MAKE_HEX_DOUBLE(-0x1.80000000000001p63, -0x180000000000001LL, 7), MAKE_HEX_DOUBLE(-0x1.8p63, -0x18LL, 59), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp63, -0x17ffffffffffffLL, 11), - MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), MAKE_HEX_DOUBLE(-0x1.80000000000001p32, -0x180000000000001LL, -24), MAKE_HEX_DOUBLE(-0x1.8p32, -0x18LL, 28), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp32, -0x17ffffffffffffLL, -20), - MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.80000000000001p31, -0x180000000000001LL, -25), MAKE_HEX_DOUBLE(-0x1.8p31, -0x18LL, 27), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp31, -0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5, - -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), - MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), - MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074), - MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), - MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0, - - MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(0x1.80000000000001p63, 0x180000000000001LL, 7), MAKE_HEX_DOUBLE(0x1.8p63, 0x18LL, 59), MAKE_HEX_DOUBLE(0x1.7ffffffffffffp63, 0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), - MAKE_HEX_DOUBLE(+0x1.80000000000001p32, +0x180000000000001LL, -24), MAKE_HEX_DOUBLE(+0x1.8p32, +0x18LL, 28), MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp32, +0x17ffffffffffffLL, -20), - MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.80000000000001p31, +0x180000000000001LL, -25), MAKE_HEX_DOUBLE(+0x1.8p31, +0x18LL, 27), MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp31, +0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5, - +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), - MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), - MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), - MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074), - MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, - - MAKE_HEX_DOUBLE(-0x1.ffffffffffffep62, -0x1ffffffffffffeLL, 10), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp62, -0x1ffffffffffffcLL, 10), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep62, +0x1ffffffffffffeLL, 10), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp62, +0x1ffffffffffffcLL, 10), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), - MAKE_HEX_DOUBLE(-0x1.ffffffffffffep51, -0x1ffffffffffffeLL, -1), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp51, -0x1ffffffffffffcLL, -1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp51, -0x1fffffffffffffLL, -1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep51, +0x1ffffffffffffeLL, -1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp51, +0x1ffffffffffffcLL, -1), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp51, +0x1fffffffffffffLL, -1), - MAKE_HEX_DOUBLE(-0x1.ffffffffffffep52, -0x1ffffffffffffeLL, 0), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp52, -0x1ffffffffffffcLL, 0), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp52, -0x1fffffffffffffLL, 0), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep52, +0x1ffffffffffffeLL, 0), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp52, +0x1ffffffffffffcLL, 0), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp52, +0x1fffffffffffffLL, 0), - MAKE_HEX_DOUBLE(-0x1.ffffffffffffep53, -0x1ffffffffffffeLL, 1), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp53, -0x1ffffffffffffcLL, 1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp53, -0x1fffffffffffffLL, 1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep53, +0x1ffffffffffffeLL, 1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp53, +0x1ffffffffffffcLL, 1), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp53, +0x1fffffffffffffLL, 1), - MAKE_HEX_DOUBLE(-0x1.0000000000002p52, -0x10000000000002LL, 0), MAKE_HEX_DOUBLE(-0x1.0000000000001p52, -0x10000000000001LL, 0), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52), MAKE_HEX_DOUBLE(+0x1.0000000000002p52, +0x10000000000002LL, 0), MAKE_HEX_DOUBLE(+0x1.0000000000001p52, +0x10000000000001LL, 0), MAKE_HEX_DOUBLE(+0x1.0p52, +0x1LL, 52), - MAKE_HEX_DOUBLE(-0x1.0000000000002p53, -0x10000000000002LL, 1), MAKE_HEX_DOUBLE(-0x1.0000000000001p53, -0x10000000000001LL, 1), MAKE_HEX_DOUBLE(-0x1.0p53, -0x1LL, 53), MAKE_HEX_DOUBLE(+0x1.0000000000002p53, +0x10000000000002LL, 1), MAKE_HEX_DOUBLE(+0x1.0000000000001p53, +0x10000000000001LL, 1), MAKE_HEX_DOUBLE(+0x1.0p53, +0x1LL, 53), - MAKE_HEX_DOUBLE(-0x1.0000000000002p54, -0x10000000000002LL, 2), MAKE_HEX_DOUBLE(-0x1.0000000000001p54, -0x10000000000001LL, 2), MAKE_HEX_DOUBLE(-0x1.0p54, -0x1LL, 54), MAKE_HEX_DOUBLE(+0x1.0000000000002p54, +0x10000000000002LL, 2), MAKE_HEX_DOUBLE(+0x1.0000000000001p54, +0x10000000000001LL, 2), MAKE_HEX_DOUBLE(+0x1.0p54, +0x1LL, 54), - MAKE_HEX_DOUBLE(-0x1.fffffffefffffp62, -0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(-0x1.ffffffffp62, -0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(-0x1.ffffffff00001p62, -0x1ffffffff00001LL, 10), MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10), -}; - -void init_double( void *out, SaturationMode sat, RoundingMode round, Type destType, uint64_t start, int count, MTdata UNUSED d ) +int GetTestCase(const char *name, Type *outType, Type *inType, + SaturationMode *sat, RoundingMode *round) { - double *o = (double*)out; int i; - for( i = 0; i < count; i++ ) - { - uint64_t z = i + start; - o[i] = DoubleFromUInt32( (uint32_t) z ^ (uint32_t) (z >> 32)); - } + // Find the return type + for (i = 0; i < kTypeCount; i++) + if (name == strstr(name, gTypeNames[i])) + { + *outType = (Type)i; + name += strlen(gTypeNames[i]); - if( 0 == start ) - { - size_t tableSize = sizeof( specialValuesDouble ); - if( sizeof( cl_double) * count < tableSize ) - tableSize = sizeof( cl_double) * count; - memcpy( (char*)(o + i) - tableSize, specialValuesDouble, tableSize ); - } + break; + } - if( 0 == sat ) - { - clampd func = gClampDouble[ destType ][round]; + if (i == kTypeCount) return -1; - for( i = 0; i < count; i++ ) - o[i] = func( o[i] ); - } -} + // Check to see if _sat appears next + *sat = (SaturationMode)0; + for (i = 1; i < kSaturationModeCount; i++) + if (name == strstr(name, gSaturationNames[i])) + { + *sat = (SaturationMode)i; + name += strlen(gSaturationNames[i]); + break; + } -cl_ulong random64( MTdata d ) -{ - return (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32); -} + *round = (RoundingMode)0; + for (i = 1; i < kRoundingModeCount; i++) + if (name == strstr(name, gRoundingModeNames[i])) + { + *round = (RoundingMode)i; + name += strlen(gRoundingModeNames[i]); + break; + } -void init_ulong( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata d ) -{ - cl_ulong *o = (cl_ulong *)out; - cl_ulong i, j, k; + if (*name != '_') return -2; + name++; - i = 0; - if( start == 0 ) - { - //Try various powers of two - for( j = 0; j < (cl_ulong) count && j < 8 * sizeof(cl_ulong); j++ ) - o[j] = (cl_ulong) 1 << j; - i = j; - - // try the complement of those - for( j = 0; i < (cl_ulong) count && j < 8 * sizeof(cl_ulong); j++ ) - o[i++] = ~((cl_ulong) 1 << j); - - //Try various negative powers of two - for( j = 0; i < (cl_ulong) count && j < 8 * sizeof(cl_ulong); j++ ) - o[i++] = (cl_ulong) 0xFFFFFFFFFFFFFFFEULL << j; - - //try various powers of two plus 1, shifted by various amounts - for( j = 0; i < (cl_ulong)count && j < 8 * sizeof(cl_ulong); j++ ) - for( k = 0; i < (cl_ulong)count && k < 8 * sizeof(cl_ulong) - j; k++ ) - o[i++] = (((cl_ulong) 1 << j) + 1) << k; - - //try various powers of two minus 1 - for( j = 0; i < (cl_ulong)count && j < 8 * sizeof(cl_ulong); j++ ) - for( k = 0; i < (cl_ulong)count && k < 8 * sizeof(cl_ulong) - j; k++ ) - o[i++] = (((cl_ulong) 1 << j) - 1) << k; - - // Other patterns - cl_ulong pattern[] = { 0x3333333333333333ULL, 0x5555555555555555ULL, 0x9999999999999999ULL, 0x6666666666666666ULL, 0xccccccccccccccccULL, 0xaaaaaaaaaaaaaaaaULL }; - cl_ulong mask[] = { 0xffffffffffffffffULL, 0xff00ff00ff00ff00ULL, 0xffff0000ffff0000ULL, 0xffffffff00000000ULL }; - for( j = 0; i < (cl_ulong) count && j < sizeof(pattern) / sizeof( pattern[0]); j++ ) - for( k = 0; i + 2 <= (cl_ulong) count && k < sizeof(mask) / sizeof( mask[0]); k++ ) - { - o[i++] = pattern[j] & mask[k]; - o[i++] = pattern[j] & ~mask[k]; - } - } + for (i = 0; i < kTypeCount; i++) + if (name == strstr(name, gTypeNames[i])) + { + *inType = (Type)i; + name += strlen(gTypeNames[i]); - for( ; i < (cl_ulong) count; i++ ) - o[i] = random64(d); -} + break; + } -void init_long( void *out, SaturationMode sat, RoundingMode round, Type destType, uint64_t start, int count, MTdata d ) -{ - init_ulong( out, sat, round, destType, start, count, d ); -} + if (i == kTypeCount) return -3; -// ====== - -void uchar2uchar_many( void *out, void *in, size_t n); -void uchar2uchar_sat_many( void *out, void *in, size_t n); -void char2uchar_many( void *out, void *in, size_t n); -void char2uchar_sat_many( void *out, void *in, size_t n); -void ushort2uchar_many( void *out, void *in, size_t n); -void ushort2uchar_sat_many( void *out, void *in, size_t n); -void short2uchar_many( void *out, void *in, size_t n); -void short2uchar_sat_many( void *out, void *in, size_t n); -void uint2uchar_many( void *out, void *in, size_t n); -void uint2uchar_sat_many( void *out, void *in, size_t n); -void int2uchar_many( void *out, void *in, size_t n); -void int2uchar_sat_many( void *out, void *in, size_t n); -void float2uchar_many( void *out, void *in, size_t n); -void float2uchar_sat_many( void *out, void *in, size_t n); -void double2uchar_many( void *out, void *in, size_t n); -void double2uchar_sat_many( void *out, void *in, size_t n); -void ulong2uchar_many( void *out, void *in, size_t n); -void ulong2uchar_sat_many( void *out, void *in, size_t n); -void long2uchar_many( void *out, void *in, size_t n); -void long2uchar_sat_many( void *out, void *in, size_t n); -void uchar2char_many( void *out, void *in, size_t n); -void uchar2char_sat_many( void *out, void *in, size_t n); -void char2char_many( void *out, void *in, size_t n); -void char2char_sat_many( void *out, void *in, size_t n); -void ushort2char_many( void *out, void *in, size_t n); -void ushort2char_sat_many( void *out, void *in, size_t n); -void short2char_many( void *out, void *in, size_t n); -void short2char_sat_many( void *out, void *in, size_t n); -void uint2char_many( void *out, void *in, size_t n); -void uint2char_sat_many( void *out, void *in, size_t n); -void int2char_many( void *out, void *in, size_t n); -void int2char_sat_many( void *out, void *in, size_t n); -void float2char_many( void *out, void *in, size_t n); -void float2char_sat_many( void *out, void *in, size_t n); -void double2char_many( void *out, void *in, size_t n); -void double2char_sat_many( void *out, void *in, size_t n); -void ulong2char_many( void *out, void *in, size_t n); -void ulong2char_sat_many( void *out, void *in, size_t n); -void long2char_many( void *out, void *in, size_t n); -void long2char_sat_many( void *out, void *in, size_t n); -void uchar2ushort_many( void *out, void *in, size_t n); -void uchar2ushort_sat_many( void *out, void *in, size_t n); -void char2ushort_many( void *out, void *in, size_t n); -void char2ushort_sat_many( void *out, void *in, size_t n); -void ushort2ushort_many( void *out, void *in, size_t n); -void ushort2ushort_sat_many( void *out, void *in, size_t n); -void short2ushort_many( void *out, void *in, size_t n); -void short2ushort_sat_many( void *out, void *in, size_t n); -void uint2ushort_many( void *out, void *in, size_t n); -void uint2ushort_sat_many( void *out, void *in, size_t n); -void int2ushort_many( void *out, void *in, size_t n); -void int2ushort_sat_many( void *out, void *in, size_t n); -void float2ushort_many( void *out, void *in, size_t n); -void float2ushort_sat_many( void *out, void *in, size_t n); -void double2ushort_many( void *out, void *in, size_t n); -void double2ushort_sat_many( void *out, void *in, size_t n); -void ulong2ushort_many( void *out, void *in, size_t n); -void ulong2ushort_sat_many( void *out, void *in, size_t n); -void long2ushort_many( void *out, void *in, size_t n); -void long2ushort_sat_many( void *out, void *in, size_t n); -void uchar2short_many( void *out, void *in, size_t n); -void uchar2short_sat_many( void *out, void *in, size_t n); -void char2short_many( void *out, void *in, size_t n); -void char2short_sat_many( void *out, void *in, size_t n); -void ushort2short_many( void *out, void *in, size_t n); -void ushort2short_sat_many( void *out, void *in, size_t n); -void short2short_many( void *out, void *in, size_t n); -void short2short_sat_many( void *out, void *in, size_t n); -void uint2short_many( void *out, void *in, size_t n); -void uint2short_sat_many( void *out, void *in, size_t n); -void int2short_many( void *out, void *in, size_t n); -void int2short_sat_many( void *out, void *in, size_t n); -void float2short_many( void *out, void *in, size_t n); -void float2short_sat_many( void *out, void *in, size_t n); -void double2short_many( void *out, void *in, size_t n); -void double2short_sat_many( void *out, void *in, size_t n); -void ulong2short_many( void *out, void *in, size_t n); -void ulong2short_sat_many( void *out, void *in, size_t n); -void long2short_many( void *out, void *in, size_t n); -void long2short_sat_many( void *out, void *in, size_t n); -void uchar2uint_many( void *out, void *in, size_t n); -void uchar2uint_sat_many( void *out, void *in, size_t n); -void char2uint_many( void *out, void *in, size_t n); -void char2uint_sat_many( void *out, void *in, size_t n); -void ushort2uint_many( void *out, void *in, size_t n); -void ushort2uint_sat_many( void *out, void *in, size_t n); -void short2uint_many( void *out, void *in, size_t n); -void short2uint_sat_many( void *out, void *in, size_t n); -void uint2uint_many( void *out, void *in, size_t n); -void uint2uint_sat_many( void *out, void *in, size_t n); -void int2uint_many( void *out, void *in, size_t n); -void int2uint_sat_many( void *out, void *in, size_t n); -void float2uint_many( void *out, void *in, size_t n); -void float2uint_sat_many( void *out, void *in, size_t n); -void double2uint_many( void *out, void *in, size_t n); -void double2uint_sat_many( void *out, void *in, size_t n); -void ulong2uint_many( void *out, void *in, size_t n); -void ulong2uint_sat_many( void *out, void *in, size_t n); -void long2uint_many( void *out, void *in, size_t n); -void long2uint_sat_many( void *out, void *in, size_t n); -void uchar2int_many( void *out, void *in, size_t n); -void uchar2int_sat_many( void *out, void *in, size_t n); -void char2int_many( void *out, void *in, size_t n); -void char2int_sat_many( void *out, void *in, size_t n); -void ushort2int_many( void *out, void *in, size_t n); -void ushort2int_sat_many( void *out, void *in, size_t n); -void short2int_many( void *out, void *in, size_t n); -void short2int_sat_many( void *out, void *in, size_t n); -void uint2int_many( void *out, void *in, size_t n); -void uint2int_sat_many( void *out, void *in, size_t n); -void int2int_many( void *out, void *in, size_t n); -void int2int_sat_many( void *out, void *in, size_t n); -void float2int_many( void *out, void *in, size_t n); -void float2int_sat_many( void *out, void *in, size_t n); -void double2int_many( void *out, void *in, size_t n); -void double2int_sat_many( void *out, void *in, size_t n); -void ulong2int_many( void *out, void *in, size_t n); -void ulong2int_sat_many( void *out, void *in, size_t n); -void long2int_many( void *out, void *in, size_t n); -void long2int_sat_many( void *out, void *in, size_t n); -void uchar2float_many( void *out, void *in, size_t n); -void uchar2float_sat_many( void *out, void *in, size_t n); -void char2float_many( void *out, void *in, size_t n); -void char2float_sat_many( void *out, void *in, size_t n); -void ushort2float_many( void *out, void *in, size_t n); -void ushort2float_sat_many( void *out, void *in, size_t n); -void short2float_many( void *out, void *in, size_t n); -void short2float_sat_many( void *out, void *in, size_t n); -void uint2float_many( void *out, void *in, size_t n); -void uint2float_sat_many( void *out, void *in, size_t n); -void int2float_many( void *out, void *in, size_t n); -void int2float_sat_many( void *out, void *in, size_t n); -void float2float_many( void *out, void *in, size_t n); -void float2float_sat_many( void *out, void *in, size_t n); -void double2float_many( void *out, void *in, size_t n); -void double2float_sat_many( void *out, void *in, size_t n); -void ulong2float_many( void *out, void *in, size_t n); -void ulong2float_sat_many( void *out, void *in, size_t n); -void long2float_many( void *out, void *in, size_t n); -void long2float_sat_many( void *out, void *in, size_t n); -void uchar2double_many( void *out, void *in, size_t n); -void uchar2double_sat_many( void *out, void *in, size_t n); -void char2double_many( void *out, void *in, size_t n); -void char2double_sat_many( void *out, void *in, size_t n); -void ushort2double_many( void *out, void *in, size_t n); -void ushort2double_sat_many( void *out, void *in, size_t n); -void short2double_many( void *out, void *in, size_t n); -void short2double_sat_many( void *out, void *in, size_t n); -void uint2double_many( void *out, void *in, size_t n); -void uint2double_sat_many( void *out, void *in, size_t n); -void int2double_many( void *out, void *in, size_t n); -void int2double_sat_many( void *out, void *in, size_t n); -void float2double_many( void *out, void *in, size_t n); -void float2double_sat_many( void *out, void *in, size_t n); -void double2double_many( void *out, void *in, size_t n); -void double2double_sat_many( void *out, void *in, size_t n); -void ulong2double_many( void *out, void *in, size_t n); -void ulong2double_sat_many( void *out, void *in, size_t n); -void long2double_many( void *out, void *in, size_t n); -void long2double_sat_many( void *out, void *in, size_t n); -void uchar2ulong_many( void *out, void *in, size_t n); -void uchar2ulong_sat_many( void *out, void *in, size_t n); -void char2ulong_many( void *out, void *in, size_t n); -void char2ulong_sat_many( void *out, void *in, size_t n); -void ushort2ulong_many( void *out, void *in, size_t n); -void ushort2ulong_sat_many( void *out, void *in, size_t n); -void short2ulong_many( void *out, void *in, size_t n); -void short2ulong_sat_many( void *out, void *in, size_t n); -void uint2ulong_many( void *out, void *in, size_t n); -void uint2ulong_sat_many( void *out, void *in, size_t n); -void int2ulong_many( void *out, void *in, size_t n); -void int2ulong_sat_many( void *out, void *in, size_t n); -void float2ulong_many( void *out, void *in, size_t n); -void float2ulong_sat_many( void *out, void *in, size_t n); -void double2ulong_many( void *out, void *in, size_t n); -void double2ulong_sat_many( void *out, void *in, size_t n); -void ulong2ulong_many( void *out, void *in, size_t n); -void ulong2ulong_sat_many( void *out, void *in, size_t n); -void long2ulong_many( void *out, void *in, size_t n); -void long2ulong_sat_many( void *out, void *in, size_t n); -void uchar2long_many( void *out, void *in, size_t n); -void uchar2long_sat_many( void *out, void *in, size_t n); -void char2long_many( void *out, void *in, size_t n); -void char2long_sat_many( void *out, void *in, size_t n); -void ushort2long_many( void *out, void *in, size_t n); -void ushort2long_sat_many( void *out, void *in, size_t n); -void short2long_many( void *out, void *in, size_t n); -void short2long_sat_many( void *out, void *in, size_t n); -void uint2long_many( void *out, void *in, size_t n); -void uint2long_sat_many( void *out, void *in, size_t n); -void int2long_many( void *out, void *in, size_t n); -void int2long_sat_many( void *out, void *in, size_t n); -void float2long_many( void *out, void *in, size_t n); -void float2long_sat_many( void *out, void *in, size_t n); -void double2long_many( void *out, void *in, size_t n); -void double2long_sat_many( void *out, void *in, size_t n); -void ulong2long_many( void *out, void *in, size_t n); -void ulong2long_sat_many( void *out, void *in, size_t n); -void long2long_many( void *out, void *in, size_t n); -void long2long_sat_many( void *out, void *in, size_t n); - -void uchar2uchar_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uchar )); } -void uchar2uchar_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uchar )); } -void char2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_char)); }} -void char2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_char)); }} -void ushort2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ushort)); }} -void short2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_short)); }} -void short2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_short)); }} -void uint2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_uint)); }} -void uint2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_uint)); }} -void int2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_int)); }} -void int2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_int)); }} -void float2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_float)); }} -void float2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_float)); }} -void double2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_double)); }} -void double2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_double)); }} -void ulong2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ulong)); }} -void long2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_long)); }} -void long2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_long)); }} -void uchar2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uchar)); }} -void char2char_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_char )); } -void char2char_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_char )); } -void ushort2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ushort)); }} -void short2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_short)); }} -void short2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_short)); }} -void uint2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uint)); }} -void uint2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uint)); }} -void int2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_int)); }} -void int2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_int)); }} -void float2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_float)); }} -void float2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_float)); }} -void double2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_double)); }} -void double2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_double)); }} -void ulong2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ulong)); }} -void long2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_long)); }} -void long2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_long)); }} -void uchar2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uchar)); }} -void char2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_char)); }} -void char2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_char)); }} -void ushort2ushort_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ushort )); } -void ushort2ushort_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ushort )); } -void short2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_short)); }} -void short2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_short)); }} -void uint2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uint)); }} -void uint2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uint)); }} -void int2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_int)); }} -void int2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_int)); }} -void float2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_float)); }} -void float2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_float)); }} -void double2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_double)); }} -void double2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_double)); }} -void ulong2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_ulong)); }} -void long2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_long)); }} -void long2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_long)); }} -void uchar2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uchar)); }} -void char2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_char)); }} -void char2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_char)); }} -void ushort2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ushort)); }} -void short2short_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_short )); } -void short2short_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_short )); } -void uint2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uint)); }} -void uint2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uint)); }} -void int2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_int)); }} -void int2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_int)); }} -void float2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_float)); }} -void float2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_float)); }} -void double2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_double)); }} -void double2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_double)); }} -void ulong2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ulong)); }} -void long2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_long)); }} -void long2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_long)); }} -void uchar2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_uchar)); }} -void char2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_char)); }} -void char2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_char)); }} -void ushort2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ushort)); }} -void short2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_short)); }} -void short2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_short)); }} -void uint2uint_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uint )); } -void uint2uint_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uint )); } -void int2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_int)); }} -void int2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_int)); }} -void float2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_float)); }} -void float2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_float)); }} -void double2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_double)); }} -void double2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_double)); }} -void ulong2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ulong)); }} -void long2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_long)); }} -void long2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_long)); }} -void uchar2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uchar)); }} -void char2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_char)); }} -void char2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_char)); }} -void ushort2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ushort)); }} -void short2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_short)); }} -void short2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_short)); }} -void uint2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uint)); }} -void uint2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uint)); }} -void int2int_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_int )); } -void int2int_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_int )); } -void float2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_float)); }} -void float2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_float)); }} -void double2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_double)); }} -void double2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_double)); }} -void ulong2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ulong)); }} -void long2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_long)); }} -void long2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_long)); }} -void uchar2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uchar)); }} -void char2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_char)); }} -void char2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_char)); }} -void ushort2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ushort)); }} -void short2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_short)); }} -void short2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_short)); }} -void uint2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uint)); }} -void uint2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uint)); }} -void int2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_int)); }} -void int2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_int)); }} -void float2float_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_float )); } -void float2float_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_float )); } -void double2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_double)); }} -void double2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_double)); }} -void ulong2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ulong)); }} -void long2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_long)); }} -void long2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_long)); }} -void uchar2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uchar)); }} -void char2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_char)); }} -void char2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_char)); }} -void ushort2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ushort)); }} -void short2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_short)); }} -void short2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_short)); }} -void uint2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uint)); }} -void uint2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uint)); }} -void int2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_int)); }} -void int2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_int)); }} -void float2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_float)); }} -void float2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_float)); }} -void double2double_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_double )); } -void double2double_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_double )); } -void ulong2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ulong)); }} -void long2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_long)); }} -void long2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_long)); }} -void uchar2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uchar)); }} -void char2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_char)); }} -void char2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_char)); }} -void ushort2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_ushort)); }} -void short2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_short)); }} -void short2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_short)); }} -void uint2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uint)); }} -void uint2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uint)); }} -void int2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_int)); }} -void int2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_int)); }} -void float2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_float)); }} -void float2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_float)); }} -void double2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_double)); }} -void double2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_double)); }} -void ulong2ulong_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ulong )); } -void ulong2ulong_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ulong )); } -void long2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_long)); }} -void long2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_long)); }} -void uchar2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uchar)); }} -void uchar2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uchar)); }} -void char2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_char)); }} -void char2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_char)); }} -void ushort2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ushort)); }} -void ushort2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ushort)); }} -void short2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_short)); }} -void short2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_short)); }} -void uint2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uint)); }} -void uint2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uint)); }} -void int2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_int)); }} -void int2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_int)); }} -void float2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_float)); }} -void float2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_float)); }} -void double2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_double)); }} -void double2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_double)); }} -void ulong2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ulong)); }} -void ulong2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ulong)); }} -void long2long_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_long )); } -void long2long_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_long )); } - -Convert gSaturatedConversions[kTypeCount][kTypeCount] = { - { uchar2uchar_sat_many, char2uchar_sat_many, ushort2uchar_sat_many, short2uchar_sat_many, uint2uchar_sat_many, int2uchar_sat_many, float2uchar_sat_many, double2uchar_sat_many, ulong2uchar_sat_many, long2uchar_sat_many, }, - { uchar2char_sat_many, char2char_sat_many, ushort2char_sat_many, short2char_sat_many, uint2char_sat_many, int2char_sat_many, float2char_sat_many, double2char_sat_many, ulong2char_sat_many, long2char_sat_many, }, - { uchar2ushort_sat_many, char2ushort_sat_many, ushort2ushort_sat_many, short2ushort_sat_many, uint2ushort_sat_many, int2ushort_sat_many, float2ushort_sat_many, double2ushort_sat_many, ulong2ushort_sat_many, long2ushort_sat_many, }, - { uchar2short_sat_many, char2short_sat_many, ushort2short_sat_many, short2short_sat_many, uint2short_sat_many, int2short_sat_many, float2short_sat_many, double2short_sat_many, ulong2short_sat_many, long2short_sat_many, }, - { uchar2uint_sat_many, char2uint_sat_many, ushort2uint_sat_many, short2uint_sat_many, uint2uint_sat_many, int2uint_sat_many, float2uint_sat_many, double2uint_sat_many, ulong2uint_sat_many, long2uint_sat_many, }, - { uchar2int_sat_many, char2int_sat_many, ushort2int_sat_many, short2int_sat_many, uint2int_sat_many, int2int_sat_many, float2int_sat_many, double2int_sat_many, ulong2int_sat_many,long2int_sat_many, }, - { uchar2float_sat_many, char2float_sat_many, ushort2float_sat_many, short2float_sat_many, uint2float_sat_many, int2float_sat_many, float2float_sat_many, double2float_sat_many, ulong2float_sat_many, long2float_sat_many, }, - { uchar2double_sat_many, char2double_sat_many, ushort2double_sat_many, short2double_sat_many, uint2double_sat_many, int2double_sat_many, float2double_sat_many, double2double_sat_many, ulong2double_sat_many, long2double_sat_many, }, - { uchar2ulong_sat_many, char2ulong_sat_many, ushort2ulong_sat_many, short2ulong_sat_many, uint2ulong_sat_many, int2ulong_sat_many, float2ulong_sat_many, double2ulong_sat_many, ulong2ulong_sat_many, long2ulong_sat_many, }, - { uchar2long_sat_many, char2long_sat_many, ushort2long_sat_many, short2long_sat_many, uint2long_sat_many, int2long_sat_many, float2long_sat_many, double2long_sat_many, ulong2long_sat_many, long2long_sat_many, }, -}; + if (*name != '\0') return -4; -Convert gConversions[kTypeCount][kTypeCount] = { - { uchar2uchar_many, char2uchar_many, ushort2uchar_many, short2uchar_many, uint2uchar_many, int2uchar_many, float2uchar_many, double2uchar_many, ulong2uchar_many, long2uchar_many, }, - { uchar2char_many, char2char_many, ushort2char_many, short2char_many, uint2char_many, int2char_many, float2char_many, double2char_many, ulong2char_many, long2char_many, }, - { uchar2ushort_many, char2ushort_many, ushort2ushort_many, short2ushort_many, uint2ushort_many, int2ushort_many, float2ushort_many, double2ushort_many, ulong2ushort_many, long2ushort_many, }, - { uchar2short_many, char2short_many, ushort2short_many, short2short_many, uint2short_many, int2short_many, float2short_many, double2short_many, ulong2short_many, long2short_many, }, - { uchar2uint_many, char2uint_many, ushort2uint_many, short2uint_many, uint2uint_many, int2uint_many, float2uint_many, double2uint_many, ulong2uint_many, long2uint_many, }, - { uchar2int_many, char2int_many, ushort2int_many, short2int_many, uint2int_many, int2int_many, float2int_many, double2int_many, ulong2int_many, long2int_many, }, - { uchar2float_many, char2float_many, ushort2float_many, short2float_many, uint2float_many, int2float_many, float2float_many, double2float_many, ulong2float_many, long2float_many, }, - { uchar2double_many, char2double_many, ushort2double_many, short2double_many, uint2double_many, int2double_many, float2double_many, double2double_many, ulong2double_many, long2double_many, }, - { uchar2ulong_many, char2ulong_many, ushort2ulong_many, short2ulong_many, uint2ulong_many, int2ulong_many, float2ulong_many, double2ulong_many, ulong2ulong_many, long2ulong_many, }, - { uchar2long_many, char2long_many, ushort2long_many, short2long_many, uint2long_many, int2long_many, float2long_many, double2long_many, ulong2long_many, long2long_many, }, -}; + return 0; +} + +} // namespace conv_test diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h index ab887afd..2314ee74 100644 --- a/test_conformance/conversions/basic_test_conversions.h +++ b/test_conformance/conversions/basic_test_conversions.h @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2023 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -16,8 +16,6 @@ #ifndef BASIC_TEST_CONVERSIONS_H #define BASIC_TEST_CONVERSIONS_H -#include "harness/compat.h" - #if !defined(_WIN32) #include <unistd.h> #endif @@ -33,22 +31,23 @@ #endif #include "harness/mt19937.h" +#include "harness/testHarness.h" +#include "harness/typeWrappers.h" -typedef void (*Convert)( void *dest, void *src, size_t ); +#include <memory> +#include <tuple> +#include <vector> -#define kVectorSizeCount 6 -#define kMaxVectorSize 16 +#include "conversions_data_info.h" -typedef enum -{ - kUnsaturated = 0, - kSaturated, +#define kVectorSizeCount 6 +#define kMaxVectorSize 16 +#define kPageSize 4096 - kSaturationModeCount -}SaturationMode; +#define BUFFER_SIZE (1024 * 1024) +#define EMBEDDED_REDUCTION_FACTOR 16 +#define PERF_LOOP_COUNT 100 -extern Convert gConversions[kTypeCount][kTypeCount]; // [dest format][source format] -extern Convert gSaturatedConversions[kTypeCount][kTypeCount]; // [dest format][source format] extern const char *gTypeNames[ kTypeCount ]; extern const char *gRoundingModeNames[ kRoundingModeCount ]; // { "", "_rte", "_rtp", "_rtn", "_rtz" } extern const char *gSaturationNames[ kSaturationModeCount ]; // { "", "_sat" } @@ -68,5 +67,324 @@ extern InitDataFunc gInitFunctions[ kTypeCount ]; typedef int (*CheckResults)( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize ); extern CheckResults gCheckResults[ kTypeCount ]; +#define kCallStyleCount (kVectorSizeCount + 1 /* for implicit scalar */) + +extern MTdata gMTdata; +extern cl_command_queue gQueue; +extern cl_context gContext; +extern cl_mem gInBuffer; +extern cl_mem gOutBuffers[]; +extern int gHasDouble; +extern int gTestDouble; +extern int gWimpyMode; +extern int gWimpyReductionFactor; +extern int gSkipTesting; +extern int gMinVectorSize; +extern int gMaxVectorSize; +extern int gForceFTZ; +extern int gTimeResults; +extern int gReportAverageTimes; +extern int gStartTestNumber; +extern int gEndTestNumber; +extern int gIsRTZ; +extern void *gIn; +extern void *gRef; +extern void *gAllowZ; +extern void *gOut[]; + +extern const char **argList; +extern int argCount; + +extern const char *sizeNames[]; +extern int vectorSizes[]; + +extern size_t gComputeDevices; +extern uint32_t gDeviceFrequency; + +namespace conv_test { + +cl_program MakeProgram(Type outType, Type inType, SaturationMode sat, + RoundingMode round, int vectorSize, + cl_kernel *outKernel); + +int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, size_t blockCount); + +int GetTestCase(const char *name, Type *outType, Type *inType, + SaturationMode *sat, RoundingMode *round); + +cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p); +cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p); +uint64_t GetTime(void); + +void WriteInputBufferComplete(void *); +void *FlushToZero(void); +void UnFlushToZero(void *); +} + +struct CalcRefValsBase +{ + virtual ~CalcRefValsBase() = default; + virtual int check_result(void *, uint32_t, int) { return 0; } + + // pointer back to the parent WriteInputBufferInfo struct + struct WriteInputBufferInfo *parent; + clKernelWrapper kernel; // the kernel for this vector size + clProgramWrapper program; // the program for this vector size + cl_uint vectorSize; // the vector size for this callback chain + void *p; // the pointer to mapped result data for this vector size + cl_int result; +}; + +template <typename InType, typename OutType> +struct CalcRefValsPat : CalcRefValsBase +{ + int check_result(void *, uint32_t, int) override; +}; + +struct WriteInputBufferInfo +{ + WriteInputBufferInfo() + : calcReferenceValues(nullptr), doneBarrier(nullptr), count(0), + outType(kuchar), inType(kuchar), barrierCount(0) + {} + + volatile cl_event + calcReferenceValues; // user event which signals when main thread is + // done calculating reference values + volatile cl_event + doneBarrier; // user event which signals when worker threads are done + cl_uint count; // the number of elements in the array + Type outType; // the data type of the conversion result + Type inType; // the data type of the conversion input + volatile int barrierCount; + + std::vector<std::unique_ptr<CalcRefValsBase>> calcInfo; +}; + +// Must be aligned with Type enums! +using TypeIter = std::tuple<cl_uchar, cl_char, cl_ushort, cl_short, cl_uint, + cl_int, cl_float, cl_double, cl_ulong, cl_long>; + +// Helper test fixture for constructing OpenCL objects used in testing +// a variety of simple command-buffer enqueue scenarios. +struct ConversionsTest +{ + virtual ~ConversionsTest() = default; + + ConversionsTest(cl_device_id device, cl_context context, + cl_command_queue queue); + + cl_int SetUp(int elements); + + // Test body returning an OpenCL error code + cl_int Run(); + + template <typename InType, typename OutType> + int DoTest(Type outType, Type inType, SaturationMode sat, + RoundingMode round); + + template <typename InType, typename OutType> + void TestTypesConversion(const Type &inType, const Type &outType, int &tn, + const int smvs); + +protected: + cl_context context; + cl_device_id device; + cl_command_queue queue; + + size_t num_elements; + + TypeIter typeIterator; +}; + +struct CustomConversionsTest : ConversionsTest +{ + CustomConversionsTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : ConversionsTest(device, context, queue) + {} + + cl_int Run(); +}; + +template <class T> +int MakeAndRunTest(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + T test_fixture(device, context, queue); + + cl_int error = test_fixture.SetUp(num_elements); + test_error_ret(error, "Error in test initialization", TEST_FAIL); + + return test_fixture.Run(); +} + +struct TestType +{ + template <typename T> bool testType(Type in) + { + switch (in) + { + default: return false; + case kuchar: return std::is_same<cl_uchar, T>::value; + case kchar: return std::is_same<cl_char, T>::value; + case kushort: return std::is_same<cl_ushort, T>::value; + case kshort: return std::is_same<cl_short, T>::value; + case kuint: return std::is_same<cl_uint, T>::value; + case kint: return std::is_same<cl_int, T>::value; + case kfloat: return std::is_same<cl_float, T>::value; + case kdouble: return std::is_same<cl_double, T>::value; + case kulong: return std::is_same<cl_ulong, T>::value; + case klong: return std::is_same<cl_long, T>::value; + } + } +}; + +// Helper structures to iterate over all tuple attributes of different types +struct IterOverTypes : public TestType +{ + IterOverTypes(const TypeIter &typeIter, ConversionsTest &test) + : inType((Type)0), outType((Type)0), typeIter(typeIter), test(test), + testNumber(-1), startMinVectorSize(gMinVectorSize) + {} + + void Run() { for_each_out_elem(typeIter); } + +protected: + template <std::size_t Out = 0, typename OutType> + void iterate_out_type(const OutType &t) + { + for_each_in_elem<0, Out, OutType>(typeIter); + outType = (Type)(outType + 1); + inType = (Type)0; + } + + template <std::size_t In, std::size_t Out, typename OutType, + typename InType> + void iterate_in_type(const InType &t) + { + if (!testType<InType>(inType)) vlog_error("Unexpected data type!\n"); + + if (!testType<OutType>(outType)) vlog_error("Unexpected data type!\n"); + + // run the conversions + test.TestTypesConversion<InType, OutType>(inType, outType, testNumber, + startMinVectorSize); + inType = (Type)(inType + 1); + } + + template <std::size_t Out = 0, typename... Tp> + inline typename std::enable_if<Out == sizeof...(Tp), void>::type + for_each_out_elem( + const std::tuple<Tp...> &) // Unused arguments are given no names. + {} + + template <std::size_t Out = 0, typename... Tp> + inline typename std::enable_if < Out<sizeof...(Tp), void>::type + for_each_out_elem(const std::tuple<Tp...> &t) + { + iterate_out_type<Out>(std::get<Out>(t)); + for_each_out_elem<Out + 1, Tp...>(t); + } + + template <std::size_t In = 0, std::size_t Out, typename OutType, + typename... Tp> + inline typename std::enable_if<In == sizeof...(Tp), void>::type + for_each_in_elem( + const std::tuple<Tp...> &) // Unused arguments are given no names. + {} + + template <std::size_t In = 0, std::size_t Out, typename OutType, + typename... Tp> + inline typename std::enable_if < In<sizeof...(Tp), void>::type + for_each_in_elem(const std::tuple<Tp...> &t) + { + iterate_in_type<In, Out, OutType>(std::get<In>(t)); + for_each_in_elem<In + 1, Out, OutType, Tp...>(t); + } + +protected: + Type inType; + Type outType; + const TypeIter &typeIter; + ConversionsTest &test; + int testNumber; + int startMinVectorSize; +}; + + +// Helper structures to select type 2 type conversion test case +struct IterOverSelectedTypes : public TestType +{ + IterOverSelectedTypes(const TypeIter &typeIter, ConversionsTest &test, + const Type in, const Type out, + const RoundingMode round, const SaturationMode sat) + : inType(in), outType(out), rounding(round), saturation(sat), + typeIter(typeIter), test(test), testNumber(-1), + startMinVectorSize(gMinVectorSize) + {} + + void Run() { for_each_out_elem(typeIter); } + +protected: + template <std::size_t Out = 0, typename OutType> + void iterate_out_type(const OutType &t) + { + for_each_in_elem<0, Out, OutType>(typeIter); + } + + template <std::size_t In, std::size_t Out, typename OutType, + typename InType> + void iterate_in_type(const InType &t) + { + if (testType<InType>(inType) && testType<OutType>(outType)) + { + // run selected conversion + // testing of the result will happen afterwards + test.DoTest<InType, OutType>(outType, inType, saturation, rounding); + } + } + + template <std::size_t Out = 0, typename... Tp> + inline typename std::enable_if<Out == sizeof...(Tp), void>::type + for_each_out_elem(const std::tuple<Tp...> &) + {} + + template <std::size_t Out = 0, typename... Tp> + inline typename std::enable_if < Out<sizeof...(Tp), void>::type + for_each_out_elem(const std::tuple<Tp...> &t) + { + iterate_out_type<Out>(std::get<Out>(t)); + for_each_out_elem<Out + 1, Tp...>(t); + } + + template <std::size_t In = 0, std::size_t Out, typename OutType, + typename... Tp> + inline typename std::enable_if<In == sizeof...(Tp), void>::type + for_each_in_elem(const std::tuple<Tp...> &) + {} + + template <std::size_t In = 0, std::size_t Out, typename OutType, + typename... Tp> + inline typename std::enable_if < In<sizeof...(Tp), void>::type + for_each_in_elem(const std::tuple<Tp...> &t) + { + iterate_in_type<In, Out, OutType>(std::get<In>(t)); + for_each_in_elem<In + 1, Out, OutType, Tp...>(t); + } + +protected: + Type inType; + Type outType; + RoundingMode rounding; + SaturationMode saturation; + + const TypeIter &typeIter; + ConversionsTest &test; + int testNumber; + int startMinVectorSize; +}; + + #endif /* BASIC_TEST_CONVERSIONS_H */ diff --git a/test_conformance/conversions/conversions_data_info.h b/test_conformance/conversions/conversions_data_info.h new file mode 100644 index 00000000..4f46a24e --- /dev/null +++ b/test_conformance/conversions/conversions_data_info.h @@ -0,0 +1,792 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef CONVERSIONS_DATA_INFO_H +#define CONVERSIONS_DATA_INFO_H + +#if defined(__APPLE__) +#include <OpenCL/opencl.h> +#else +#include <CL/opencl.h> +#endif + +#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) +#include "fplib.h" +extern bool qcom_sat; +extern roundingMode qcom_rm; +#endif + +#include "harness/mt19937.h" +#include "harness/rounding_mode.h" + +#include <vector> + +#if defined(__linux__) +#include <sys/param.h> +#include <libgen.h> +#endif + +extern size_t gTypeSizes[kTypeCount]; +extern void *gIn; + + +typedef enum +{ + kUnsaturated = 0, + kSaturated, + + kSaturationModeCount +} SaturationMode; + +struct DataInitInfo +{ + cl_ulong start; + cl_uint size; + Type outType; + Type inType; + SaturationMode sat; + RoundingMode round; + cl_uint threads; + + static std::vector<uint32_t> specialValuesUInt; + static std::vector<float> specialValuesFloat; + static std::vector<double> specialValuesDouble; +}; + +struct DataInitBase : public DataInitInfo +{ + virtual ~DataInitBase() = default; + + explicit DataInitBase(const DataInitInfo &agg): DataInitInfo(agg) {} + virtual void conv_array(void *out, void *in, size_t n) {} + virtual void conv_array_sat(void *out, void *in, size_t n) {} + virtual void init(const cl_uint &, const cl_uint &) {} +}; + +template <typename InType, typename OutType> +struct DataInfoSpec : public DataInitBase +{ + explicit DataInfoSpec(const DataInitInfo &agg); + + // helpers + float round_to_int(float f); + long long round_to_int_and_clamp(double d); + + OutType absolute(const OutType &x); + + // actual conversion of reference values + void conv(OutType *out, InType *in); + void conv_sat(OutType *out, InType *in); + + // min/max ranges for output type of data + std::pair<OutType, OutType> ranges; + + // matrix of clamping ranges for each rounding type + std::vector<std::pair<InType, InType>> clamp_ranges; + + std::vector<MTdataHolder> mdv; + + void conv_array(void *out, void *in, size_t n) override + { + for (size_t i = 0; i < n; i++) + conv(&((OutType *)out)[i], &((InType *)in)[i]); + } + + void conv_array_sat(void *out, void *in, size_t n) override + { + for (size_t i = 0; i < n; i++) + conv_sat(&((OutType *)out)[i], &((InType *)in)[i]); + } + + void init(const cl_uint &, const cl_uint &) override; + InType clamp(const InType &); + inline float fclamp(float lo, float v, float hi) + { + v = v < lo ? lo : v; + return v < hi ? v : hi; + } + + inline double dclamp(double lo, double v, double hi) + { + v = v < lo ? lo : v; + return v < hi ? v : hi; + } +}; + +template <typename InType, typename OutType> +DataInfoSpec<InType, OutType>::DataInfoSpec(const DataInitInfo &agg) + : DataInitBase(agg), mdv(0) +{ + if (std::is_same<cl_float, OutType>::value) + ranges = std::make_pair(CL_FLT_MIN, CL_FLT_MAX); + else if (std::is_same<cl_double, OutType>::value) + ranges = std::make_pair(CL_DBL_MIN, CL_DBL_MAX); + else if (std::is_same<cl_uchar, OutType>::value) + ranges = std::make_pair(0, CL_UCHAR_MAX); + else if (std::is_same<cl_char, OutType>::value) + ranges = std::make_pair(CL_CHAR_MIN, CL_CHAR_MAX); + else if (std::is_same<cl_ushort, OutType>::value) + ranges = std::make_pair(0, CL_USHRT_MAX); + else if (std::is_same<cl_short, OutType>::value) + ranges = std::make_pair(CL_SHRT_MIN, CL_SHRT_MAX); + else if (std::is_same<cl_uint, OutType>::value) + ranges = std::make_pair(0, CL_UINT_MAX); + else if (std::is_same<cl_int, OutType>::value) + ranges = std::make_pair(CL_INT_MIN, CL_INT_MAX); + else if (std::is_same<cl_ulong, OutType>::value) + ranges = std::make_pair(0, CL_ULONG_MAX); + else if (std::is_same<cl_long, OutType>::value) + ranges = std::make_pair(CL_LONG_MIN, CL_LONG_MAX); + + // clang-format off + // for readability sake keep this section unformatted + if (std::is_floating_point<InType>::value) + { // from float/double + InType outMin = static_cast<InType>(ranges.first); + InType outMax = static_cast<InType>(ranges.second); + + InType eps = std::is_same<InType, cl_float>::value ? (InType) FLT_EPSILON : (InType) DBL_EPSILON; + if (std::is_integral<OutType>::value) + { // to char/uchar/short/ushort/int/uint/long/ulong + if (sizeof(OutType)<=sizeof(cl_short)) + { // to char/uchar/short/ushort + clamp_ranges= + {{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps}, + {outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps}, + {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, outMax-1.f}, + {outMin-0.0f, outMax - outMax * 0.5f * eps }, + {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, outMax - outMax * 0.5f * eps}}; + } + else if (std::is_same<InType, cl_float>::value) + { // from float + if (std::is_same<OutType, cl_uint>::value) + { // to uint + clamp_ranges= + { {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)}, + {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)}, + {outMin-1.0f+0.5f*eps, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)}, + {outMin-0.0f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) }, + {outMin-1.0f+0.5f*eps, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)}}; + } + else if (std::is_same<OutType, cl_int>::value) + { // to int + clamp_ranges= + { {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)}, + {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)}, + {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)}, + {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) }, + {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)}}; + } + else if (std::is_same<OutType, cl_ulong>::value) + { // to ulong + clamp_ranges= + {{outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)}, + {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)}, + {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)}, + {outMin-0.0f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) }, + {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)}}; + } + else if (std::is_same<OutType, cl_long>::value) + { // to long + clamp_ranges= + { {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}, + {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}, + {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}, + {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}, + {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}}; + } + } + else + { // from double + if (std::is_same<OutType, cl_uint>::value) + { // to uint + clamp_ranges= + { {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * eps}, + {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * eps}, + {outMin-1.0f+0.5f*eps, outMax}, + {outMin-0.0f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21) }, + {outMin-1.0f+0.5f*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21)}}; + } + else if (std::is_same<OutType, cl_int>::value) + { // to int + clamp_ranges= + { {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps}, + {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps}, + {outMin-1.0f+outMax*eps, outMax}, + {outMin-0.0f, outMax + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps }, + {outMin-1.0f+outMax*eps, outMax + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps}}; + } + else if (std::is_same<OutType, cl_ulong>::value) + { // to ulong + clamp_ranges= + {{outMin-0.5f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)}, + {outMin-0.5f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)}, + {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)}, + {outMin-0.0f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) }, + {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)}}; + } + else if (std::is_same<OutType, cl_long>::value) + { // to long + clamp_ranges= + { {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}, + {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}, + {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}, + {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}, + {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}}; + } + } + } + } + // clang-format on +} + +template <typename InType, typename OutType> +float DataInfoSpec<InType, OutType>::round_to_int(float f) +{ + static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23), + -MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23) }; + + // Round fractional values to integer in round towards nearest mode + if (fabsf(f) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23)) + { + volatile float x = f; + float magicVal = magic[f < 0]; + +#if defined(__SSE__) + // Defeat x87 based arithmetic, which cant do FTZ, and will round this + // incorrectly + __m128 v = _mm_set_ss(x); + __m128 m = _mm_set_ss(magicVal); + v = _mm_add_ss(v, m); + v = _mm_sub_ss(v, m); + _mm_store_ss((float *)&x, v); +#else + x += magicVal; + x -= magicVal; +#endif + f = x; + } + return f; +} + +template <typename InType, typename OutType> +long long DataInfoSpec<InType, OutType>::round_to_int_and_clamp(double f) +{ + static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), + MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) }; + + if (f >= -(double)LLONG_MIN) return LLONG_MAX; + + if (f <= (double)LLONG_MIN) return LLONG_MIN; + + // Round fractional values to integer in round towards nearest mode + if (fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52)) + { + volatile double x = f; + double magicVal = magic[f < 0]; +#if defined(__SSE2__) || defined(_MSC_VER) + // Defeat x87 based arithmetic, which cant do FTZ, and will round this + // incorrectly + __m128d v = _mm_set_sd(x); + __m128d m = _mm_set_sd(magicVal); + v = _mm_add_sd(v, m); + v = _mm_sub_sd(v, m); + _mm_store_sd((double *)&x, v); +#else + x += magicVal; + x -= magicVal; +#endif + f = x; + } + return (long long)f; +} + +template <typename InType, typename OutType> +OutType DataInfoSpec<InType, OutType>::absolute(const OutType &x) +{ + union { + cl_uint u; + OutType f; + } u; + u.f = x; + if (std::is_same<OutType, float>::value) + u.u &= 0x7fffffff; + else if (std::is_same<OutType, double>::value) + u.u &= 0x7fffffffffffffffULL; + else + log_error("Unexpected argument type of DataInfoSpec::absolute"); + + return u.f; +} + +template <typename InType, typename OutType> +void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in) +{ + if (std::is_same<cl_float, InType>::value) + { + cl_float inVal = *in; + + if (std::is_floating_point<OutType>::value) + { + *out = (OutType)inVal; + } + else if (std::is_same<cl_ulong, OutType>::value) + { +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + // VS2005 (at least) on x86 uses fistp to store the float as a + // 64-bit int. However, fistp stores it as a signed int, and some of + // the test values won't fit into a signed int. (These test values + // are >= 2^63.) The result on VS2005 is that these end up silently + // (at least by default settings) clamped to the max lowest ulong. + cl_float x = round_to_int(inVal); + if (x >= 9223372036854775808.0f) + { + x -= 9223372036854775808.0f; + ((cl_ulong *)out)[0] = x; + ((cl_ulong *)out)[0] += 9223372036854775808ULL; + } + else + { + ((cl_ulong *)out)[0] = x; + } +#else + *out = round_to_int(inVal); +#endif + } + else if (std::is_same<cl_long, OutType>::value) + { + *out = round_to_int_and_clamp(inVal); + } + else + *out = round_to_int(inVal); + } + else if (std::is_same<cl_double, InType>::value) + { + if (std::is_same<cl_float, OutType>::value) + *out = (OutType)*in; + else + *out = rint(*in); + } + else if (std::is_same<cl_ulong, InType>::value + || std::is_same<cl_long, InType>::value) + { + if (std::is_same<cl_double, OutType>::value) + { +#if defined(_MSC_VER) + cl_ulong l = ((cl_ulong *)in)[0]; + double result; + + if (std::is_same<cl_ulong, InType>::value) + { + cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) + : (cl_long)l; +#if defined(_M_X64) + _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), sl)); +#else + result = sl; +#endif + ((double *)out)[0] = + (l == 0 ? 0.0 : (((cl_long)l < 0) ? result * 2.0 : result)); + } + else + { + _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), l)); + ((double *)out)[0] = + (l == 0 ? 0.0 : result); // Per IEEE-754-2008 5.4.1, 0's + // always convert to +0.0 + } +#else + // Use volatile to prevent optimization by Clang compiler + volatile InType vi = *in; + *out = (vi == 0 ? 0.0 : static_cast<OutType>(vi)); +#endif + } + else if (std::is_same<cl_float, OutType>::value) + { + cl_float outVal = 0.f; + +#if defined(_MSC_VER) && defined(_M_X64) + cl_ulong l = ((cl_ulong *)in)[0]; + float result; + if (std::is_same<cl_ulong, InType>::value) + { + cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) + : (cl_long)l; + _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl)); + outVal = (l == 0 ? 0.0f + : (((cl_long)l < 0) ? result * 2.0f : result)); + } + else + { + _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), l)); + outVal = (l == 0 ? 0.0f : result); // Per IEEE-754-2008 5.4.1, + // 0's always convert to +0.0 + } +#else + InType l = ((InType *)in)[0]; +#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) + /* ARM VFP doesn't have hardware instruction for converting from + * 64-bit integer to float types, hence GCC ARM uses the + * floating-point emulation code despite which -mfloat-abi setting + * it is. But the emulation code in libgcc.a has only one rounding + * mode (round to nearest even in this case) and ignores the user + * rounding mode setting in hardware. As a result setting rounding + * modes in hardware won't give correct rounding results for type + * covert from 64-bit integer to float using GCC for ARM compiler so + * for testing different rounding modes, we need to use alternative + * reference function. ARM64 does have an instruction, however we + * cannot guarantee the compiler will use it. On all ARM + * architechures use emulation to calculate reference.*/ + if (std::is_same<cl_ulong, InType>::value) + outVal = qcom_u64_2_f32(l, qcom_sat, qcom_rm); + else + outVal = (l == 0 ? 0.0f : qcom_s64_2_f32(l, qcom_sat, qcom_rm)); +#else + outVal = (l == 0 ? 0.0f : (float)l); // Per IEEE-754-2008 5.4.1, 0's + // always convert to +0.0 +#endif +#endif + + *out = outVal; + } + else + { + *out = (OutType)*in; + } + } + else + { + if (std::is_same<cl_float, OutType>::value) + { + // Use volatile to prevent optimization by Clang compiler + volatile InType vi = *in; + // Per IEEE-754-2008 5.4.1, 0 always converts to +0.0 + *out = (vi == 0 ? 0.0f : vi); + } + else if (std::is_same<cl_double, OutType>::value) + { + // Per IEEE-754-2008 5.4.1, 0 always converts to +0.0 + *out = (*in == 0 ? 0.0 : *in); + } + else + { + *out = (OutType)*in; + } + } +} + +#define CLAMP(_lo, _x, _hi) \ + ((_x) < (_lo) ? (_lo) : ((_x) > (_hi) ? (_hi) : (_x))) + +template <typename InType, typename OutType> +void DataInfoSpec<InType, OutType>::conv_sat(OutType *out, InType *in) +{ + if (std::is_floating_point<InType>::value) + { + if (std::is_floating_point<OutType>::value) + { // in float/double, out float/double + *out = (OutType)(*in); + } + else if ((std::is_same<InType, cl_float>::value) + && std::is_same<cl_ulong, OutType>::value) + { + cl_float x = round_to_int(*in); + +#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + // VS2005 (at least) on x86 uses fistp to store the float as a + // 64-bit int. However, fistp stores it as a signed int, and some of + // the test values won't fit into a signed int. (These test values + // are >= 2^63.) The result on VS2005 is that these end up silently + // (at least by default settings) clamped to the max lowest ulong. + if (x >= 18446744073709551616.0f) + { // 2^64 + *out = 0xFFFFFFFFFFFFFFFFULL; + } + else if (x < 0) + { + *out = 0; + } + else if (x >= 9223372036854775808.0f) + { // 2^63 + x -= 9223372036854775808.0f; + *out = x; + *out += 9223372036854775808ULL; + } + else + { + *out = x; + } +#else + *out = x >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) + ? 0xFFFFFFFFFFFFFFFFULL + : x < 0 ? 0 : (OutType)x; +#endif + } + else if ((std::is_same<InType, cl_float>::value) + && std::is_same<cl_long, OutType>::value) + { + cl_float f = round_to_int(*in); + *out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) + ? 0x7FFFFFFFFFFFFFFFULL + : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) + ? 0x8000000000000000LL + : (OutType)f; + } + else if (std::is_same<InType, cl_double>::value + && std::is_same<cl_ulong, OutType>::value) + { + InType f = rint(*in); + *out = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) + ? 0xFFFFFFFFFFFFFFFFULL + : f < 0 ? 0 : (OutType)f; + } + else if (std::is_same<InType, cl_double>::value + && std::is_same<cl_long, OutType>::value) + { + InType f = rint(*in); + *out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) + ? 0x7FFFFFFFFFFFFFFFULL + : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) + ? 0x8000000000000000LL + : (OutType)f; + } + else + { // in float/double, out char/uchar/short/ushort/int/uint + *out = + CLAMP(ranges.first, round_to_int_and_clamp(*in), ranges.second); + } + } + else if (std::is_integral<InType>::value + && std::is_integral<OutType>::value) + { + { + if ((std::is_signed<InType>::value + && std::is_signed<OutType>::value) + || (!std::is_signed<InType>::value + && !std::is_signed<OutType>::value)) + { + if (sizeof(InType) <= sizeof(OutType)) + { + *out = (OutType)*in; + } + else + { + *out = CLAMP(ranges.first, *in, ranges.second); + } + } + else + { // mixed signed/unsigned types + if (sizeof(InType) < sizeof(OutType)) + { + *out = (!std::is_signed<InType>::value) + ? (OutType)*in + : CLAMP(0, *in, ranges.second); // *in < 0 ? 0 : *in + } + else + { // bigger/equal mixed signed/unsigned types - always clamp + *out = CLAMP(0, *in, ranges.second); + } + } + } + } + else + { // InType integral, OutType floating + *out = std::is_signed<InType>::value ? (OutType)*in + : absolute((OutType)*in); + } +} + +template <typename InType, typename OutType> +void DataInfoSpec<InType, OutType>::init(const cl_uint &job_id, + const cl_uint &thread_id) +{ + uint64_t ulStart = start; + void *pIn = (char *)gIn + job_id * size * gTypeSizes[inType]; + + if (std::is_integral<InType>::value) + { + InType *o = (InType *)pIn; + if (sizeof(InType) <= sizeof(cl_short)) + { // char/uchar/ushort/short + for (int i = 0; i < size; i++) o[i] = ulStart++; + } + else if (sizeof(InType) <= sizeof(cl_int)) + { // int/uint + int i = 0; + if (gIsEmbedded) + for (i = 0; i < size; i++) + o[i] = (InType)genrand_int32(mdv[thread_id]); + else + for (i = 0; i < size; i++) o[i] = (InType)i + ulStart; + + if (0 == ulStart) + { + size_t tableSize = specialValuesUInt.size() + * sizeof(decltype(specialValuesUInt)::value_type); + if (sizeof(InType) * size < tableSize) + tableSize = sizeof(InType) * size; + memcpy((char *)(o + i) - tableSize, &specialValuesUInt.front(), + tableSize); + } + } + else + { // long/ulong + cl_ulong *o = (cl_ulong *)pIn; + cl_ulong i, j, k; + + i = 0; + if (ulStart == 0) + { + // Try various powers of two + for (j = 0; j < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++) + o[j] = (cl_ulong)1 << j; + i = j; + + // try the complement of those + for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++) + o[i++] = ~((cl_ulong)1 << j); + + // Try various negative powers of two + for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++) + o[i++] = (cl_ulong)0xFFFFFFFFFFFFFFFEULL << j; + + // try various powers of two plus 1, shifted by various amounts + for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++) + for (k = 0; + i < (cl_ulong)size && k < 8 * sizeof(cl_ulong) - j; + k++) + o[i++] = (((cl_ulong)1 << j) + 1) << k; + + // try various powers of two minus 1 + for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++) + for (k = 0; + i < (cl_ulong)size && k < 8 * sizeof(cl_ulong) - j; + k++) + o[i++] = (((cl_ulong)1 << j) - 1) << k; + + // Other patterns + cl_ulong pattern[] = { + 0x3333333333333333ULL, 0x5555555555555555ULL, + 0x9999999999999999ULL, 0x6666666666666666ULL, + 0xccccccccccccccccULL, 0xaaaaaaaaaaaaaaaaULL + }; + cl_ulong mask[] = { 0xffffffffffffffffULL, + 0xff00ff00ff00ff00ULL, + 0xffff0000ffff0000ULL, + 0xffffffff00000000ULL }; + for (j = 0; i < (cl_ulong)size + && j < sizeof(pattern) / sizeof(pattern[0]); + j++) + for (k = 0; i + 2 <= (cl_ulong)size + && k < sizeof(mask) / sizeof(mask[0]); + k++) + { + o[i++] = pattern[j] & mask[k]; + o[i++] = pattern[j] & ~mask[k]; + } + } + + auto &md = mdv[thread_id]; + for (; i < (cl_ulong)size; i++) + o[i] = (cl_ulong)genrand_int32(md) + | ((cl_ulong)genrand_int32(md) << 32); + } + } // integrals + else if (std::is_same<InType, cl_float>::value) + { + cl_uint *o = (cl_uint *)pIn; + int i; + + if (gIsEmbedded) + for (i = 0; i < size; i++) + o[i] = (cl_uint)genrand_int32(mdv[thread_id]); + else + for (i = 0; i < size; i++) o[i] = (cl_uint)i + ulStart; + + if (0 == ulStart) + { + size_t tableSize = specialValuesFloat.size() + * sizeof(decltype(specialValuesFloat)::value_type); + if (sizeof(InType) * size < tableSize) + tableSize = sizeof(InType) * size; + memcpy((char *)(o + i) - tableSize, &specialValuesFloat.front(), + tableSize); + } + + if (kUnsaturated == sat) + { + InType *f = (InType *)pIn; + for (i = 0; i < size; i++) f[i] = clamp(f[i]); + } + } + else if (std::is_same<InType, cl_double>::value) + { + InType *o = (InType *)pIn; + int i = 0; + + union { + uint64_t u; + InType d; + } u; + + for (i = 0; i < size; i++) + { + uint64_t z = i + ulStart; + + uint32_t bits = ((uint32_t)z ^ (uint32_t)(z >> 32)); + // split 0x89abcdef to 0x89abc00000000def + u.u = bits & 0xfffU; + u.u |= (uint64_t)(bits & ~0xfffU) << 32; + // sign extend the leading bit of def segment as sign bit so that + // the middle region consists of either all 1s or 0s + u.u -= (bits & 0x800U) << 1; + o[i] = u.d; + } + + if (0 == ulStart) + { + size_t tableSize = specialValuesDouble.size() + * sizeof(decltype(specialValuesDouble)::value_type); + if (sizeof(InType) * size < tableSize) + tableSize = sizeof(InType) * size; + memcpy((char *)(o + i) - tableSize, &specialValuesDouble.front(), + tableSize); + } + + if (0 == sat) + for (i = 0; i < size; i++) o[i] = clamp(o[i]); + } +} + +template <typename InType, typename OutType> +InType DataInfoSpec<InType, OutType>::clamp(const InType &in) +{ + if (std::is_integral<OutType>::value) + { + if (std::is_same<InType, cl_float>::value) + { + return fclamp(clamp_ranges[round].first, in, + clamp_ranges[round].second); + } + else if (std::is_same<InType, cl_double>::value) + { + return dclamp(clamp_ranges[round].first, in, + clamp_ranges[round].second); + } + } + return in; +} + +#endif /* CONVERSIONS_DATA_INFO_H */ diff --git a/test_conformance/conversions/fplib.h b/test_conformance/conversions/fplib.h index 534550a3..c69b1e89 100644 --- a/test_conformance/conversions/fplib.h +++ b/test_conformance/conversions/fplib.h @@ -13,6 +13,9 @@ // See the License for the specific language governing permissions and // limitations under the License. // +#ifndef CONVERSIONS_FPLIB_H +#define CONVERSIONS_FPLIB_H + #include <stdbool.h> #include <stdint.h> @@ -28,3 +31,5 @@ typedef enum float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd); float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd); + +#endif diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index 2ee05463..dab61dc5 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -13,12 +13,11 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" -#include "harness/rounding_mode.h" #include "harness/ThreadPool.h" #include "harness/testHarness.h" -#include "harness/kernelHelpers.h" #include "harness/parseParameters.h" +#include "harness/mt19937.h" + #if defined(__APPLE__) #include <sys/sysctl.h> #endif @@ -33,7 +32,6 @@ #include <libgen.h> #endif -#include "mingw_compat.h" #if defined(__MINGW32__) #include <sys/param.h> #endif @@ -49,283 +47,77 @@ #include <time.h> #include <algorithm> +#include <type_traits> +#include <vector> #include "Sleep.h" -#include "basic_test_conversions.h" - -#if (defined(_WIN32) && defined(_MSC_VER)) -// need for _controlfp_s and rouinding modes in RoundingMode -#include "harness/testHarness.h" -#endif - -#pragma mark - -#pragma mark globals - -#define BUFFER_SIZE (1024 * 1024) -#define kPageSize 4096 -#define EMBEDDED_REDUCTION_FACTOR 16 -#define PERF_LOOP_COUNT 100 -#define kCallStyleCount (kVectorSizeCount + 1 /* for implicit scalar */) +#include "basic_test_conversions.h" +#include <climits> +#include <cstring> #if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) #include "fplib.h" -extern bool qcom_sat; -extern roundingMode qcom_rm; #endif -const char **argList = NULL; -int argCount = 0; -cl_context gContext = NULL; -cl_command_queue gQueue = NULL; -char appName[64] = "ctest"; -int gStartTestNumber = -1; -int gEndTestNumber = 0; -#if defined(__APPLE__) -int gTimeResults = 1; -#else -int gTimeResults = 0; +#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) +/* Rounding modes and saturation for use with qcom 64 bit to float conversion + * library */ +bool qcom_sat; +roundingMode qcom_rm; #endif -int gReportAverageTimes = 0; -void *gIn = NULL; -void *gRef = NULL; -void *gAllowZ = NULL; -void *gOut[kCallStyleCount] = { NULL }; -cl_mem gInBuffer; -cl_mem gOutBuffers[kCallStyleCount]; -size_t gComputeDevices = 0; -uint32_t gDeviceFrequency = 0; -int gWimpyMode = 0; -int gWimpyReductionFactor = 128; -int gSkipTesting = 0; -int gForceFTZ = 0; -int gMultithread = 1; -int gIsRTZ = 0; -uint32_t gSimdSize = 1; -int gHasDouble = 0; -int gTestDouble = 1; -const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" }; -const int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 }; -int gMinVectorSize = 0; -int gMaxVectorSize = sizeof(vectorSizes) / sizeof(vectorSizes[0]); -static MTdata gMTdata; - -#pragma mark - -#pragma mark Declarations + static int ParseArgs(int argc, const char **argv); static void PrintUsage(void); test_status InitCL(cl_device_id device); -static int GetTestCase(const char *name, Type *outType, Type *inType, - SaturationMode *sat, RoundingMode *round); -static int DoTest(cl_device_id device, Type outType, Type inType, - SaturationMode sat, RoundingMode round, MTdata d); -static cl_program MakeProgram(Type outType, Type inType, SaturationMode sat, - RoundingMode round, int vectorSize, - cl_kernel *outKernel); -static int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, - size_t blockCount); - -void *FlushToZero(void); -void UnFlushToZero(void *); - -// Windows (since long double got deprecated) sets the x87 to 53-bit precision -// (that's x87 default state). This causes problems with the tests that -// convert long and ulong to float and double or otherwise deal with values -// that need more precision than 53-bit. So, set the x87 to 64-bit precision. -static inline void Force64BitFPUPrecision(void) -{ -#if __MINGW32__ - // The usual method is to use _controlfp as follows: - // #include <float.h> - // _controlfp(_PC_64, _MCW_PC); - // - // _controlfp is available on MinGW32 but not on MinGW64. Instead of having - // divergent code just use inline assembly which works for both. - unsigned short int orig_cw = 0; - unsigned short int new_cw = 0; - __asm__ __volatile__("fstcw %0" : "=m"(orig_cw)); - new_cw = orig_cw | 0x0300; // set precision to 64-bit - __asm__ __volatile__("fldcw %0" ::"m"(new_cw)); -#else - /* Implement for other platforms if needed */ -#endif -} -int test_conversions(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements) -{ - int error, i, testNumber = -1; - int startMinVectorSize = gMinVectorSize; - Type inType, outType; - RoundingMode round; - SaturationMode sat; - if (argCount) - { - for (i = 0; i < argCount; i++) - { - if (GetTestCase(argList[i], &outType, &inType, &sat, &round)) - { - vlog_error("\n\t\t**** ERROR: Unable to parse function name " - "%s. Skipping.... *****\n\n", - argList[i]); - continue; - } +const char *gTypeNames[kTypeCount] = { "uchar", "char", "ushort", "short", + "uint", "int", "float", "double", + "ulong", "long" }; - // skip double if we don't have it - if (!gTestDouble && (inType == kdouble || outType == kdouble)) - { - if (gHasDouble) - { - vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n", - gTypeNames[outType], gSaturationNames[sat], - gRoundingModeNames[round], gTypeNames[inType]); - vlog("\t\tcl_khr_fp64 enabled, but double testing turned " - "off.\n"); - } +const char *gRoundingModeNames[kRoundingModeCount] = { "", "_rte", "_rtp", + "_rtn", "_rtz" }; - continue; - } +const char *gSaturationNames[2] = { "", "_sat" }; - // skip longs on embedded - if (!gHasLong - && (inType == klong || outType == klong || inType == kulong - || outType == kulong)) - { - continue; - } +size_t gTypeSizes[kTypeCount] = { + sizeof(cl_uchar), sizeof(cl_char), sizeof(cl_ushort), sizeof(cl_short), + sizeof(cl_uint), sizeof(cl_int), sizeof(cl_float), sizeof(cl_double), + sizeof(cl_ulong), sizeof(cl_long), +}; - // Skip the implicit converts if the rounding mode is not default or - // test is saturated - if (0 == startMinVectorSize) - { - if (sat || round != kDefaultRoundingMode) - gMinVectorSize = 1; - else - gMinVectorSize = 0; - } +char appName[64] = "ctest"; +int gMultithread = 1; - if ((error = DoTest(device, outType, inType, sat, round, gMTdata))) - { - vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n", - gTypeNames[outType], gSaturationNames[sat], - gRoundingModeNames[round], gTypeNames[inType]); - } - } + +int test_conversions(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + if (argCount) + { + return MakeAndRunTest<CustomConversionsTest>(device, context, queue, + num_elements); } else { - for (outType = (Type)0; outType < kTypeCount; - outType = (Type)(outType + 1)) - { - for (inType = (Type)0; inType < kTypeCount; - inType = (Type)(inType + 1)) - { - // skip longs on embedded - if (!gHasLong - && (inType == klong || outType == klong || inType == kulong - || outType == kulong)) - { - continue; - } - - for (sat = (SaturationMode)0; sat < kSaturationModeCount; - sat = (SaturationMode)(sat + 1)) - { - // skip illegal saturated conversions to float type - if (kSaturated == sat - && (outType == kfloat || outType == kdouble)) - { - continue; - } - - for (round = (RoundingMode)0; round < kRoundingModeCount; - round = (RoundingMode)(round + 1)) - { - if (++testNumber < gStartTestNumber) - { - // vlog( "%d) skipping convert_%sn%s%s( %sn - // )\n", testNumber, gTypeNames[ outType ], - // gSaturationNames[ sat ], - // gRoundingModeNames[round], gTypeNames[inType] - // ); - continue; - } - else - { - if (gEndTestNumber > 0 - && testNumber >= gEndTestNumber) - { - goto exit; - } - } - - vlog("%d) Testing convert_%sn%s%s( %sn ):\n", - testNumber, gTypeNames[outType], - gSaturationNames[sat], gRoundingModeNames[round], - gTypeNames[inType]); - - // skip double if we don't have it - if (!gTestDouble - && (inType == kdouble || outType == kdouble)) - { - if (gHasDouble) - { - vlog_error("\t *** %d) convert_%sn%s%s( %sn ) " - "FAILED ** \n", - testNumber, gTypeNames[outType], - gSaturationNames[sat], - gRoundingModeNames[round], - gTypeNames[inType]); - vlog("\t\tcl_khr_fp64 enabled, but double " - "testing turned off.\n"); - } - continue; - } - - // Skip the implicit converts if the rounding mode is - // not default or test is saturated - if (0 == startMinVectorSize) - { - if (sat || round != kDefaultRoundingMode) - gMinVectorSize = 1; - else - gMinVectorSize = 0; - } - - if ((error = DoTest(device, outType, inType, sat, round, - gMTdata))) - { - vlog_error("\t *** %d) convert_%sn%s%s( %sn ) " - "FAILED ** \n", - testNumber, gTypeNames[outType], - gSaturationNames[sat], - gRoundingModeNames[round], - gTypeNames[inType]); - } - } - } - } - } + return MakeAndRunTest<ConversionsTest>(device, context, queue, + num_elements); } - -exit: - return gFailCount; } + test_definition test_list[] = { ADD_TEST(conversions), }; const int test_num = ARRAY_SIZE(test_list); -#pragma mark - int main(int argc, const char **argv) { int error; - cl_uint seed = (cl_uint)time(NULL); argc = parseCustomParam(argc, argv); if (argc == -1) @@ -352,8 +144,8 @@ int main(int argc, const char **argv) #endif vlog("===========================================================\n"); - vlog("Random seed: %u\n", seed); - gMTdata = init_genrand(seed); + vlog("Random seed: %u\n", gRandomSeed); + gMTdata = init_genrand(gRandomSeed); const char *arg[] = { argv[0] }; int ret = @@ -378,8 +170,6 @@ int main(int argc, const char **argv) return ret; } -#pragma mark - -#pragma mark setup static int ParseArgs(int argc, const char **argv) { @@ -509,7 +299,7 @@ static int ParseArgs(int argc, const char **argv) gWimpyMode = 1; } - vlog( "\n" ); + vlog("\n"); PrintArch(); @@ -526,6 +316,7 @@ static int ParseArgs(int argc, const char **argv) return 0; } + static void PrintUsage(void) { int i; @@ -564,63 +355,6 @@ static void PrintUsage(void) } -static int GetTestCase(const char *name, Type *outType, Type *inType, - SaturationMode *sat, RoundingMode *round) -{ - int i; - - // Find the return type - for (i = 0; i < kTypeCount; i++) - if (name == strstr(name, gTypeNames[i])) - { - *outType = (Type)i; - name += strlen(gTypeNames[i]); - - break; - } - - if (i == kTypeCount) return -1; - - // Check to see if _sat appears next - *sat = (SaturationMode)0; - for (i = 1; i < kSaturationModeCount; i++) - if (name == strstr(name, gSaturationNames[i])) - { - *sat = (SaturationMode)i; - name += strlen(gSaturationNames[i]); - break; - } - - *round = (RoundingMode)0; - for (i = 1; i < kRoundingModeCount; i++) - if (name == strstr(name, gRoundingModeNames[i])) - { - *round = (RoundingMode)i; - name += strlen(gRoundingModeNames[i]); - break; - } - - if (*name != '_') return -2; - name++; - - for (i = 0; i < kTypeCount; i++) - if (name == strstr(name, gTypeNames[i])) - { - *inType = (Type)i; - name += strlen(gTypeNames[i]); - - break; - } - - if (i == kTypeCount) return -3; - - if (*name != '\0') return -4; - - return 0; -} - -#pragma mark - -#pragma mark OpenCL test_status InitCL(cl_device_id device) { @@ -678,6 +412,20 @@ test_status InitCL(cl_device_id device) } gTestDouble &= gHasDouble; + // detect whether profile of the device is embedded + char profile[1024] = ""; + if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), + profile, NULL))) + { + vlog_error("clGetDeviceInfo failed. (%d)\n", error); + return TEST_FAIL; + } + else if (strstr(profile, "EMBEDDED_PROFILE")) + { + gIsEmbedded = 1; + if (!is_extension_available(device, "cles_khr_int64")) gHasLong = 0; + } + gContext = clCreateContext(NULL, 1, &device, notify_callback, NULL, &error); if (NULL == gContext || error) { @@ -726,10 +474,6 @@ test_status InitCL(cl_device_id device) } } - - gMTdata = init_genrand(gRandomSeed); - - char c[1024]; static const char *no_yes[] = { "NO", "YES" }; vlog("\nCompute Device info:\n"); @@ -760,977 +504,4 @@ test_status InitCL(cl_device_id device) return TEST_PASS; } -static int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, - size_t blockCount) -{ - // The global dimensions are just the blockCount to execute since we haven't - // set up multiple queues for multiple devices. - int error; - - error = clSetKernelArg(kernel, 0, sizeof(inBuf), &inBuf); - error |= clSetKernelArg(kernel, 1, sizeof(outBuf), &outBuf); - - if (error) - { - vlog_error("FAILED -- could not set kernel args (%d)\n", error); - return error; - } - - if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &blockCount, - NULL, 0, NULL, NULL))) - { - vlog_error("FAILED -- could not execute kernel (%d)\n", error); - return error; - } - - return 0; -} - -#if defined(__APPLE__) -#include <mach/mach_time.h> -#endif - -uint64_t GetTime(void); -uint64_t GetTime(void) -{ -#if defined(__APPLE__) - return mach_absolute_time(); -#elif defined(_MSC_VER) - return ReadTime(); -#else - // mach_absolute_time is a high precision timer with precision < 1 - // microsecond. -#warning need accurate clock here. Times are invalid. - return 0; -#endif -} - - -#if defined(_MSC_VER) -/* function is defined in "compat.h" */ -#else -double SubtractTime(uint64_t endTime, uint64_t startTime); -double SubtractTime(uint64_t endTime, uint64_t startTime) -{ - uint64_t diff = endTime - startTime; - static double conversion = 0.0; - - if (0.0 == conversion) - { -#if defined(__APPLE__) - mach_timebase_info_data_t info = { 0, 0 }; - kern_return_t err = mach_timebase_info(&info); - if (0 == err) - conversion = 1e-9 * (double)info.numer / (double)info.denom; -#else - // This function consumes output from GetTime() above, and converts the - // time to secionds. -#warning need accurate ticks to seconds conversion factor here. Times are invalid. -#endif - } - - // strictly speaking we should also be subtracting out timer latency here - return conversion * (double)diff; -} -#endif - -typedef struct CalcReferenceValuesInfo -{ - struct WriteInputBufferInfo - *parent; // pointer back to the parent WriteInputBufferInfo struct - cl_kernel kernel; // the kernel for this vector size - cl_program program; // the program for this vector size - cl_uint vectorSize; // the vector size for this callback chain - void *p; // the pointer to mapped result data for this vector size - cl_int result; -} CalcReferenceValuesInfo; - -typedef struct WriteInputBufferInfo -{ - volatile cl_event - calcReferenceValues; // user event which signals when main thread is - // done calculating reference values - volatile cl_event - doneBarrier; // user event which signals when worker threads are done - cl_uint count; // the number of elements in the array - Type outType; // the data type of the conversion result - Type inType; // the data type of the conversion input - volatile int barrierCount; - CalcReferenceValuesInfo calcInfo[kCallStyleCount]; -} WriteInputBufferInfo; - -cl_uint RoundUpToNextPowerOfTwo(cl_uint x); -cl_uint RoundUpToNextPowerOfTwo(cl_uint x) -{ - if (0 == (x & (x - 1))) return x; - - while (x & (x - 1)) x &= x - 1; - - return x + x; -} - -void WriteInputBufferComplete(void *); - -typedef struct DataInitInfo -{ - cl_ulong start; - cl_uint size; - Type outType; - Type inType; - SaturationMode sat; - RoundingMode round; - MTdata *d; -} DataInitInfo; - -cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p); -cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p) -{ - DataInitInfo *info = (DataInitInfo *)p; - - gInitFunctions[info->inType]( - (char *)gIn + job_id * info->size * gTypeSizes[info->inType], info->sat, - info->round, info->outType, info->start + job_id * info->size, - info->size, info->d[thread_id]); - return CL_SUCCESS; -} - -static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count) -{ - cl_uint i; - for (i = 0; i < count; ++i) - allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0); -} - -cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p); -cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p) -{ - DataInitInfo *info = (DataInitInfo *)p; - cl_uint count = info->size; - Type inType = info->inType; - Type outType = info->outType; - RoundingMode round = info->round; - size_t j; - - Force64BitFPUPrecision(); - - void *s = (cl_uchar *)gIn + job_id * count * gTypeSizes[info->inType]; - void *a = (cl_uchar *)gAllowZ + job_id * count; - void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType]; - - if (outType != inType) - { - // create the reference while we wait - Convert f = gConversions[outType][inType]; - if (info->sat) f = gSaturatedConversions[outType][inType]; - -#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__) - /* ARM VFP doesn't have hardware instruction for converting from 64-bit - * integer to float types, hence GCC ARM uses the floating-point - * emulation code despite which -mfloat-abi setting it is. But the - * emulation code in libgcc.a has only one rounding mode (round to - * nearest even in this case) and ignores the user rounding mode setting - * in hardware. As a result setting rounding modes in hardware won't - * give correct rounding results for type covert from 64-bit integer to - * float using GCC for ARM compiler so for testing different rounding - * modes, we need to use alternative reference function. ARM64 does have - * an instruction, however we cannot guarantee the compiler will use it. - * On all ARM architechures use emulation to calculate reference.*/ - switch (round) - { - /* conversions to floating-point type use the current rounding mode. - * The only default floating-point rounding mode supported is round - * to nearest even i.e the current rounding mode will be _rte for - * floating-point types. */ - case kDefaultRoundingMode: qcom_rm = qcomRTE; break; - case kRoundToNearestEven: qcom_rm = qcomRTE; break; - case kRoundUp: qcom_rm = qcomRTP; break; - case kRoundDown: qcom_rm = qcomRTN; break; - case kRoundTowardZero: qcom_rm = qcomRTZ; break; - default: - vlog_error("ERROR: undefined rounding mode %d\n", round); - break; - } - qcom_sat = info->sat; -#endif - - RoundingMode oldRound = set_round(round, outType); - f(d, s, count); - set_round(oldRound, outType); - - // Decide if we allow a zero result in addition to the correctly rounded - // one - memset(a, 0, count); - if (gForceFTZ) - { - if (inType == kfloat) setAllowZ((uint8_t *)a, (uint32_t *)s, count); - if (outType == kfloat) - setAllowZ((uint8_t *)a, (uint32_t *)d, count); - } - } - else - { - // Copy the input to the reference - memcpy(d, s, info->size * gTypeSizes[inType]); - } - - // Patch up NaNs conversions to integer to zero -- these can be converted to - // any integer - if (info->outType != kfloat && info->outType != kdouble) - { - if (inType == kfloat) - { - float *inp = (float *)s; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) - memset((char *)d + j * gTypeSizes[outType], 0, - gTypeSizes[outType]); - } - } - if (inType == kdouble) - { - double *inp = (double *)s; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) - memset((char *)d + j * gTypeSizes[outType], 0, - gTypeSizes[outType]); - } - } - } - else if (inType == kfloat || inType == kdouble) - { // outtype and intype is float or double. NaN conversions for float <-> - // double can be any NaN - if (inType == kfloat && outType == kdouble) - { - float *inp = (float *)s; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) ((double *)d)[j] = NAN; - } - } - if (inType == kdouble && outType == kfloat) - { - double *inp = (double *)s; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) ((float *)d)[j] = NAN; - } - } - } - - return CL_SUCCESS; -} - -static int DoTest(cl_device_id device, Type outType, Type inType, - SaturationMode sat, RoundingMode round, MTdata d) -{ -#ifdef __APPLE__ - cl_ulong wall_start = mach_absolute_time(); -#endif - - DataInitInfo init_info = { 0, 0, outType, inType, sat, round, NULL }; - WriteInputBufferInfo writeInputBufferInfo; - int vectorSize; - int error = 0; - cl_uint threads = GetThreadCount(); - uint64_t i; - - gTestCount++; - size_t blockCount = - BUFFER_SIZE / std::max(gTypeSizes[inType], gTypeSizes[outType]); - size_t step = blockCount; - uint64_t lastCase = 1ULL << (8 * gTypeSizes[inType]); - - memset(&writeInputBufferInfo, 0, sizeof(writeInputBufferInfo)); - init_info.d = (MTdata *)malloc(threads * sizeof(MTdata)); - if (NULL == init_info.d) - { - vlog_error( - "ERROR: Unable to allocate storage for random number generator!\n"); - return -1; - } - for (i = 0; i < threads; i++) - { - init_info.d[i] = init_genrand(genrand_int32(d)); - if (NULL == init_info.d[i]) - { - vlog_error("ERROR: Unable to allocate storage for random number " - "generator!\n"); - return -1; - } - } - - writeInputBufferInfo.outType = outType; - writeInputBufferInfo.inType = inType; - - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) - { - writeInputBufferInfo.calcInfo[vectorSize].program = - MakeProgram(outType, inType, sat, round, vectorSize, - &writeInputBufferInfo.calcInfo[vectorSize].kernel); - if (NULL == writeInputBufferInfo.calcInfo[vectorSize].program) - { - gFailCount++; - return -1; - } - if (NULL == writeInputBufferInfo.calcInfo[vectorSize].kernel) - { - gFailCount++; - vlog_error("\t\tFAILED -- Failed to create kernel.\n"); - return -2; - } - - writeInputBufferInfo.calcInfo[vectorSize].parent = - &writeInputBufferInfo; - writeInputBufferInfo.calcInfo[vectorSize].vectorSize = vectorSize; - writeInputBufferInfo.calcInfo[vectorSize].result = -1; - } - - if (gSkipTesting) goto exit; - - // Patch up rounding mode if default is RTZ - // We leave the part above in default rounding mode so that the right kernel - // is compiled. - if (round == kDefaultRoundingMode && gIsRTZ && (outType == kfloat)) - init_info.round = round = kRoundTowardZero; - - // Figure out how many elements are in a work block - - // we handle 64-bit types a bit differently. - if (8 * gTypeSizes[inType] > 32) lastCase = 0x100000000ULL; - - if (!gWimpyMode && gIsEmbedded) - step = blockCount * EMBEDDED_REDUCTION_FACTOR; - - if (gWimpyMode) step = (size_t)blockCount * (size_t)gWimpyReductionFactor; - vlog("Testing... "); - fflush(stdout); - for (i = 0; i < (uint64_t)lastCase; i += step) - { - - if (0 == (i & ((lastCase >> 3) - 1))) - { - vlog("."); - fflush(stdout); - } - - cl_uint count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i); - writeInputBufferInfo.count = count; - - // Crate a user event to represent the status of the reference value - // computation completion - writeInputBufferInfo.calcReferenceValues = - clCreateUserEvent(gContext, &error); - if (error || NULL == writeInputBufferInfo.calcReferenceValues) - { - vlog_error("ERROR: Unable to create user event. (%d)\n", error); - gFailCount++; - goto exit; - } - - // retain for consumption by MapOutputBufferComplete - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; - vectorSize++) - { - if ((error = - clRetainEvent(writeInputBufferInfo.calcReferenceValues))) - { - vlog_error("ERROR: Unable to retain user event. (%d)\n", error); - gFailCount++; - goto exit; - } - } - - // Crate a user event to represent when the callbacks are done verifying - // correctness - writeInputBufferInfo.doneBarrier = clCreateUserEvent(gContext, &error); - if (error || NULL == writeInputBufferInfo.calcReferenceValues) - { - vlog_error("ERROR: Unable to create user event for barrier. (%d)\n", - error); - gFailCount++; - goto exit; - } - - // retain for use by the callback that calls this - if ((error = clRetainEvent(writeInputBufferInfo.doneBarrier))) - { - vlog_error("ERROR: Unable to retain user event doneBarrier. (%d)\n", - error); - gFailCount++; - goto exit; - } - - // Call this in a multithreaded manner - // gInitFunctions[ inType ]( gIn, sat, round, outType, i, count, d - // ); - cl_uint chunks = RoundUpToNextPowerOfTwo(threads) * 2; - init_info.start = i; - init_info.size = count / chunks; - if (init_info.size < 16384) - { - chunks = RoundUpToNextPowerOfTwo(threads); - init_info.size = count / chunks; - if (init_info.size < 16384) - { - init_info.size = count; - chunks = 1; - } - } - ThreadPool_Do(InitData, chunks, &init_info); - - // Copy the results to the device - if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0, - count * gTypeSizes[inType], gIn, 0, - NULL, NULL))) - { - vlog_error("ERROR: clEnqueueWriteBuffer failed. (%d)\n", error); - gFailCount++; - goto exit; - } - - // Call completion callback for the write, which will enqueue the rest - // of the work. - WriteInputBufferComplete((void *)&writeInputBufferInfo); - - // Make sure the work is actually running, so we don't deadlock - if ((error = clFlush(gQueue))) - { - vlog_error("clFlush failed with error %d\n", error); - gFailCount++; - goto exit; - } - - ThreadPool_Do(PrepareReference, chunks, &init_info); - - // signal we are done calculating the reference results - if ((error = clSetUserEventStatus( - writeInputBufferInfo.calcReferenceValues, CL_COMPLETE))) - { - vlog_error( - "Error: Failed to set user event status to CL_COMPLETE: %d\n", - error); - gFailCount++; - goto exit; - } - - // Wait for the event callbacks to finish verifying correctness. - if ((error = clWaitForEvents( - 1, (cl_event *)&writeInputBufferInfo.doneBarrier))) - { - vlog_error("Error: Failed to wait for barrier: %d\n", error); - gFailCount++; - goto exit; - } - - if ((error = clReleaseEvent(writeInputBufferInfo.calcReferenceValues))) - { - vlog_error("Error: Failed to release calcReferenceValues: %d\n", - error); - gFailCount++; - goto exit; - } - - if ((error = clReleaseEvent(writeInputBufferInfo.doneBarrier))) - { - vlog_error("Error: Failed to release done barrier: %d\n", error); - gFailCount++; - goto exit; - } - - - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; - vectorSize++) - { - if ((error = writeInputBufferInfo.calcInfo[vectorSize].result)) - { - switch (inType) - { - case kuchar: - case kchar: - vlog("Input value: 0x%2.2x ", - ((unsigned char *)gIn)[error - 1]); - break; - case kushort: - case kshort: - vlog("Input value: 0x%4.4x ", - ((unsigned short *)gIn)[error - 1]); - break; - case kuint: - case kint: - vlog("Input value: 0x%8.8x ", - ((unsigned int *)gIn)[error - 1]); - break; - case kfloat: - vlog("Input value: %a ", ((float *)gIn)[error - 1]); - break; - break; - case kulong: - case klong: - vlog("Input value: 0x%16.16llx ", - ((unsigned long long *)gIn)[error - 1]); - break; - case kdouble: - vlog("Input value: %a ", ((double *)gIn)[error - 1]); - break; - default: - vlog_error("Internal error at %s: %d\n", __FILE__, - __LINE__); - abort(); - break; - } - - // tell the user which conversion it was. - if (0 == vectorSize) - vlog(" (implicit scalar conversion from %s to %s)\n", - gTypeNames[inType], gTypeNames[outType]); - else - vlog(" (convert_%s%s%s%s( %s%s ))\n", gTypeNames[outType], - sizeNames[vectorSize], gSaturationNames[sat], - gRoundingModeNames[round], gTypeNames[inType], - sizeNames[vectorSize]); - - gFailCount++; - goto exit; - } - } - } - - log_info("done.\n"); - - if (gTimeResults) - { - // Kick off tests for the various vector lengths - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; - vectorSize++) - { - size_t workItemCount = blockCount / vectorSizes[vectorSize]; - if (vectorSizes[vectorSize] * gTypeSizes[outType] < 4) - workItemCount /= - 4 / (vectorSizes[vectorSize] * gTypeSizes[outType]); - - double sum = 0.0; - double bestTime = INFINITY; - cl_uint k; - for (k = 0; k < PERF_LOOP_COUNT; k++) - { - uint64_t startTime = GetTime(); - if ((error = RunKernel( - writeInputBufferInfo.calcInfo[vectorSize].kernel, - gInBuffer, gOutBuffers[vectorSize], workItemCount))) - { - gFailCount++; - goto exit; - } - - // Make sure OpenCL is done - if ((error = clFinish(gQueue))) - { - vlog_error("Error %d at clFinish\n", error); - goto exit; - } - - uint64_t endTime = GetTime(); - double time = SubtractTime(endTime, startTime); - sum += time; - if (time < bestTime) bestTime = time; - } - - if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT; - double clocksPerOp = bestTime * (double)gDeviceFrequency - * gComputeDevices * gSimdSize * 1e6 - / (workItemCount * vectorSizes[vectorSize]); - if (0 == vectorSize) - vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", - "implicit convert %s -> %s", gTypeNames[inType], - gTypeNames[outType]); - else - vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element", - "convert_%s%s%s%s( %s%s )", gTypeNames[outType], - sizeNames[vectorSize], gSaturationNames[sat], - gRoundingModeNames[round], gTypeNames[inType], - sizeNames[vectorSize]); - } - } - - if (gWimpyMode) - vlog("\tWimp pass"); - else - vlog("\tpassed"); - -#ifdef __APPLE__ - // record the run time - vlog("\t(%f s)", 1e-9 * (mach_absolute_time() - wall_start)); -#endif - vlog("\n\n"); - fflush(stdout); - - -exit: - // clean up - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) - { - clReleaseProgram(writeInputBufferInfo.calcInfo[vectorSize].program); - clReleaseKernel(writeInputBufferInfo.calcInfo[vectorSize].kernel); - } - - if (init_info.d) - { - for (i = 0; i < threads; i++) free_mtdata(init_info.d[i]); - free(init_info.d); - } - - return error; -} - -void MapResultValuesComplete(void *data); - -// Note: not called reentrantly -void WriteInputBufferComplete(void *data) -{ - cl_int status; - WriteInputBufferInfo *info = (WriteInputBufferInfo *)data; - cl_uint count = info->count; - int vectorSize; - - info->barrierCount = gMaxVectorSize - gMinVectorSize; - - // now that we know that the write buffer is complete, enqueue callbacks to - // wait for the main thread to finish calculating the reference results. - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) - { - size_t workItemCount = - (count + vectorSizes[vectorSize] - 1) / (vectorSizes[vectorSize]); - - if ((status = RunKernel(info->calcInfo[vectorSize].kernel, gInBuffer, - gOutBuffers[vectorSize], workItemCount))) - { - gFailCount++; - return; - } - - info->calcInfo[vectorSize].p = clEnqueueMapBuffer( - gQueue, gOutBuffers[vectorSize], CL_TRUE, - CL_MAP_READ | CL_MAP_WRITE, 0, count * gTypeSizes[info->outType], 0, - NULL, NULL, &status); - { - if (status) - { - vlog_error("ERROR: WriteInputBufferComplete calback failed " - "with status: %d\n", - status); - gFailCount++; - return; - } - } - } - - for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++) - { - MapResultValuesComplete(info->calcInfo + vectorSize); - } - - // Make sure the work starts moving -- otherwise we may deadlock - if ((status = clFlush(gQueue))) - { - vlog_error( - "ERROR: WriteInputBufferComplete calback failed with status: %d\n", - status); - gFailCount++; - return; - } - - // e was already released by the main thread. It should be destroyed - // automatically soon after we exit. -} -void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, - void *data); - -// Note: May be called reentrantly -void MapResultValuesComplete(void *data) -{ - cl_int status; - CalcReferenceValuesInfo *info = (CalcReferenceValuesInfo *)data; - cl_event calcReferenceValues = info->parent->calcReferenceValues; - - // we know that the map is done, wait for the main thread to finish - // calculating the reference values - if ((status = clSetEventCallback(calcReferenceValues, CL_COMPLETE, - CalcReferenceValuesComplete, data))) - { - vlog_error("ERROR: clSetEventCallback failed in " - "MapResultValuesComplete with status: %d\n", - status); - gFailCount++; // not thread safe -- being lazy here - } - - // this thread no longer needs its reference to info->calcReferenceValues, - // so release it - if ((status = clReleaseEvent(calcReferenceValues))) - { - vlog_error("ERROR: clReleaseEvent(info->calcReferenceValues) failed " - "with status: %d\n", - status); - gFailCount++; // not thread safe -- being lazy here - } - - // no need to flush since we didn't enqueue anything - - // e was already released by WriteInputBufferComplete. It should be - // destroyed automatically soon after we exit. -} - - -void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status, - void *data) -{ - CalcReferenceValuesInfo *info = (CalcReferenceValuesInfo *)data; - cl_uint vectorSize = info->vectorSize; - cl_uint count = info->parent->count; - Type outType = - info->parent->outType; // the data type of the conversion result - Type inType = info->parent->inType; // the data type of the conversion input - size_t j; - cl_int error; - cl_event doneBarrier = info->parent->doneBarrier; - - // report spurious error condition - if (CL_SUCCESS != status) - { - vlog_error("ERROR: CalcReferenceValuesComplete did not succeed! (%d)\n", - status); - gFailCount++; // lazy about thread safety here - return; - } - - // Now we know that both results have been mapped back from the device, and - // the main thread is done calculating the reference results. It is now time - // to check the results. - - // verify results - void *mapped = info->p; - - // Patch up NaNs conversions to integer to zero -- these can be converted to - // any integer - if (outType != kfloat && outType != kdouble) - { - if (inType == kfloat) - { - float *inp = (float *)gIn; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) - memset((char *)mapped + j * gTypeSizes[outType], 0, - gTypeSizes[outType]); - } - } - if (inType == kdouble) - { - double *inp = (double *)gIn; - for (j = 0; j < count; j++) - { - if (isnan(inp[j])) - memset((char *)mapped + j * gTypeSizes[outType], 0, - gTypeSizes[outType]); - } - } - } - else if (inType == kfloat || inType == kdouble) - { // outtype and intype is float or double. NaN conversions for float <-> - // double can be any NaN - if (inType == kfloat && outType == kdouble) - { - float *inp = (float *)gIn; - double *outp = (double *)mapped; - for (j = 0; j < count; j++) - { - if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN; - } - } - if (inType == kdouble && outType == kfloat) - { - double *inp = (double *)gIn; - float *outp = (float *)mapped; - for (j = 0; j < count; j++) - { - if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN; - } - } - } - - if (memcmp(mapped, gRef, count * gTypeSizes[outType])) - info->result = gCheckResults[outType](mapped, gRef, gAllowZ, count, - vectorSizes[vectorSize]); - else - info->result = 0; - - // Fill the output buffer with junk and release it - { - cl_uint pattern = 0xffffdead; - memset_pattern4(mapped, &pattern, count * gTypeSizes[outType]); - if ((error = clEnqueueUnmapMemObject(gQueue, gOutBuffers[vectorSize], - mapped, 0, NULL, NULL))) - { - vlog_error("ERROR: clEnqueueUnmapMemObject failed in " - "CalcReferenceValuesComplete (%d)\n", - error); - gFailCount++; - } - } - - if (1 == ThreadPool_AtomicAdd(&info->parent->barrierCount, -1)) - { - if ((status = clSetUserEventStatus(doneBarrier, CL_COMPLETE))) - { - vlog_error("ERROR: clSetUserEventStatus failed in " - "CalcReferenceValuesComplete (err: %d). We're probably " - "going to deadlock.\n", - status); - gFailCount++; - return; - } - - if ((status = clReleaseEvent(doneBarrier))) - { - vlog_error("ERROR: clReleaseEvent failed in " - "CalcReferenceValuesComplete (err: %d).\n", - status); - gFailCount++; - return; - } - } - // e was already released by WriteInputBufferComplete. It should be - // destroyed automatically soon after all the calls to - // CalcReferenceValuesComplete exit. -} - -static cl_program MakeProgram(Type outType, Type inType, SaturationMode sat, - RoundingMode round, int vectorSize, - cl_kernel *outKernel) -{ - cl_program program; - char testName[256]; - int error = 0; - - std::ostringstream source; - if (outType == kdouble || inType == kdouble) - source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - - // Create the program. This is a bit complicated because we are trying to - // avoid byte and short stores. - if (0 == vectorSize) - { - // Create the type names. - char inName[32]; - char outName[32]; - strncpy(inName, gTypeNames[inType], sizeof(inName)); - strncpy(outName, gTypeNames[outType], sizeof(outName)); - sprintf(testName, "test_implicit_%s_%s", outName, inName); - - source << "__kernel void " << testName << "( __global " << inName - << " *src, __global " << outName << " *dest )\n"; - source << "{\n"; - source << " size_t i = get_global_id(0);\n"; - source << " dest[i] = src[i];\n"; - source << "}\n"; - - vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType], - gTypeNames[outType]); - fflush(stdout); - } - else - { - int vectorSizetmp = vectorSizes[vectorSize]; - - // Create the type names. - char convertString[128]; - char inName[32]; - char outName[32]; - switch (vectorSizetmp) - { - case 1: - strncpy(inName, gTypeNames[inType], sizeof(inName)); - strncpy(outName, gTypeNames[outType], sizeof(outName)); - snprintf(convertString, sizeof(convertString), "convert_%s%s%s", - outName, gSaturationNames[sat], - gRoundingModeNames[round]); - snprintf(testName, 256, "test_%s_%s", convertString, inName); - vlog("Building %s( %s ) test\n", convertString, inName); - break; - case 3: - strncpy(inName, gTypeNames[inType], sizeof(inName)); - strncpy(outName, gTypeNames[outType], sizeof(outName)); - snprintf(convertString, sizeof(convertString), - "convert_%s3%s%s", outName, gSaturationNames[sat], - gRoundingModeNames[round]); - snprintf(testName, 256, "test_%s_%s3", convertString, inName); - vlog("Building %s( %s3 ) test\n", convertString, inName); - break; - default: - snprintf(inName, sizeof(inName), "%s%d", gTypeNames[inType], - vectorSizetmp); - snprintf(outName, sizeof(outName), "%s%d", gTypeNames[outType], - vectorSizetmp); - snprintf(convertString, sizeof(convertString), "convert_%s%s%s", - outName, gSaturationNames[sat], - gRoundingModeNames[round]); - snprintf(testName, 256, "test_%s_%s", convertString, inName); - vlog("Building %s( %s ) test\n", convertString, inName); - break; - } - fflush(stdout); - - if (vectorSizetmp == 3) - { - source << "__kernel void " << testName << "( __global " << inName - << " *src, __global " << outName << " *dest )\n"; - source << "{\n"; - source << " size_t i = get_global_id(0);\n"; - source << " if( i + 1 < get_global_size(0))\n"; - source << " vstore3( " << convertString - << "( vload3( i, src)), i, dest );\n"; - source << " else\n"; - source << " {\n"; - source << " " << inName << "3 in;\n"; - source << " " << outName << "3 out;\n"; - source << " if( 0 == (i & 1) )\n"; - source << " in.y = src[3*i+1];\n"; - source << " in.x = src[3*i];\n"; - source << " out = " << convertString << "( in ); \n"; - source << " dest[3*i] = out.x;\n"; - source << " if( 0 == (i & 1) )\n"; - source << " dest[3*i+1] = out.y;\n"; - source << " }\n"; - source << "}\n"; - } - else - { - source << "__kernel void " << testName << "( __global " << inName - << " *src, __global " << outName << " *dest )\n"; - source << "{\n"; - source << " size_t i = get_global_id(0);\n"; - source << " dest[i] = " << convertString << "( src[i] );\n"; - source << "}\n"; - } - } - *outKernel = NULL; - - const char *flags = NULL; - if (gForceFTZ) flags = "-cl-denorms-are-zero"; - - // build it - std::string sourceString = source.str(); - const char *programSource = sourceString.c_str(); - error = create_single_kernel_helper(gContext, &program, outKernel, 1, - &programSource, testName, flags); - if (error) - { - vlog_error("Failed to build kernel/program (err = %d).\n", error); - clReleaseProgram(program); - return NULL; - } - - return program; -} diff --git a/test_conformance/d3d10/harness.h b/test_conformance/d3d10/harness.h index 184e52cb..afeb4966 100644 --- a/test_conformance/d3d10/harness.h +++ b/test_conformance/d3d10/harness.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _HARNESS_H_ -#define _HARNESS_H_ +#ifndef HARNESS_H_ +#define HARNESS_H_ #define _CRT_SECURE_NO_WARNINGS diff --git a/test_conformance/device_execution/CMakeLists.txt b/test_conformance/device_execution/CMakeLists.txt index 5e9e30e3..275b96c2 100644 --- a/test_conformance/device_execution/CMakeLists.txt +++ b/test_conformance/device_execution/CMakeLists.txt @@ -17,6 +17,6 @@ set(DEVICE_EXECUTION_SOURCES utils.cpp ) -include(../CMakeCommon.txt) +set_gnulike_module_compile_flags("-Wno-sign-compare") -# end of file # +include(../CMakeCommon.txt) diff --git a/test_conformance/device_execution/enqueue_block.cpp b/test_conformance/device_execution/enqueue_block.cpp index 29a6cec1..4ddd1db7 100644 --- a/test_conformance/device_execution/enqueue_block.cpp +++ b/test_conformance/device_execution/enqueue_block.cpp @@ -27,561 +27,538 @@ #ifdef CL_VERSION_2_0 extern int gWimpyMode; -static const char* enqueue_simple_block[] = -{ - NL, "void block_fn(size_t tid, int mul, __global int* res)" - NL, "{" - NL, " res[tid] = mul * 7 - 21;" - NL, "}" - NL, "" - NL, "kernel void enqueue_simple_block(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" - NL -}; -static const char* enqueue_block_with_local_arg1[] = -{ - NL, "#define LOCAL_MEM_SIZE 10" - NL, "" - NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp)" - NL, "{" - NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)" - NL, " {" - NL, " tmp[i] = mul * 7 - 21;" - NL, " res[tid] += tmp[i];" - NL, " }" - NL, " res[tid] += 2;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_with_local_arg1(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(__local void*) = ^(__local void* buf){ block_fn_local_arg1(tid, multiplier, res, (local int*)buf); };" - NL, "" - NL, " res[tid] = -2;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)));" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" - NL -}; +// clang-format off +static const char* enqueue_simple_block[] = { R"( + void block_fn(size_t tid, int mul, __global int* res) + { + res[tid] = mul * 7 - 21; + } -static const char* enqueue_block_with_local_arg2[] = -{ - NL, "#define LOCAL_MEM_SIZE 10" - NL, "" - NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp1, __local float4* tmp2)" - NL, "{" - NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)" - NL, " {" - NL, " tmp1[i] = mul * 7 - 21;" - NL, " tmp2[i].x = (float)(mul * 7 - 21);" - NL, " tmp2[i].y = (float)(mul * 7 - 21);" - NL, " tmp2[i].z = (float)(mul * 7 - 21);" - NL, " tmp2[i].w = (float)(mul * 7 - 21);" - NL, "" - NL, " res[tid] += tmp1[i];" - NL, " res[tid] += (int)(tmp2[i].x+tmp2[i].y+tmp2[i].z+tmp2[i].w);" - NL, " }" - NL, " res[tid] += 2;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_with_local_arg2(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(__local void*, __local void*) = ^(__local void* buf1, __local void* buf2)" - NL, " { block_fn_local_arg1(tid, multiplier, res, (local int*)buf1, (local float4*)buf2); };" - NL, "" - NL, " res[tid] = -2;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)), (uint)(LOCAL_MEM_SIZE*sizeof(float4)));" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" - NL -}; + kernel void enqueue_simple_block(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); -static const char* enqueue_block_with_wait_list[] = -{ - NL, "#define BLOCK_SUBMITTED 1" - NL, "#define BLOCK_COMPLETED 2" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "kernel void enqueue_block_with_wait_list(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " clk_event_t block_evt;" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt," - NL, " ^{" - NL, " res[tid] = BLOCK_COMPLETED;" - NL, " });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " retain_event(block_evt);" - NL, " release_event(block_evt);" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] == BLOCK_SUBMITTED)" - NL, " {" - NL, " clk_event_t my_evt;" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, " - NL, " ^{" - NL, " //check block is completed" - NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, " release_event(my_evt);" - NL, " }" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " release_event(user_evt);" - NL, " release_event(block_evt);" - NL, "}" - NL -}; + void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); }; -static const char* enqueue_block_with_wait_list_and_local_arg[] = -{ - NL, "#define LOCAL_MEM_SIZE 10" - NL, "#define BLOCK_COMPLETED 1" - NL, "#define BLOCK_SUBMITTED 2" - NL, "#define BLOCK_STARTED 3" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "void block_fn_local_arg(size_t tid, int mul, __global int* res, __local int* tmp)" - NL, "{" - NL, " res[tid] = BLOCK_STARTED;" - NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)" - NL, " {" - NL, " tmp[i] = mul * 7 - 21;" - NL, " res[tid] += tmp[i];" - NL, " }" - NL, " if(res[tid] == BLOCK_STARTED) res[tid] = BLOCK_COMPLETED;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_with_wait_list_and_local_arg(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " clk_event_t block_evt;" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt, " - NL, " ^(__local void* buf) {" - NL, " block_fn_local_arg(tid, multiplier, res, (__local int*)buf);" - NL, " }, LOCAL_MEM_SIZE*sizeof(int));" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " retain_event(block_evt);" - NL, " release_event(block_evt);" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] == BLOCK_SUBMITTED)" - NL, " {" - NL, " clk_event_t my_evt;" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, " - NL, " ^{" - NL, " //check block is completed" - NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, " release_event(my_evt);" - NL, " }" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " release_event(user_evt);" - NL, " release_event(block_evt);" - NL, "}" - NL -}; + res[tid] = -1; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; -static const char* enqueue_block_get_kernel_work_group_size[] = -{ - NL, "void block_fn(size_t tid, int mul, __global int* res)" - NL, "{" - NL, " res[tid] = mul * 7 - 21;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_get_kernel_work_group_size(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" - NL, "" - NL, " size_t local_work_size = get_kernel_work_group_size(kernelBlock);" - NL, " if (local_work_size <= 0){ res[tid] = -1; return; }" - NL, " size_t global_work_size = local_work_size * 4;" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t q1 = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);" - NL, "" - NL, " int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" -}; +static const char* enqueue_block_with_local_arg1[] = { R"( + #define LOCAL_MEM_SIZE 10 -static const char* enqueue_block_get_kernel_preferred_work_group_size_multiple[] = -{ - NL, "void block_fn(size_t tid, int mul, __global int* res)" - NL, "{" - NL, " res[tid] = mul * 7 - 21;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" - NL, "" - NL, " size_t local_work_size = get_kernel_preferred_work_group_size_multiple(kernelBlock);" - NL, " if (local_work_size <= 0){ res[tid] = -1; return; }" - NL, " size_t global_work_size = local_work_size * 4;" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t q1 = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);" - NL, "" - NL, " int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" -}; + void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp) + { + for (int i = 0; i < LOCAL_MEM_SIZE; i++) + { + tmp[i] = mul * 7 - 21; + res[tid] += tmp[i]; + } + res[tid] += 2; + } -static const char* enqueue_block_capture_event_profiling_info_after_execution[] = -{ - NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) - NL, "" - NL, "__global ulong value[MAX_GWS*2] = {0};" - NL, "" - NL, "void block_fn(size_t tid, __global int* res)" - NL, "{" - NL, " res[tid] = -2;" - NL, "}" - NL, "" - NL, "void check_res(size_t tid, const clk_event_t evt, __global int* res)" - NL, "{" - NL, " capture_event_profiling_info (evt, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);" - NL, "" - NL, " if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;" - NL, " else res[tid] = -4;" - NL, " release_event(evt);" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_capture_event_profiling_info_after_execution(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " clk_event_t block_evt1;" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn (tid, res); };" - NL, "" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 0, NULL, &block_evt1, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " void (^checkBlock) (void) = ^{ check_res(tid, block_evt1, res); };" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, NULL, checkBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "}" - NL -}; + kernel void enqueue_block_with_local_arg1(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); -static const char* enqueue_block_capture_event_profiling_info_before_execution[] = -{ - NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) - NL, "" - NL, "__global ulong value[MAX_GWS*2] = {0};" - NL, "" - NL, "void block_fn(size_t tid, __global int* res)" - NL, "{" - NL, " res[tid] = -2;" - NL, "}" - NL, "" - NL, "void check_res(size_t tid, const ulong *value, __global int* res)" - NL, "{" - NL, " if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;" - NL, " else res[tid] = -4;" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_capture_event_profiling_info_before_execution(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = -1;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " clk_event_t block_evt1;" - NL, " clk_event_t block_evt2;" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn (tid, res); };" - NL, "" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " capture_event_profiling_info (block_evt1, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " void (^checkBlock) (void) = ^{ check_res(tid, &value, res); };" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "" - NL, " release_event(user_evt);" - NL, " release_event(block_evt1);" - NL, " release_event(block_evt2);" - NL, "}" - NL -}; + void (^kernelBlock)(__local void*) = ^(__local void* buf){ block_fn_local_arg1(tid, multiplier, res, (local int*)buf); }; -static const char* enqueue_block_with_barrier[] = -{ - NL, "void block_fn(size_t tid, int mul, __global int* res)" - NL, "{" - NL, " if(mul > 0) barrier(CLK_GLOBAL_MEM_FENCE);" - NL, " res[tid] = mul * 7 -21;" - NL, "}" - NL, "" - NL, "void loop_fn(size_t tid, int n, __global int* res)" - NL, "{" - NL, " while(n > 0)" - NL, " {" - NL, " barrier(CLK_GLOBAL_MEM_FENCE);" - NL, " res[tid] = 0;" - NL, " --n;" - NL, " }" - NL, "}" - NL, "" - NL, "kernel void enqueue_block_with_barrier(__global int* res)" - NL, "{" - NL, " int multiplier = 3;" - NL, " size_t tid = get_global_id(0);" - NL, " queue_t def_q = get_default_queue();" - NL, " res[tid] = -1;" - NL, " size_t n = 256;" - NL, "" - NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };" - NL, "" - NL, " ndrange_t ndrange = ndrange_1D(n);" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " void (^loopBlock)(void) = ^{ loop_fn(tid, n, res); };" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, loopBlock);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "}" - NL -}; + res[tid] = -2; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int))); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; -static const char* enqueue_marker_with_block_event[] = -{ - NL, "#define BLOCK_COMPLETED 1" - NL, "#define BLOCK_SUBMITTED 2" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "kernel void enqueue_marker_with_block_event(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, "" - NL, " clk_event_t block_evt1;" - NL, " clk_event_t marker_evt;" - NL, "" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1," - NL, " ^{" - NL, " res[tid] = BLOCK_COMPLETED;" - NL, " });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -2; return; }" - NL, "" - NL, " enq_res = enqueue_marker(def_q, 1, &block_evt1, &marker_evt);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "" - NL, " retain_event(marker_evt);" - NL, " release_event(marker_evt);" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] == BLOCK_SUBMITTED)" - NL, " {" - NL, " clk_event_t my_evt;" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, " - NL, " ^{" - NL, " //check block is completed" - NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, " release_event(my_evt);" - NL, " }" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " release_event(block_evt1);" - NL, " release_event(marker_evt);" - NL, " release_event(user_evt);" - NL, "}" - NL -}; +static const char* enqueue_block_with_local_arg2[] = { R"( + #define LOCAL_MEM_SIZE 10 -static const char* enqueue_marker_with_user_event[] = -{ - NL, "#define BLOCK_COMPLETED 1" - NL, "#define BLOCK_SUBMITTED 2" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "kernel void enqueue_marker_with_user_event(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, " uint multiplier = 7;" - NL, "" - NL, " clk_event_t user_evt = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, "" - NL, " clk_event_t marker_evt;" - NL, " clk_event_t block_evt;" - NL, "" - NL, " int enq_res = enqueue_marker(def_q, 1, &user_evt, &marker_evt);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " retain_event(marker_evt);" - NL, " release_event(marker_evt);" - NL, "" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &block_evt, " - NL, " ^{" - NL, " if(res[tid] == BLOCK_SUBMITTED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] != BLOCK_SUBMITTED) { res[tid] = -2; return; }" - NL, "" - NL, " set_user_event_status(user_evt, CL_COMPLETE);" - NL, "" - NL, " release_event(block_evt);" - NL, " release_event(marker_evt);" - NL, " release_event(user_evt);" - NL, "}" - NL -}; + void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp1, __local float4* tmp2) + { + for (int i = 0; i < LOCAL_MEM_SIZE; i++) + { + tmp1[i] = mul * 7 - 21; + tmp2[i].x = (float)(mul * 7 - 21); + tmp2[i].y = (float)(mul * 7 - 21); + tmp2[i].z = (float)(mul * 7 - 21); + tmp2[i].w = (float)(mul * 7 - 21); + + res[tid] += tmp1[i]; + res[tid] += (int)(tmp2[i].x+tmp2[i].y+tmp2[i].z+tmp2[i].w); + } + res[tid] += 2; + } -static const char* enqueue_marker_with_mixed_events[] = -{ - NL, "#define BLOCK_COMPLETED 1" - NL, "#define BLOCK_SUBMITTED 2" - NL, "#define CHECK_SUCCESS 0" - NL, "" - NL, "kernel void enqueue_marker_with_mixed_events(__global int* res)" - NL, "{" - NL, " size_t tid = get_global_id(0);" - NL, "" - NL, " clk_event_t mix_ev[2];" - NL, " mix_ev[0] = create_user_event();" - NL, "" - NL, " res[tid] = BLOCK_SUBMITTED;" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, "" - NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1]," - NL, " ^{" - NL, " res[tid] = BLOCK_COMPLETED;" - NL, " });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -2; return; }" - NL, "" - NL, " clk_event_t marker_evt;" - NL, "" - NL, " enq_res = enqueue_marker(def_q, 2, mix_ev, &marker_evt);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "" - NL, " retain_event(marker_evt);" - NL, " release_event(marker_evt);" - NL, "" - NL, " //check block is not started" - NL, " if(res[tid] == BLOCK_SUBMITTED)" - NL, " {" - NL, " clk_event_t my_evt;" - NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, " - NL, " ^{" - NL, " //check block is completed" - NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;" - NL, " });" - NL, " release_event(my_evt);" - NL, " }" - NL, "" - NL, " set_user_event_status(mix_ev[0], CL_COMPLETE);" - NL, "" - NL, " release_event(mix_ev[1]);" - NL, " release_event(marker_evt);" - NL, " release_event(mix_ev[0]);" - NL, "}" - NL -}; + kernel void enqueue_block_with_local_arg2(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); -static const char* enqueue_block_with_mixed_events[] = -{ - NL, "kernel void enqueue_block_with_mixed_events(__global int* res)" - NL, "{" - NL, " int enq_res;" - NL, " size_t tid = get_global_id(0);" - NL, " clk_event_t mix_ev[3];" - NL, " mix_ev[0] = create_user_event();" - NL, " queue_t def_q = get_default_queue();" - NL, " ndrange_t ndrange = ndrange_1D(1);" - NL, " res[tid] = -2;" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], ^{ res[tid]++; });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }" - NL, "" - NL, " enq_res = enqueue_marker(def_q, 1, &mix_ev[1], &mix_ev[2]);" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }" - NL, "" - NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, sizeof(mix_ev)/sizeof(mix_ev[0]), mix_ev, NULL, ^{ res[tid]++; });" - NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -4; return; }" - NL, "" - NL, " set_user_event_status(mix_ev[0], CL_COMPLETE);" - NL, "" - NL, " release_event(mix_ev[0]);" - NL, " release_event(mix_ev[1]);" - NL, " release_event(mix_ev[2]);" - NL, "}" - NL -}; + void (^kernelBlock)(__local void*, __local void*) = ^(__local void* buf1, __local void* buf2) + { block_fn_local_arg1(tid, multiplier, res, (local int*)buf1, (local float4*)buf2); }; + + res[tid] = -2; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)), (uint)(LOCAL_MEM_SIZE*sizeof(float4))); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; + +static const char* enqueue_block_with_wait_list[] = { R"( + #define BLOCK_SUBMITTED 1 + #define BLOCK_COMPLETED 2 + #define CHECK_SUCCESS 0 + + kernel void enqueue_block_with_wait_list(__global int* res) + { + size_t tid = get_global_id(0); + + clk_event_t user_evt = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + clk_event_t block_evt; + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt, + ^{ + res[tid] = BLOCK_COMPLETED; + }); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + retain_event(block_evt); + release_event(block_evt); + + //check block is not started + if (res[tid] == BLOCK_SUBMITTED) + { + clk_event_t my_evt; + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, + ^{ + //check block is completed + if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS; + }); + release_event(my_evt); + } + + set_user_event_status(user_evt, CL_COMPLETE); + + release_event(user_evt); + release_event(block_evt); + } +)" }; + +static const char* enqueue_block_with_wait_list_and_local_arg[] = { R"( + #define LOCAL_MEM_SIZE 10 + #define BLOCK_COMPLETED 1 + #define BLOCK_SUBMITTED 2 + #define BLOCK_STARTED 3 + #define CHECK_SUCCESS 0 + + void block_fn_local_arg(size_t tid, int mul, __global int* res, __local int* tmp) + { + res[tid] = BLOCK_STARTED; + for (int i = 0; i < LOCAL_MEM_SIZE; i++) + { + tmp[i] = mul * 7 - 21; + res[tid] += tmp[i]; + } + if (res[tid] == BLOCK_STARTED) res[tid] = BLOCK_COMPLETED; + } + + kernel void enqueue_block_with_wait_list_and_local_arg(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + clk_event_t user_evt = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + clk_event_t block_evt; + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt, + ^(__local void* buf) { + block_fn_local_arg(tid, multiplier, res, (__local int*)buf); + }, LOCAL_MEM_SIZE*sizeof(int)); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + retain_event(block_evt); + release_event(block_evt); + + //check block is not started + if (res[tid] == BLOCK_SUBMITTED) + { + clk_event_t my_evt; + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, + ^{ + //check block is completed + if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS; + }); + release_event(my_evt); + } + + set_user_event_status(user_evt, CL_COMPLETE); + + release_event(user_evt); + release_event(block_evt); + } +)" }; + +static const char* enqueue_block_get_kernel_work_group_size[] = { R"( + void block_fn(size_t tid, int mul, __global int* res) + { + res[tid] = mul * 7 - 21; + } + + kernel void enqueue_block_get_kernel_work_group_size(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + + void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); }; + + size_t local_work_size = get_kernel_work_group_size(kernelBlock); + if (local_work_size <= 0){ res[tid] = -1; return; } + size_t global_work_size = local_work_size * 4; + + res[tid] = -1; + queue_t q1 = get_default_queue(); + ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size); + + int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; + +static const char* enqueue_block_get_kernel_preferred_work_group_size_multiple[] = { R"( + void block_fn(size_t tid, int mul, __global int* res) + { + res[tid] = mul * 7 - 21; + } + + kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + + void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); }; + + size_t local_work_size = get_kernel_preferred_work_group_size_multiple(kernelBlock); + if (local_work_size <= 0){ res[tid] = -1; return; } + size_t global_work_size = local_work_size * 4; + + res[tid] = -1; + queue_t q1 = get_default_queue(); + ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size); + + int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; + +static const char* enqueue_block_capture_event_profiling_info_after_execution[] = { + "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) "\n" + , R"( + __global ulong value[MAX_GWS*2] = {0}; + + void block_fn(size_t tid, __global int* res) + { + res[tid] = -2; + } + + void check_res(size_t tid, const clk_event_t evt, __global int* res) + { + capture_event_profiling_info (evt, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]); + + if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0; + else res[tid] = -4; + release_event(evt); + } + + kernel void enqueue_block_capture_event_profiling_info_after_execution(__global int* res) + { + size_t tid = get_global_id(0); + + res[tid] = -1; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + clk_event_t block_evt1; + + void (^kernelBlock)(void) = ^{ block_fn (tid, res); }; + + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 0, NULL, &block_evt1, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + void (^checkBlock) (void) = ^{ check_res(tid, block_evt1, res); }; + + enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, NULL, checkBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + } +)" }; + +static const char* enqueue_block_capture_event_profiling_info_before_execution[] = { + "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) "\n" + , R"( + __global ulong value[MAX_GWS*2] = {0}; + + void block_fn(size_t tid, __global int* res) + { + res[tid] = -2; + } + + void check_res(size_t tid, const ulong *value, __global int* res) + { + if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0; + else res[tid] = -4; + } + + kernel void enqueue_block_capture_event_profiling_info_before_execution(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + clk_event_t user_evt = create_user_event(); + + res[tid] = -1; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + clk_event_t block_evt1; + clk_event_t block_evt2; + + void (^kernelBlock)(void) = ^{ block_fn (tid, res); }; + + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + capture_event_profiling_info (block_evt1, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]); + + set_user_event_status(user_evt, CL_COMPLETE); + + void (^checkBlock) (void) = ^{ check_res(tid, &value, res); }; + + enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + + release_event(user_evt); + release_event(block_evt1); + release_event(block_evt2); + } +)" }; + +static const char* enqueue_block_with_barrier[] = { R"( + void block_fn(size_t tid, int mul, __global int* res) + { + if (mul > 0) barrier(CLK_GLOBAL_MEM_FENCE); + res[tid] = mul * 7 -21; + } + + void loop_fn(size_t tid, int n, __global int* res) + { + while (n > 0) + { + barrier(CLK_GLOBAL_MEM_FENCE); + res[tid] = 0; + --n; + } + } + + kernel void enqueue_block_with_barrier(__global int* res) + { + int multiplier = 3; + size_t tid = get_global_id(0); + queue_t def_q = get_default_queue(); + res[tid] = -1; + size_t n = 256; + + void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); }; + + ndrange_t ndrange = ndrange_1D(n); + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + void (^loopBlock)(void) = ^{ loop_fn(tid, n, res); }; + + enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, loopBlock); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + } +)" }; + +static const char* enqueue_marker_with_block_event[] = { R"( + #define BLOCK_COMPLETED 1 + #define BLOCK_SUBMITTED 2 + #define CHECK_SUCCESS 0 + + kernel void enqueue_marker_with_block_event(__global int* res) + { + size_t tid = get_global_id(0); + + clk_event_t user_evt = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + + clk_event_t block_evt1; + clk_event_t marker_evt; + + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, + ^{ + res[tid] = BLOCK_COMPLETED; + }); + if (enq_res != CLK_SUCCESS) { res[tid] = -2; return; } + + enq_res = enqueue_marker(def_q, 1, &block_evt1, &marker_evt); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + + retain_event(marker_evt); + release_event(marker_evt); + + //check block is not started + if (res[tid] == BLOCK_SUBMITTED) + { + clk_event_t my_evt; + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, + ^{ + //check block is completed + if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS; + }); + release_event(my_evt); + } + + set_user_event_status(user_evt, CL_COMPLETE); + + release_event(block_evt1); + release_event(marker_evt); + release_event(user_evt); + } +)" }; + +static const char* enqueue_marker_with_user_event[] = { R"( + #define BLOCK_COMPLETED 1 + #define BLOCK_SUBMITTED 2 + #define CHECK_SUCCESS 0 + + kernel void enqueue_marker_with_user_event(__global int* res) + { + size_t tid = get_global_id(0); + uint multiplier = 7; + + clk_event_t user_evt = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + + clk_event_t marker_evt; + clk_event_t block_evt; + + int enq_res = enqueue_marker(def_q, 1, &user_evt, &marker_evt); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + retain_event(marker_evt); + release_event(marker_evt); + + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &block_evt, + ^{ + if (res[tid] == BLOCK_SUBMITTED) res[tid] = CHECK_SUCCESS; + }); + + //check block is not started + if (res[tid] != BLOCK_SUBMITTED) { res[tid] = -2; return; } + + set_user_event_status(user_evt, CL_COMPLETE); + + release_event(block_evt); + release_event(marker_evt); + release_event(user_evt); + } +)" }; + +static const char* enqueue_marker_with_mixed_events[] = { R"( + #define BLOCK_COMPLETED 1 + #define BLOCK_SUBMITTED 2 + #define CHECK_SUCCESS 0 + + kernel void enqueue_marker_with_mixed_events(__global int* res) + { + size_t tid = get_global_id(0); + + clk_event_t mix_ev[2]; + mix_ev[0] = create_user_event(); + + res[tid] = BLOCK_SUBMITTED; + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + + int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], + ^{ + res[tid] = BLOCK_COMPLETED; + }); + if (enq_res != CLK_SUCCESS) { res[tid] = -2; return; } + + clk_event_t marker_evt; + + enq_res = enqueue_marker(def_q, 2, mix_ev, &marker_evt); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + + retain_event(marker_evt); + release_event(marker_evt); + + //check block is not started + if (res[tid] == BLOCK_SUBMITTED) + { + clk_event_t my_evt; + enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, + ^{ + //check block is completed + if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS; + }); + release_event(my_evt); + } + + set_user_event_status(mix_ev[0], CL_COMPLETE); + + release_event(mix_ev[1]); + release_event(marker_evt); + release_event(mix_ev[0]); + } +)" }; + +static const char* enqueue_block_with_mixed_events[] = { R"( + kernel void enqueue_block_with_mixed_events(__global int* res) + { + int enq_res; + size_t tid = get_global_id(0); + clk_event_t mix_ev[3]; + mix_ev[0] = create_user_event(); + queue_t def_q = get_default_queue(); + ndrange_t ndrange = ndrange_1D(1); + res[tid] = -2; + + enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], ^{ res[tid]++; }); + if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; } + + enq_res = enqueue_marker(def_q, 1, &mix_ev[1], &mix_ev[2]); + if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; } + + enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, sizeof(mix_ev)/sizeof(mix_ev[0]), mix_ev, NULL, ^{ res[tid]++; }); + if (enq_res != CLK_SUCCESS) { res[tid] = -4; return; } + + set_user_event_status(mix_ev[0], CL_COMPLETE); + + release_event(mix_ev[0]); + release_event(mix_ev[1]); + release_event(mix_ev[2]); + } +)" }; +// clang-format on static const kernel_src sources_enqueue_block[] = { diff --git a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt index 4b9968c3..8a4a116a 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt @@ -3,6 +3,7 @@ set(MODULE_NAME CL_KHR_COMMAND_BUFFER) set(${MODULE_NAME}_SOURCES main.cpp basic_command_buffer.cpp + svm_command_basic.cpp command_buffer_printf.cpp command_buffer_get_command_buffer_info.cpp command_buffer_set_kernel_arg.cpp @@ -14,8 +15,11 @@ set(${MODULE_NAME}_SOURCES command_buffer_test_copy.cpp command_buffer_test_barrier.cpp command_buffer_test_event_info.cpp + command_buffer_finalize.cpp ) +set_gnulike_module_compile_flags("-Wno-sign-compare") + include(../../CMakeCommon.txt) add_subdirectory( cl_khr_command_buffer_mutable_dispatch ) diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp index 43734da0..6c02f9f7 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp @@ -201,14 +201,33 @@ struct BasicEnqueueTest : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector<cl_int> output_data(num_elements); + std::vector<cl_int> output_data_1(num_elements); error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), - output_data.data(), 0, nullptr, nullptr); + output_data_1.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < num_elements; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern, output_data_1[i], i); + } + + const cl_int new_pattern = 12; + error = clEnqueueFillBuffer(queue, in_mem, &new_pattern, sizeof(cl_int), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector<cl_int> output_data_2(num_elements); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), + output_data_2.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(new_pattern, output_data_2[i], i); } return CL_SUCCESS; diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h index a20229e0..d08a11af 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef _CL_KHR_BASIC_COMMAND_BUFFER_H -#define _CL_KHR_BASIC_COMMAND_BUFFER_H +#ifndef CL_KHR_BASIC_COMMAND_BUFFER_H +#define CL_KHR_BASIC_COMMAND_BUFFER_H #include "command_buffer_test_base.h" #include "harness/typeWrappers.h" @@ -28,12 +28,24 @@ { \ if (reference != result) \ { \ - log_error("Expected %d was %d at index %u\n", reference, result, \ + log_error("Expected %d was %d at index %zu\n", reference, result, \ index); \ return TEST_FAIL; \ } \ } +// If it is supported get the addresses of all the APIs here. +#define GET_EXTENSION_ADDRESS(FUNC) \ + FUNC = reinterpret_cast<FUNC##_fn>( \ + clGetExtensionFunctionAddressForPlatform(platform, #FUNC)); \ + if (FUNC == nullptr) \ + { \ + log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \ + " with " #FUNC "\n"); \ + return TEST_FAIL; \ + } + + // Helper test fixture for constructing OpenCL objects used in testing // a variety of simple command-buffer enqueue scenarios. struct BasicCommandBufferTest : CommandBufferTestBase @@ -70,6 +82,7 @@ protected: clCommandBufferWrapper command_buffer; }; + template <class T> int MakeAndRunTest(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) @@ -99,4 +112,4 @@ int MakeAndRunTest(cl_device_id device, cl_context context, return TEST_PASS; } -#endif // _CL_KHR_BASIC_COMMAND_BUFFER_H +#endif // CL_KHR_BASIC_COMMAND_BUFFER_H diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt index e0625833..9b598d8b 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt @@ -3,7 +3,15 @@ set(MODULE_NAME CL_KHR_MUTABLE_DISPATCH) set(${MODULE_NAME}_SOURCES main.cpp mutable_command_info.cpp + mutable_command_image_arguments.cpp + mutable_command_arguments.cpp + mutable_command_out_of_order.cpp + mutable_command_global_size.cpp + mutable_command_local_size.cpp + mutable_command_global_offset.cpp ../basic_command_buffer.cpp ) +set_gnulike_module_compile_flags("-Wno-sign-compare") + include(../../../CMakeCommon.txt) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp index 97075792..a2fae497 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp @@ -26,6 +26,18 @@ test_definition test_list[] = { ADD_TEST(mutable_command_info_global_work_offset), ADD_TEST(mutable_command_info_local_work_size), ADD_TEST(mutable_command_info_global_work_size), + ADD_TEST(mutable_dispatch_image_1d_arguments), + ADD_TEST(mutable_dispatch_image_2d_arguments), + ADD_TEST(mutable_dispatch_out_of_order), + ADD_TEST(mutable_dispatch_simultaneous_out_of_order), + ADD_TEST(mutable_dispatch_global_size), + ADD_TEST(mutable_dispatch_local_size), + ADD_TEST(mutable_dispatch_global_offset), + ADD_TEST(mutable_dispatch_svm_arguments), + ADD_TEST(mutable_dispatch_local_arguments), + ADD_TEST(mutable_dispatch_global_arguments), + ADD_TEST(mutable_dispatch_pod_arguments), + ADD_TEST(mutable_dispatch_null_arguments), }; int main(int argc, const char *argv[]) diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_arguments.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_arguments.cpp new file mode 100644 index 00000000..5c8291f0 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_arguments.cpp @@ -0,0 +1,847 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "testHarness.h" +#include "imageHelpers.h" +#include "mutable_command_basic.h" + +#include <CL/cl.h> +#include <CL/cl_ext.h> +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases for +// CL_MUTABLE_DISPATCH_ARGUMENTS_KHR: +// - __global arguments +// - __local arguments +// - plain-old-data arguments +// - NULL arguments +// - SVM arguments + +struct MutableDispatchGlobalArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchGlobalArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + return 0; + } + + cl_int Run() override + { + cl_int error; + + // Create kernel + + const char *sample_const_arg_kernel = + R"( + __kernel void sample_test(__constant int *src, __global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = src[tid]; + })"; + + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, + "sample_test"); + test_error(error, "Creating kernel failed"); + + // Create and initialize buffers + + MTdataHolder d(gRandomSeed); + + std::vector<cl_int> srcData(num_elements); + for (size_t i = 0; i < num_elements; i++) + srcData[i] = (cl_int)genrand_int32(d); + + clMemWrapper srcBuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + num_elements * sizeof(cl_int), + srcData.data(), &error); + test_error(error, "Creating src buffer"); + + clMemWrapper dstBuf0 = + clCreateBuffer(context, CL_MEM_READ_WRITE, + num_elements * sizeof(cl_int), NULL, &error); + test_error(error, "Creating initial dst buffer failed"); + + clMemWrapper dstBuf1 = + clCreateBuffer(context, CL_MEM_READ_WRITE, + num_elements * sizeof(cl_int), NULL, &error); + test_error(error, "Creating updated dst buffer failed"); + + // Build and execute the command buffer for the initial execution + + error = clSetKernelArg(kernel, 0, sizeof(srcBuf), &srcBuf); + test_error(error, "Unable to set src kernel arguments"); + + error = clSetKernelArg(kernel, 1, sizeof(dstBuf0), &dstBuf0); + test_error(error, "Unable to set initial dst kernel argument"); + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Check the results of the initial execution + + std::vector<cl_int> dstData0(num_elements); + error = clEnqueueReadBuffer(queue, dstBuf0, CL_TRUE, 0, + num_elements * sizeof(cl_int), + dstData0.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer for initial dst failed"); + + for (size_t i = 0; i < num_elements; i++) + { + if (srcData[i] != dstData0[i]) + { + log_error("Initial data failed to verify: src[%zu]=%d != " + "dst[%zu]=%d\n", + i, srcData[i], i, dstData0[i]); + return TEST_FAIL; + } + } + + // Modify and execute the command buffer + + cl_mutable_dispatch_arg_khr arg{ 1, sizeof(dstBuf1), &dstBuf1 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + &arg /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Check the results of the modified execution + + std::vector<cl_int> dstData1(num_elements); + error = clEnqueueReadBuffer(queue, dstBuf1, CL_TRUE, 0, + num_elements * sizeof(cl_int), + dstData1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer for modified dst failed"); + + for (size_t i = 0; i < num_elements; i++) + { + if (srcData[i] != dstData1[i]) + { + log_error("Initial data failed to verify: src[%zu]=%d != " + "dst[%zu]=%d\n", + i, srcData[i], i, dstData1[i]); + return TEST_FAIL; + } + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; +}; + +struct MutableDispatchLocalArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchLocalArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + return 0; + } + + cl_int Run() override + { + const char *sample_const_arg_kernel = + R"( + __kernel void sample_test(__constant int *src1, __local int + *src, __global int *dst) + { + size_t tid = get_global_id(0); + src[tid] = src1[tid]; + dst[tid] = src[tid]; + })"; + + cl_int error; + clProgramWrapper program; + clKernelWrapper kernel; + size_t threads[1], localThreads[1]; + std::vector<cl_int> constantData; + std::vector<cl_int> resultData; + + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, + "sample_test"); + test_error(error, "Creating kernel failed"); + + MTdataHolder d(gRandomSeed); + + size_t sizeToAllocate = + ((size_t)max_size / sizeof(cl_int)) * sizeof(cl_int); + size_t numberOfInts = sizeToAllocate / sizeof(cl_int); + constantData.resize(sizeToAllocate / sizeof(cl_int)); + resultData.resize(sizeToAllocate / sizeof(cl_int)); + + for (size_t i = 0; i < numberOfInts; i++) + constantData[i] = (cl_int)genrand_int32(d); + + clMemWrapper streams[2]; + streams[0] = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate, + constantData.data(), &error); + test_error(error, "Creating test array failed"); + streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate, + nullptr, &error); + test_error(error, "Creating test array failed"); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = + clSetKernelArg(kernel, 1, numberOfInts * sizeof(cl_int), nullptr); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); + + threads[0] = numberOfInts; + localThreads[0] = 1; + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, threads, + localThreads, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(cl_mem), nullptr }; + cl_mutable_dispatch_arg_khr args[] = { arg_1 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = + clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, + resultData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < numberOfInts; i++) + if (constantData[i] != resultData[i]) + { + log_error("Data failed to verify: constantData[%d]=%d != " + "resultData[%d]=%d\n", + i, constantData[i], i, resultData[i]); + return TEST_FAIL; + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; + const cl_ulong max_size = 16; +}; + +struct MutableDispatchPODArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchPODArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + cl_int Run() override + { + const char *sample_const_arg_kernel = + R"( + __kernel void sample_test(__constant int *src, int dst) + { + size_t tid = get_global_id(0); + dst = src[tid]; + })"; + + cl_int error; + clProgramWrapper program; + clKernelWrapper kernel; + size_t threads[1], localThreads[1]; + std::vector<cl_int> constantData; + std::vector<cl_int> resultData; + + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, + "sample_test"); + test_error(error, "Creating kernel failed"); + + MTdataHolder d(gRandomSeed); + + size_t sizeToAllocate = + ((size_t)max_size / sizeof(cl_int)) * sizeof(cl_int); + size_t numberOfInts = sizeToAllocate / sizeof(cl_int); + constantData.resize(sizeToAllocate / sizeof(cl_int)); + resultData.resize(sizeToAllocate / sizeof(cl_int)); + + for (size_t i = 0; i < numberOfInts; i++) + constantData[i] = (cl_int)genrand_int32(d); + + clMemWrapper stream; + stream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate, + constantData.data(), &error); + test_error(error, "Creating test array failed"); + + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream); + test_error(error, "Unable to set indexed kernel arguments"); + cl_int intarg = 10; + error = clSetKernelArg(kernel, 1, sizeof(cl_int), &intarg); + test_error(error, "Unable to set indexed kernel arguments"); + + threads[0] = numberOfInts; + localThreads[0] = 1; + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, threads, + localThreads, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + intarg = 20; + cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(cl_int), &intarg }; + cl_mutable_dispatch_arg_khr args[] = { arg_1 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueReadBuffer(queue, stream, CL_TRUE, 0, sizeToAllocate, + resultData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < numberOfInts; i++) + if (constantData[i] != resultData[i]) + { + log_error("Data failed to verify: constantData[%d]=%d != " + "resultData[%d]=%d\n", + i, constantData[i], i, resultData[i]); + return TEST_FAIL; + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; + const cl_ulong max_size = 16; +}; + +struct MutableDispatchNullArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchNullArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + cl_int Run() override + { + cl_int error; + + // Create kernel + + const char *sample_const_arg_kernel = + R"( + __kernel void sample_test(__constant int *src, __global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = src ? src[tid] : 12345; + })"; + + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, + "sample_test"); + test_error(error, "Creating kernel failed"); + + MTdataHolder d(gRandomSeed); + + std::vector<cl_int> srcData(num_elements); + for (size_t i = 0; i < num_elements; i++) + srcData[i] = (cl_int)genrand_int32(d); + + clMemWrapper srcBuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + num_elements * sizeof(cl_int), + srcData.data(), &error); + test_error(error, "Creating src buffer"); + + clMemWrapper dstBuf = + clCreateBuffer(context, CL_MEM_READ_WRITE, + num_elements * sizeof(cl_int), NULL, &error); + test_error(error, "Creating dst buffer failed"); + + // Build and execute the command buffer for the initial execution + + error = clSetKernelArg(kernel, 0, sizeof(srcBuf), &srcBuf); + test_error(error, "Unable to set src kernel arguments"); + + error = clSetKernelArg(kernel, 1, sizeof(dstBuf), &dstBuf); + test_error(error, "Unable to set initial dst kernel argument"); + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Check the results of the initial execution + + std::vector<cl_int> dstData0(num_elements); + error = clEnqueueReadBuffer(queue, dstBuf, CL_TRUE, 0, + num_elements * sizeof(cl_int), + dstData0.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer for initial dst failed"); + + for (size_t i = 0; i < num_elements; i++) + { + if (srcData[i] != dstData0[i]) + { + log_error("Initial data failed to verify: src[%zu]=%d != " + "dst[%zu]=%d\n", + i, srcData[i], i, dstData0[i]); + return TEST_FAIL; + } + } + + // Modify and execute the command buffer + + cl_mutable_dispatch_arg_khr arg{ 0, sizeof(cl_mem), nullptr }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + &arg /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Check the results of the modified execution + + std::vector<cl_int> dstData1(num_elements); + error = clEnqueueReadBuffer(queue, dstBuf, CL_TRUE, 0, + num_elements * sizeof(cl_int), + dstData1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer for modified dst failed"); + + for (size_t i = 0; i < num_elements; i++) + { + if (12345 != dstData1[i]) + { + log_error("Modified data failed to verify: %d != dst[%zu]=%d\n", + 12345, i, dstData1[i]); + return TEST_FAIL; + } + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; + const cl_ulong max_size = 16; +}; + +struct MutableDispatchSVMArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchSVMArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + bool Skip() override + { + cl_device_svm_capabilities svm_caps; + bool svm_capabilities = + !clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES, + sizeof(svm_caps), &svm_caps, NULL) + && svm_caps != 0; + + return !svm_capabilities || BasicMutableCommandBufferTest::Skip(); + } + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + const char *svm_arguments_kernel = + R"( + typedef struct { + global int* ptr; + } wrapper; + __kernel void test_svm_arguments(__global wrapper* pWrapper) + { + size_t i = get_global_id(0); + pWrapper->ptr[i]++; + })"; + + create_single_kernel_helper(context, &program, &kernel, 1, + &svm_arguments_kernel, + "test_svm_arguments"); + + return 0; + } + + cl_int Run() override + { + const cl_int zero = 0; + cl_int error; + + // Allocate and initialize SVM for initial execution + + cl_int *initWrapper = (cl_int *)clSVMAlloc(context, CL_MEM_READ_WRITE, + sizeof(cl_int *), 0); + cl_int *initBuffer = (cl_int *)clSVMAlloc( + context, CL_MEM_READ_WRITE, num_elements * sizeof(cl_int), 0); + test_assert_error(initWrapper != nullptr && initBuffer != nullptr, + "clSVMAlloc failed for initial execution"); + + error = clEnqueueSVMMemcpy(queue, CL_TRUE, initWrapper, &initBuffer, + sizeof(cl_int *), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMemcpy failed for initWrapper"); + + error = clEnqueueSVMMemFill(queue, initBuffer, &zero, sizeof(zero), + num_elements * sizeof(cl_int), 0, nullptr, + nullptr); + test_error(error, "clEnqueueSVMMemFill failed for initBuffer"); + + // Allocate and initialize SVM for modified execution + + cl_int *newWrapper = + (cl_int *)clSVMAlloc(context, CL_MEM_READ_WRITE, sizeof(cl_int), 0); + cl_int *newBuffer = (cl_int *)clSVMAlloc( + context, CL_MEM_READ_WRITE, num_elements * sizeof(cl_int), 0); + test_assert_error(newWrapper != nullptr && newBuffer != nullptr, + "clSVMAlloc failed for modified execution"); + + error = clEnqueueSVMMemcpy(queue, CL_TRUE, newWrapper, &newBuffer, + sizeof(cl_int *), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMemcpy failed for newWrapper"); + + error = clEnqueueSVMMemFill(queue, newBuffer, &zero, sizeof(zero), + num_elements * sizeof(cl_int), 0, nullptr, + nullptr); + test_error(error, "clEnqueueSVMMemFill failed for newB"); + + // Build and execute the command buffer for the initial execution + + error = clSetKernelArgSVMPointer(kernel, 0, initWrapper); + test_error(error, "clSetKernelArg failed for initWrapper"); + + error = clSetKernelExecInfo(kernel, CL_KERNEL_EXEC_INFO_SVM_PTRS, + sizeof(initBuffer), &initBuffer); + test_error(error, "clSetKernelExecInfo failed for initBuffer"); + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR + | CL_MUTABLE_DISPATCH_EXEC_INFO_KHR, + 0 + }; + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + // Check the results of the initial execution + + error = + clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_READ, initBuffer, + num_elements * sizeof(cl_int), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMap failed for initBuffer"); + + for (size_t i = 0; i < num_elements; i++) + { + if (initBuffer[i] != 1) + { + log_error("Initial verification failed at index %zu: Got %d, " + "wanted 1\n", + i, initBuffer[i]); + return TEST_FAIL; + } + } + + error = clEnqueueSVMUnmap(queue, initBuffer, 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMUnmap failed for initBuffer"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + // Modify and execute the command buffer + + cl_mutable_dispatch_arg_khr arg_svm{}; + arg_svm.arg_index = 0; + arg_svm.arg_value = newWrapper; + + cl_mutable_dispatch_exec_info_khr exec_info{}; + exec_info.param_name = CL_KERNEL_EXEC_INFO_SVM_PTRS; + exec_info.param_value_size = sizeof(newBuffer); + exec_info.param_value = &newBuffer; + + cl_mutable_dispatch_config_khr dispatch_config{}; + dispatch_config.type = CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR; + dispatch_config.command = command; + dispatch_config.num_svm_args = 1; + dispatch_config.arg_svm_list = &arg_svm; + dispatch_config.num_exec_infos = 1; + dispatch_config.exec_info_list = &exec_info; + + cl_mutable_base_config_khr mutable_config{}; + mutable_config.type = CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR; + mutable_config.num_mutable_dispatch = 1; + mutable_config.mutable_dispatch_list = &dispatch_config; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + // Check the results of the modified execution + + error = + clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_READ, newBuffer, + num_elements * sizeof(cl_int), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMap failed for newBuffer"); + + for (size_t i = 0; i < num_elements; i++) + { + if (newBuffer[i] != 1) + { + log_error("Modified verification failed at index %zu: Got %d, " + "wanted 1\n", + i, newBuffer[i]); + return TEST_FAIL; + } + } + + error = clEnqueueSVMUnmap(queue, newBuffer, 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMUnmap failed for newBuffer"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + // Clean up + + clSVMFree(context, initWrapper); + clSVMFree(context, initBuffer); + clSVMFree(context, newWrapper); + clSVMFree(context, newBuffer); + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; +}; + + +int test_mutable_dispatch_local_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest<MutableDispatchLocalArguments>(device, context, queue, + num_elements); +} + +int test_mutable_dispatch_global_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest<MutableDispatchGlobalArguments>(device, context, + queue, num_elements); +} + +int test_mutable_dispatch_pod_arguments(cl_device_id device, cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest<MutableDispatchPODArguments>(device, context, queue, + num_elements); +} + +int test_mutable_dispatch_null_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest<MutableDispatchNullArguments>(device, context, queue, + num_elements); +} + +int test_mutable_dispatch_svm_arguments(cl_device_id device, cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest<MutableDispatchSVMArguments>(device, context, queue, + num_elements); +}
\ No newline at end of file diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h index 9056a00d..19147556 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef _CL_KHR_MUTABLE_COMMAND_BASIC_H -#define _CL_KHR_MUTABLE_COMMAND_BASIC_H +#ifndef CL_KHR_MUTABLE_COMMAND_BASIC_H +#define CL_KHR_MUTABLE_COMMAND_BASIC_H #include "../basic_command_buffer.h" #include "../command_buffer_test_base.h" @@ -84,24 +84,52 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest &platform, nullptr); test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); - // If it is supported get the addresses of all the APIs here. -#define GET_EXTENSION_ADDRESS(FUNC) \ - FUNC = reinterpret_cast<FUNC##_fn>( \ - clGetExtensionFunctionAddressForPlatform(platform, #FUNC)); \ - if (FUNC == nullptr) \ - { \ - log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \ - " with " #FUNC "\n"); \ - return TEST_FAIL; \ + GET_EXTENSION_ADDRESS(clUpdateMutableCommandsKHR); + + return CL_SUCCESS; } + + clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr; + + const char* kernelString = "__kernel void empty() {}"; + const size_t global_work_size = 4 * 16; +}; + +struct InfoMutableCommandBufferTest : BasicMutableCommandBufferTest +{ + InfoMutableCommandBufferTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + cl_int error = init_extension_functions(); + test_error(error, "Unable to initialise extension functions"); + + return CL_SUCCESS; + } + + cl_int init_extension_functions() + { + BasicCommandBufferTest::init_extension_functions(); + + cl_platform_id platform; + cl_int error = + clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), + &platform, nullptr); + test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); + GET_EXTENSION_ADDRESS(clGetMutableCommandInfoKHR); return CL_SUCCESS; } clGetMutableCommandInfoKHR_fn clGetMutableCommandInfoKHR = nullptr; - const char* kernelString = "__kernel void empty() {}"; - const size_t global_work_size = 4 * sizeof(cl_int); }; -#endif //_CL_KHR_MUTABLE_COMMAND_BASIC_H
\ No newline at end of file +#undef GET_EXTENSION_ADDRESS + +#endif //_CL_KHR_MUTABLE_COMMAND_BASIC_H diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp new file mode 100644 index 00000000..80bc015a --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp @@ -0,0 +1,170 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include <extensionHelpers.h> +#include "imageHelpers.h" +#include "mutable_command_basic.h" + +#include <CL/cl.h> +#include <CL/cl_ext.h> + +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases: +// +// CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR + +struct MutableDispatchGlobalOffset : InfoMutableCommandBufferTest +{ + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; + + MutableDispatchGlobalOffset(cl_device_id device, cl_context context, + cl_command_queue queue) + : InfoMutableCommandBufferTest(device, context, queue) + {} + + bool Skip() override + { + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR; + + return !mutable_support || InfoMutableCommandBufferTest::Skip(); + } + + cl_int Run() override + { + const char *global_offset_kernel = + R"( + __kernel void sample_test(__global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = get_global_offset(0); + })"; + + cl_int error = + create_single_kernel_helper(context, &program, &kernel, 1, + &global_offset_kernel, "sample_test"); + test_error(error, "Creating kernel failed"); + + clMemWrapper stream; + stream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate, + nullptr, &error); + test_error(error, "Creating test array failed"); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &global_work_size, nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 0 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + nullptr /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + &update_global_offset /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clGetMutableCommandInfoKHR( + command, CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR, + sizeof(info_global_offset), &info_global_offset, nullptr); + test_error(error, "clGetMutableCommandInfoKHR failed"); + + if (info_global_offset != update_global_offset) + { + log_error("ERROR: Wrong size returned from " + "clGetMutableCommandInfoKHR."); + return TEST_FAIL; + } + + std::vector<cl_int> resultData; + resultData.resize(num_elements); + + error = clEnqueueReadBuffer(queue, stream, CL_TRUE, 0, sizeToAllocate, + resultData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + if (i < update_global_offset && 0 != resultData[i]) + { + log_error("Data failed to verify: update_global_offset != " + "resultData[%d]=%d\n", + i, resultData[i]); + return TEST_FAIL; + } + else if (i >= update_global_offset + && update_global_offset != resultData[i]) + { + log_error("Data failed to verify: update_global_offset != " + "resultData[%d]=%d\n", + i, resultData[i]); + return TEST_FAIL; + } + return CL_SUCCESS; + } + + size_t info_global_offset = 0; + const size_t update_global_offset = 3; + const size_t sizeToAllocate = + (global_work_size + update_global_offset) * sizeof(cl_int); + const size_t num_elements = sizeToAllocate / sizeof(cl_int); + cl_mutable_command_khr command = nullptr; +}; + +int test_mutable_dispatch_global_offset(cl_device_id device, cl_context context, + cl_command_queue queue, + int num_elements) +{ + + return MakeAndRunTest<MutableDispatchGlobalOffset>(device, context, queue, + num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp new file mode 100644 index 00000000..091f0c8d --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp @@ -0,0 +1,167 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include <extensionHelpers.h> +#include "imageHelpers.h" +#include "mutable_command_basic.h" + +#include <CL/cl.h> +#include <CL/cl_ext.h> + +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases: +// +// CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR + +struct MutableDispatchGlobalSize : public InfoMutableCommandBufferTest +{ + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; + + MutableDispatchGlobalSize(cl_device_id device, cl_context context, + cl_command_queue queue) + : InfoMutableCommandBufferTest(device, context, queue) + {} + + bool Skip() override + { + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR; + + return !mutable_support || InfoMutableCommandBufferTest::Skip(); + } + + cl_int Run() override + { + const char *global_size_kernel = + R"( + __kernel void sample_test(__global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = get_global_size(0); + })"; + + cl_int error = create_single_kernel_helper( + context, &program, &kernel, 1, &global_size_kernel, "sample_test"); + test_error(error, "Creating kernel failed"); + + clMemWrapper stream; + stream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate, + nullptr, &error); + test_error(error, "Creating test array failed"); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &global_work_size, nullptr, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 0 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + nullptr /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + &update_global_size /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clGetMutableCommandInfoKHR( + command, CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR, + sizeof(info_global_size), &info_global_size, nullptr); + test_error(error, "clGetMutableCommandInfoKHR failed"); + + if (info_global_size != update_global_size) + { + log_error("ERROR: Wrong size returned from " + "clGetMutableCommandInfoKHR."); + return TEST_FAIL; + } + + std::vector<cl_int> resultData; + resultData.resize(num_elements); + + error = clEnqueueReadBuffer(queue, stream, CL_TRUE, 0, sizeToAllocate, + resultData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + if (i >= update_global_size && global_work_size != resultData[i]) + { + log_error("Data failed to verify: update_global_size != " + "resultData[%d]=%d\n", + i, resultData[i]); + return TEST_FAIL; + } + else if (i < update_global_size + && update_global_size != resultData[i]) + { + log_error("Data failed to verify: update_global_size != " + "resultData[%d]=%d\n", + i, resultData[i]); + return TEST_FAIL; + } + + return CL_SUCCESS; + } + + size_t info_global_size = 0; + const size_t update_global_size = 3; + const size_t sizeToAllocate = global_work_size; + const size_t num_elements = sizeToAllocate / sizeof(cl_int); + cl_mutable_command_khr command = nullptr; +}; + +int test_mutable_dispatch_global_size(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest<MutableDispatchGlobalSize>(device, context, queue, + num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_image_arguments.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_image_arguments.cpp new file mode 100644 index 00000000..b1ce25ec --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_image_arguments.cpp @@ -0,0 +1,427 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include <vector> +#include "imageHelpers.h" +#include "mutable_command_basic.h" + +#include <CL/cl.h> +#include <CL/cl_ext.h> +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases for +// CL_MUTABLE_DISPATCH_ARGUMENTS_KHR: +// - image arguments + +struct MutableDispatchImage1DArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchImage1DArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + return CL_SUCCESS; + } + + bool Skip() override + { + cl_bool image_support; + + cl_int error = + clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, + sizeof(image_support), &image_support, nullptr); + test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed"); + + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR; + + return (!mutable_support || !image_support) + || BasicMutableCommandBufferTest::Skip(); + } + + cl_int Run() override + { + const char *sample_const_arg_kernel = + R"(__kernel void sample_test( read_only image1d_t source, sampler_t + sampler, write_only image1d_t dest) + { + int offset = get_global_id(0); + + int4 color = read_imagei( source, sampler, offset ); + + write_imagei( dest, offset, color ); + })"; + + cl_int error; + clProgramWrapper program; + clKernelWrapper kernel; + + cl_image_desc image_desc; + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + image_desc.image_type = CL_MEM_OBJECT_IMAGE1D; + image_desc.image_width = 4; + image_desc.image_row_pitch = 0; + image_desc.num_mip_levels = 0; + + const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; + + image_descriptor imageInfo = { 0 }; + imageInfo.type = CL_MEM_OBJECT_IMAGE1D; + imageInfo.format = &formats; + imageInfo.width = 4; + + BufferOwningPtr<char> imageValues_input, imageValues_output, outputData; + MTdataHolder d(gRandomSeed); + generate_random_image_data(&imageInfo, imageValues_input, d); + generate_random_image_data(&imageInfo, imageValues_output, d); + generate_random_image_data(&imageInfo, outputData, d); + + char *host_ptr_input = (char *)imageValues_input; + char *host_ptr_output = (char *)imageValues_output; + + clMemWrapper src_image = create_image_1d( + context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats, + image_desc.image_width, 0, host_ptr_input, nullptr, &error); + test_error(error, "create_image_1d failed"); + + clMemWrapper dst_image = create_image_1d( + context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats, + image_desc.image_width, 0, host_ptr_output, nullptr, &error); + test_error(error, "create_image_2d failed"); + + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, + "sample_test"); + test_error(error, "Creating kernel failed"); + + clSamplerWrapper sampler = clCreateSampler( + context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error); + test_error(error, "Unable to create sampler"); + + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &src_image); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clSetKernelArg(kernel, 1, sizeof(cl_sampler), &sampler); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dst_image); + test_error(error, "Unable to set indexed kernel arguments"); + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + size_t globalDim[3] = { 4, 1, 1 }, localDim[3] = { 1, 1, 1 }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, globalDim, + localDim, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + clMemWrapper new_image = create_image_1d( + context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats, + image_desc.image_width, 0, host_ptr_output, nullptr, &error); + test_error(error, "create_image_1d failed"); + + cl_mutable_dispatch_arg_khr arg_2{ 2, sizeof(cl_mem), &new_image }; + cl_mutable_dispatch_arg_khr args[] = { arg_2 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { image_desc.image_width, 1, 1 }; + + error = clEnqueueReadImage(queue, new_image, CL_TRUE, origin, region, 0, + 0, outputData, 0, nullptr, nullptr); + test_error(error, "clEnqueueReadImage failed"); + + for (size_t i = 0; i < imageInfo.width; ++i) + { + if (imageValues_input[i] != outputData[i]) + { + log_error("Data failed to verify: imageValues[%d]=%d != " + "outputData[%d]=%d\n", + i, imageValues_input[i], i, outputData[i]); + + return TEST_FAIL; + } + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; +}; + +struct MutableDispatchImage2DArguments : public BasicMutableCommandBufferTest +{ + using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + + MutableDispatchImage2DArguments(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue) + {} + + virtual cl_int SetUp(int elements) override + { + BasicMutableCommandBufferTest::SetUp(elements); + + return CL_SUCCESS; + } + + bool Skip() override + { + cl_bool image_support; + + cl_int error = + clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, + sizeof(image_support), &image_support, nullptr); + test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed"); + + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR; + + return (!mutable_support || !image_support) + || BasicMutableCommandBufferTest::Skip(); + } + + cl_int Run() override + { + + const char *sample_const_arg_kernel = + R"(__kernel void sample_test( read_only image2d_t source, sampler_t + sampler, write_only image2d_t dest) + { + int x = get_global_id(0); + int y = get_global_id(1); + + int4 color = read_imagei( source, sampler, (int2) (x, y) ); + + write_imagei( dest, (int2) (x, y), color ); + })"; + + cl_int error; + clProgramWrapper program; + clKernelWrapper kernel; + + cl_image_desc image_desc; + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + image_desc.image_type = CL_MEM_OBJECT_IMAGE2D; + image_desc.image_width = 4; + image_desc.image_height = 4; + image_desc.image_row_pitch = 0; + image_desc.num_mip_levels = 0; + + size_t data_size = + image_desc.image_width * image_desc.image_height * sizeof(cl_int); + + const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; + + image_descriptor imageInfo = { 0 }; + imageInfo.type = CL_MEM_OBJECT_IMAGE2D; + imageInfo.width = 4; + imageInfo.height = 4; + imageInfo.format = &formats; + + BufferOwningPtr<char> imageValues_input, imageValues_output; + + MTdataHolder d(gRandomSeed); + generate_random_image_data(&imageInfo, imageValues_input, d); + generate_random_image_data(&imageInfo, imageValues_output, d); + + char *host_ptr_input = (char *)imageValues_input; + char *host_ptr_output = (char *)imageValues_output; + std::vector<char> outputData(data_size); + + clMemWrapper src_image = + create_image_2d(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + &formats, image_desc.image_width, + image_desc.image_height, 0, host_ptr_input, &error); + test_error(error, "create_image_2d failed"); + + clMemWrapper dst_image = create_image_2d( + context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats, + image_desc.image_width, image_desc.image_height, 0, host_ptr_output, + &error); + test_error(error, "create_image_2d failed"); + + error = create_single_kernel_helper(context, &program, &kernel, 1, + &sample_const_arg_kernel, + "sample_test"); + test_error(error, "Creating kernel failed"); + + clSamplerWrapper sampler = clCreateSampler( + context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error); + test_error(error, "Unable to create sampler"); + + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &src_image); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clSetKernelArg(kernel, 1, sizeof(cl_sampler), &sampler); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dst_image); + test_error(error, "Unable to set indexed kernel arguments"); + + size_t globalDim[3] = { 4, 4, 1 }, localDim[3] = { 1, 1, 1 }; + + cl_ndrange_kernel_command_properties_khr props[] = { + CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR, + CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0 + }; + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, props, kernel, 1, nullptr, globalDim, + localDim, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + clMemWrapper new_image = create_image_2d( + context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats, + image_desc.image_width, image_desc.image_height, 0, + imageValues_output, &error); + test_error(error, "create_image_2d failed"); + + cl_mutable_dispatch_arg_khr arg_2{ 2, sizeof(cl_mem), &new_image }; + cl_mutable_dispatch_arg_khr args[] = { arg_2 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { image_desc.image_width, image_desc.image_height, + 1 }; + + error = clEnqueueReadImage(queue, new_image, CL_TRUE, origin, region, 0, + 0, outputData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadImage failed"); + + for (size_t i = 0; i < imageInfo.width * imageInfo.height; ++i) + { + if (imageValues_input[i] != outputData[i]) + { + log_error("Data failed to verify: imageValues[%d]=%d != " + "outputData[%d]=%d\n", + i, imageValues_input[i], i, outputData[i]); + return TEST_FAIL; + } + } + + return TEST_PASS; + } + + cl_mutable_command_khr command = nullptr; +}; + +int test_mutable_dispatch_image_1d_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest<MutableDispatchImage1DArguments>(device, context, + queue, num_elements); +} + +int test_mutable_dispatch_image_2d_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest<MutableDispatchImage2DArguments>(device, context, + queue, num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp index cc425a4d..61600dc9 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp @@ -42,13 +42,13 @@ // CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR // CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR -struct InfoDeviceQuery : public BasicMutableCommandBufferTest +struct InfoDeviceQuery : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoDeviceQuery(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -71,12 +71,12 @@ struct InfoDeviceQuery : public BasicMutableCommandBufferTest } }; -struct InfoBuffer : public BasicMutableCommandBufferTest +struct InfoBuffer : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoBuffer(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -108,13 +108,13 @@ struct InfoBuffer : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct PropertiesArray : public BasicMutableCommandBufferTest +struct PropertiesArray : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; PropertiesArray(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -140,7 +140,7 @@ struct PropertiesArray : public BasicMutableCommandBufferTest if (size != sizeof(props) || test_props[0] != props[0] || test_props[1] != props[1]) { - log_error("ERROR: Incorrect command buffer returned from " + log_error("ERROR: Incorrect properties returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } @@ -154,12 +154,12 @@ struct PropertiesArray : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct Kernel : public BasicMutableCommandBufferTest +struct Kernel : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; Kernel(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -181,7 +181,7 @@ struct Kernel : public BasicMutableCommandBufferTest // opaque object. if (test_kernel != kernel) { - log_error("ERROR: Incorrect command buffer returned from " + log_error("ERROR: Incorrect kernel returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } @@ -195,12 +195,12 @@ struct Kernel : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct Dimensions : public BasicMutableCommandBufferTest +struct Dimensions : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; Dimensions(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -210,8 +210,7 @@ struct Dimensions : public BasicMutableCommandBufferTest &global_work_size, nullptr, 0, nullptr, nullptr, &command); test_error(error, "clCommandNDRangeKernelKHR failed"); - size_t test_dimensions; - + cl_uint test_dimensions = 0; error = clGetMutableCommandInfoKHR( command, CL_MUTABLE_DISPATCH_DIMENSIONS_KHR, sizeof(test_dimensions), &test_dimensions, nullptr); @@ -219,7 +218,7 @@ struct Dimensions : public BasicMutableCommandBufferTest if (test_dimensions != dimensions) { - log_error("ERROR: Incorrect command buffer returned from " + log_error("ERROR: Incorrect dimensions returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } @@ -234,12 +233,12 @@ struct Dimensions : public BasicMutableCommandBufferTest const size_t dimensions = 3; }; -struct InfoType : public BasicMutableCommandBufferTest +struct InfoType : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoType(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -271,12 +270,12 @@ struct InfoType : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct InfoQueue : public BasicMutableCommandBufferTest +struct InfoQueue : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoQueue(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -308,13 +307,13 @@ struct InfoQueue : public BasicMutableCommandBufferTest cl_mutable_command_khr command = nullptr; }; -struct InfoGlobalWorkOffset : public BasicMutableCommandBufferTest +struct InfoGlobalWorkOffset : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoGlobalWorkOffset(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -330,7 +329,7 @@ struct InfoGlobalWorkOffset : public BasicMutableCommandBufferTest if (test_global_work_offset != global_work_offset) { - log_error("ERROR: Wrong size returned from " + log_error("ERROR: Wrong global work offset returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } @@ -346,13 +345,13 @@ struct InfoGlobalWorkOffset : public BasicMutableCommandBufferTest size_t test_global_work_offset = 0; }; -struct InfoGlobalWorkSize : public BasicMutableCommandBufferTest +struct InfoGlobalWorkSize : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoGlobalWorkSize(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -368,7 +367,7 @@ struct InfoGlobalWorkSize : public BasicMutableCommandBufferTest if (test_global_work_size != global_work_size) { - log_error("ERROR: Wrong size returned from " + log_error("ERROR: Wrong global work size returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } @@ -383,13 +382,13 @@ struct InfoGlobalWorkSize : public BasicMutableCommandBufferTest size_t test_global_work_size = 0; }; -struct InfoLocalWorkSize : public BasicMutableCommandBufferTest +struct InfoLocalWorkSize : public InfoMutableCommandBufferTest { - using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest; + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; InfoLocalWorkSize(cl_device_id device, cl_context context, cl_command_queue queue) - : BasicMutableCommandBufferTest(device, context, queue) + : InfoMutableCommandBufferTest(device, context, queue) {} cl_int Run() override @@ -405,7 +404,7 @@ struct InfoLocalWorkSize : public BasicMutableCommandBufferTest if (test_local_work_size != local_work_size) { - log_error("ERROR: Wrong size returned from " + log_error("ERROR: Wrong local work size returned from " "clGetMutableCommandInfoKHR."); return TEST_FAIL; } diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_local_size.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_local_size.cpp new file mode 100644 index 00000000..22a9da6d --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_local_size.cpp @@ -0,0 +1,174 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include <extensionHelpers.h> +#include "typeWrappers.h" +#include "procs.h" +#include "testHarness.h" +#include "mutable_command_basic.h" +#include <vector> + +#include <CL/cl.h> +#include <CL/cl_ext.h> + +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases: +// +// CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR + +struct MutableDispatchLocalSize : public InfoMutableCommandBufferTest +{ + using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest; + + MutableDispatchLocalSize(cl_device_id device, cl_context context, + cl_command_queue queue) + : InfoMutableCommandBufferTest(device, context, queue) + {} + + bool Skip() override + { + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR; + + return !mutable_support || InfoMutableCommandBufferTest::Skip(); + } + + cl_int Run() override + { + const char *local_size_kernel = + R"( + __kernel void sample_test(__global int *dst) + { + size_t tid = get_global_id(0); + dst[tid] = get_local_size(0); + })"; + + cl_int error = create_single_kernel_helper( + context, &program, &kernel, 1, &local_size_kernel, "sample_test"); + test_error(error, "Creating kernel failed"); + + clMemWrapper stream; + stream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate, + nullptr, &error); + test_error(error, "Creating test array failed"); + + /* Set the arguments */ + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream); + test_error(error, "Unable to set indexed kernel arguments"); + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &global_work_size, &local_work_size, 0, nullptr, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed."); + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 0 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + nullptr /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + &update_global_size /* global_work_size */, + &update_local_size /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clGetMutableCommandInfoKHR( + command, CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR, + sizeof(info_local_size), &info_local_size, nullptr); + test_error(error, "clGetMutableCommandInfoKHR failed"); + + if (info_local_size != update_local_size) + { + log_error("ERROR: Wrong size returned from " + "clGetMutableCommandInfoKHR."); + return TEST_FAIL; + } + + std::vector<cl_int> resultData; + resultData.resize(num_elements); + + error = clEnqueueReadBuffer(queue, stream, CL_TRUE, 0, sizeToAllocate, + resultData.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + if (i < update_global_size && update_local_size != resultData[i]) + { + log_error("Data failed to verify: update_local_size != " + "resultData[%d]=%d\n", + i, resultData[i]); + return TEST_FAIL; + } + else if (i >= update_global_size + && local_work_size != resultData[i]) + { + log_error("Data failed to verify: update_local_size != " + "resultData[%d]=%d\n", + i, resultData[i]); + return TEST_FAIL; + } + + return CL_SUCCESS; + } + + size_t info_local_size = 0; + const size_t global_work_size = 16; + const size_t local_work_size = 8; + const size_t update_global_size = 8; + const size_t update_local_size = 4; + const size_t sizeToAllocate = 64; + const size_t num_elements = sizeToAllocate / sizeof(cl_int); + + cl_mutable_command_khr command = nullptr; +}; + +int test_mutable_dispatch_local_size(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest<MutableDispatchLocalSize>(device, context, queue, + num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp new file mode 100644 index 00000000..d507dadf --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp @@ -0,0 +1,454 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include <extensionHelpers.h> +#include <vector> +#include "mutable_command_basic.h" + +#include <CL/cl.h> +#include <CL/cl_ext.h> +//////////////////////////////////////////////////////////////////////////////// +// mutable dispatch tests which handle following cases: +// - simultaneous use +// - cross-queue simultaneous-use + +namespace { + +template <bool simultaneous_request> +struct OutOfOrderTest : public BasicMutableCommandBufferTest +{ + OutOfOrderTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicMutableCommandBufferTest(device, context, queue), + out_of_order_queue(nullptr), out_of_order_command_buffer(this), + user_event(nullptr), wait_pass_event(nullptr), kernel_fill(nullptr), + program_fill(nullptr) + { + simultaneous_use_requested = simultaneous_request; + if (simultaneous_request) buffer_size_multiplier = 2; + } + + //-------------------------------------------------------------------------- + cl_int SetUpKernel() override + { + cl_int error = BasicMutableCommandBufferTest::SetUpKernel(); + test_error(error, "BasicMutableCommandBufferTest::SetUpKernel failed"); + + // create additional kernel to properly prepare output buffer for test + const char* kernel_str = + R"( + __kernel void fill(int pattern, __global int* out, __global int* + offset) + { + size_t id = get_global_id(0); + size_t ind = offset[0] + id ; + out[ind] = pattern; + })"; + + error = create_single_kernel_helper_create_program( + context, &program_fill, 1, &kernel_str); + test_error(error, "Failed to create program with source"); + + error = + clBuildProgram(program_fill, 1, &device, nullptr, nullptr, nullptr); + test_error(error, "Failed to build program"); + + kernel_fill = clCreateKernel(program_fill, "fill", &error); + test_error(error, "Failed to create copy kernel"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int SetUpKernelArgs() override + { + cl_int error = BasicMutableCommandBufferTest::SetUpKernelArgs(); + test_error(error, + "BasicMutableCommandBufferTest::SetUpKernelArgs failed"); + + error = clSetKernelArg(kernel_fill, 0, sizeof(cl_int), + &overwritten_pattern); + test_error(error, "clSetKernelArg failed"); + + error = clSetKernelArg(kernel_fill, 1, sizeof(out_mem), &out_mem); + test_error(error, "clSetKernelArg failed"); + + error = clSetKernelArg(kernel_fill, 2, sizeof(off_mem), &off_mem); + test_error(error, "clSetKernelArg failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int SetUp(int elements) override + { + cl_int error = BasicMutableCommandBufferTest::SetUp(elements); + test_error(error, "BasicMutableCommandBufferTest::SetUp failed"); + + error = SetUpKernel(); + test_error(error, "SetUpKernel failed"); + + out_of_order_queue = clCreateCommandQueue( + context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error); + test_error(error, "Unable to create command queue to test with"); + + cl_command_buffer_properties_khr properties[3] = { + CL_COMMAND_BUFFER_FLAGS_KHR, CL_COMMAND_BUFFER_MUTABLE_KHR, 0 + }; + + out_of_order_command_buffer = clCreateCommandBufferKHR( + 1, &out_of_order_queue, properties, &error); + test_error(error, "clCreateCommandBufferKHR failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + bool Skip() override + { + cl_mutable_dispatch_fields_khr mutable_capabilities; + + bool mutable_support = + !clGetDeviceInfo( + device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, + sizeof(mutable_capabilities), &mutable_capabilities, nullptr) + && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR; + + + return !out_of_order_support + || (simultaneous_use_requested && !simultaneous_use_support) + || !mutable_support || BasicMutableCommandBufferTest::Skip(); + } + + //-------------------------------------------------------------------------- + cl_int Run() override + { + cl_int error = CL_SUCCESS; + + if (simultaneous_use_support) + { + // enqueue simultaneous command-buffers with out-of-order calls + error = RunSimultaneous(); + test_error(error, "RunSimultaneous failed"); + } + else + { + // enqueue single command-buffer with out-of-order calls + error = RunSingle(); + test_error(error, "RunSingle failed"); + } + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RecordCommandBuffer() + { + cl_sync_point_khr sync_points[2]; + const cl_int pattern = pattern_pri; + cl_int error = + clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, in_mem, + &pattern, sizeof(cl_int), 0, data_size(), 0, + nullptr, &sync_points[0], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, + out_mem, &overwritten_pattern, + sizeof(cl_int), 0, data_size(), 0, + nullptr, &sync_points[1], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + error = clCommandNDRangeKernelKHR( + out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &num_elements, nullptr, 2, sync_points, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(out_of_order_command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RunSingle() + { + cl_int error; + + error = RecordCommandBuffer(); + test_error(error, "RecordCommandBuffer failed"); + + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 0, nullptr, &single_event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector<cl_int> output_data(num_elements); + error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0, + data_size(), output_data.data(), 1, + &single_event, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i); + } + + clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(cl_int) * num_elements + * buffer_size_multiplier, + nullptr, &error); + test_error(error, "clCreateBuffer failed"); + + cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem), + &new_out_mem }; + cl_mutable_dispatch_arg_khr args[] = { arg_1 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(out_of_order_command_buffer, + &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 0, nullptr, &single_event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clEnqueueReadBuffer(out_of_order_queue, new_out_mem, CL_TRUE, 0, + data_size(), output_data.data(), 1, + &single_event, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i); + } + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RecordSimultaneousCommandBuffer() + { + cl_sync_point_khr sync_points[2]; + // for both simultaneous passes this call will fill entire in_mem buffer + cl_int error = clCommandFillBufferKHR( + out_of_order_command_buffer, nullptr, in_mem, &pattern_pri, + sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr, + &sync_points[0], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + // to avoid overwriting the entire result buffer instead of filling + // only relevant part this additional kernel was introduced + + error = clCommandNDRangeKernelKHR(out_of_order_command_buffer, nullptr, + nullptr, kernel_fill, 1, nullptr, + &num_elements, nullptr, 0, nullptr, + &sync_points[1], &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clCommandNDRangeKernelKHR( + out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &num_elements, nullptr, 2, sync_points, nullptr, &command); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(out_of_order_command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + struct SimulPassData + { + cl_int offset; + std::vector<cl_int> output_buffer; + // 0:user event, 1:offset-buffer fill event, 2:kernel done event + clEventWrapper wait_events[3]; + }; + + //-------------------------------------------------------------------------- + cl_int EnqueueSimultaneousPass(SimulPassData& pd) + { + cl_int error = CL_SUCCESS; + if (!user_event) + { + user_event = clCreateUserEvent(context, &error); + test_error(error, "clCreateUserEvent failed"); + } + + pd.wait_events[0] = user_event; + + // filling offset buffer must wait for previous pass completeness + error = clEnqueueFillBuffer( + out_of_order_queue, off_mem, &pd.offset, sizeof(cl_int), 0, + sizeof(cl_int), (wait_pass_event != nullptr ? 1 : 0), + (wait_pass_event != nullptr ? &wait_pass_event : nullptr), + &pd.wait_events[1]); + test_error(error, "clEnqueueFillBuffer failed"); + + // command buffer execution must wait for two wait-events + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0], + &pd.wait_events[2]); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_FALSE, + pd.offset * sizeof(cl_int), data_size(), + pd.output_buffer.data(), 1, + &pd.wait_events[2], nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(cl_int) * num_elements + * buffer_size_multiplier, + nullptr, &error); + test_error(error, "clCreateBuffer failed"); + + cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem), + &new_out_mem }; + cl_mutable_dispatch_arg_khr args[] = { arg_1 }; + + cl_mutable_dispatch_config_khr dispatch_config{ + CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR, + nullptr, + command, + 1 /* num_args */, + 0 /* num_svm_arg */, + 0 /* num_exec_infos */, + 0 /* work_dim - 0 means no change to dimensions */, + args /* arg_list */, + nullptr /* arg_svm_list - nullptr means no change*/, + nullptr /* exec_info_list */, + nullptr /* global_work_offset */, + nullptr /* global_work_size */, + nullptr /* local_work_size */ + }; + cl_mutable_base_config_khr mutable_config{ + CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1, + &dispatch_config + }; + + error = clUpdateMutableCommandsKHR(out_of_order_command_buffer, + &mutable_config); + test_error(error, "clUpdateMutableCommandsKHR failed"); + + // command buffer execution must wait for two wait-events + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0], + &pd.wait_events[2]); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clEnqueueReadBuffer(out_of_order_queue, new_out_mem, CL_FALSE, + pd.offset * sizeof(cl_int), data_size(), + pd.output_buffer.data(), 1, + &pd.wait_events[2], nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + cl_int RunSimultaneous() + { + cl_int error = RecordSimultaneousCommandBuffer(); + test_error(error, "RecordSimultaneousCommandBuffer failed"); + + cl_int offset = static_cast<cl_int>(num_elements); + + std::vector<SimulPassData> simul_passes = { + { 0, std::vector<cl_int>(num_elements) }, + { offset, std::vector<cl_int>(num_elements) } + }; + + for (auto&& pass : simul_passes) + { + error = EnqueueSimultaneousPass(pass); + test_error(error, "EnqueueSimultaneousPass failed"); + + wait_pass_event = pass.wait_events[2]; + } + + error = clSetUserEventStatus(user_event, CL_COMPLETE); + test_error(error, "clSetUserEventStatus failed"); + + error = clFinish(out_of_order_queue); + test_error(error, "clFinish failed"); + + // verify the result buffers + for (auto&& pass : simul_passes) + { + auto& res_data = pass.output_buffer; + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern_pri, res_data[i], i); + } + } + + return CL_SUCCESS; + } + + //-------------------------------------------------------------------------- + clCommandQueueWrapper out_of_order_queue; + clCommandBufferWrapper out_of_order_command_buffer; + + clEventWrapper user_event; + clEventWrapper single_event; + clEventWrapper wait_pass_event; + + clKernelWrapper kernel_fill; + clProgramWrapper program_fill; + + const size_t test_global_work_size = 3 * sizeof(cl_int); + cl_mutable_command_khr command = nullptr; + + const cl_int overwritten_pattern = 0xACDC; + const cl_int pattern_pri = 42; +}; + +} // anonymous namespace + +int test_mutable_dispatch_out_of_order(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest<OutOfOrderTest<false>>(device, context, queue, + num_elements); +} + +int test_mutable_dispatch_simultaneous_out_of_order(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return MakeAndRunTest<OutOfOrderTest<true>>(device, context, queue, + num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h index 08512cae..1db48917 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H -#define _CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H +#ifndef CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H +#define CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H #include <CL/cl.h> @@ -59,4 +59,51 @@ extern int test_mutable_command_info_global_work_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_mutable_dispatch_image_1d_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_image_2d_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_global_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_local_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_pod_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_null_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_svm_arguments(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_out_of_order(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_simultaneous_out_of_order( + cl_device_id device, cl_context context, cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_global_size(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_local_size(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_mutable_dispatch_global_offset(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); #endif /*_CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H*/ diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_event_sync.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_event_sync.cpp index be8530b2..6ef26bb9 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_event_sync.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_event_sync.cpp @@ -603,12 +603,15 @@ struct CommandBufferEventSync : public BasicCommandBufferTest event_ptrs[1], nullptr); test_error(error, "clEnqueueReadBuffer failed"); - error = clFinish(queue); - test_error(error, "clFinish failed"); + error = clFlush(queue); + test_error(error, "clFlush failed"); error = clFinish(queue_sec); test_error(error, "clFinish failed"); + error = clFinish(queue); + test_error(error, "clFinish failed"); + // verify the result - result buffer must contain initial pattern for (size_t i = 0; i < num_elements; i++) { diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_finalize.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_finalize.cpp new file mode 100644 index 00000000..bd669165 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_finalize.cpp @@ -0,0 +1,85 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "basic_command_buffer.h" +#include "procs.h" + +namespace { + +// Test that finalizing a command-buffer that has already been finalized returns +// the correct error code. +struct FinalizeInvalid : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override + { + cl_int error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + // Finalizing an already finalized command-buffer must return + // CL_INVALID_OPERATION + error = clFinalizeCommandBufferKHR(command_buffer); + test_failure_error_ret( + error, CL_INVALID_OPERATION, + "clFinalizeCommandBufferKHR should return CL_INVALID_OPERATION", + TEST_FAIL); + + return CL_SUCCESS; + } +}; + +// Check that an empty command-buffer can be finalized and then executed. +struct FinalizeEmpty : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override + { + // Finalize an empty command-buffer + cl_int error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + // Execute empty command-buffer and then wait to complete + clEventWrapper event; + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, &event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clWaitForEvents(1, &event); + test_error(error, "clWaitForEvents failed"); + + return CL_SUCCESS; + } +}; +} // anonymous namespace + +int test_finalize_invalid(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest<FinalizeInvalid>(device, context, queue, + num_elements); +} + +int test_finalize_empty(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest<FinalizeEmpty>(device, context, queue, num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp index 3ce410c0..63441970 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp @@ -26,6 +26,7 @@ enum class CombufInfoTestMode CITM_REF_COUNT, CITM_STATE, CITM_PROP_ARRAY, + CITM_CONTEXT, }; namespace { @@ -38,6 +39,7 @@ namespace { // -test case for CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR query // -test case for CL_COMMAND_BUFFER_STATE_KHR query // -test case for CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR query +// -test case for CL_COMMAND_BUFFER_CONTEXT_KHR query template <CombufInfoTestMode test_mode> struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest @@ -70,6 +72,10 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest error = RunPropArrayInfoTest(); test_error(error, "RunPropArrayInfoTest failed"); break; + case CombufInfoTestMode::CITM_CONTEXT: + error = RunContextInfoTest(); + test_error(error, "RunContextInfoTest failed"); + break; } return CL_SUCCESS; @@ -130,7 +136,7 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest // We can not check if this is the right queue because this is an opaque // object, test against NULL. - for (int i = 0; i < queue_list.size(); i++) + for (size_t i = 0; i < queue_list.size(); i++) { test_assert_error( queue_list[i] == queue, @@ -205,8 +211,7 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest // lambda to verify given state auto verify_state = [&](const cl_command_buffer_state_khr &expected) { - cl_command_buffer_state_khr state = - CL_COMMAND_BUFFER_STATE_INVALID_KHR; + cl_command_buffer_state_khr state = ~cl_command_buffer_state_khr(0); cl_int error = clGetCommandBufferInfoKHR( command_buffer, CL_COMMAND_BUFFER_STATE_KHR, sizeof(state), @@ -240,9 +245,10 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest clEventWrapper trigger_event = clCreateUserEvent(context, &error); test_error(error, "clCreateUserEvent failed"); + clEventWrapper execute_event; // enqueued command buffer blocked on user event error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1, - &trigger_event, nullptr); + &trigger_event, &execute_event); test_error(error, "clEnqueueCommandBufferKHR failed"); // verify pending state @@ -255,6 +261,13 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest test_error(signal_error, "clSetUserEventStatus failed"); + error = clWaitForEvents(1, &execute_event); + test_error(error, "Unable to wait for execute event"); + + // verify executable state + error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR); + test_error(error, "verify_state failed"); + return CL_SUCCESS; } @@ -315,6 +328,46 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest return TEST_FAIL; } + cl_int RunContextInfoTest() + { + cl_int error = TEST_PASS; + + // record command buffers + error = RecordCommandBuffer(); + test_error(error, "RecordCommandBuffer failed"); + + size_t ret_value_size = 0; + error = clGetCommandBufferInfoKHR(command_buffer, + CL_COMMAND_BUFFER_CONTEXT_KHR, 0, + nullptr, &ret_value_size); + test_error(error, "clGetCommandBufferInfoKHR failed"); + + test_assert_error( + ret_value_size == sizeof(cl_context), + "Unexpected result of CL_COMMAND_BUFFER_CONTEXT_KHR query!"); + + cl_context ret_context = nullptr; + error = clGetCommandBufferInfoKHR( + command_buffer, CL_COMMAND_BUFFER_CONTEXT_KHR, sizeof(cl_context), + &ret_context, nullptr); + test_error(error, "clGetCommandBufferInfoKHR failed"); + test_assert_error( + ret_context != nullptr, + "Unexpected result of CL_COMMAND_BUFFER_CONTEXT_KHR query!"); + + cl_context expected_context = nullptr; + error = + clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context), + &expected_context, nullptr); + test_error(error, "clGetCommandQueueInfo failed"); + + test_assert_error( + ret_context == expected_context, + "Unexpected result of CL_COMMAND_BUFFER_CONTEXT_KHR query!"); + + return TEST_PASS; + } + const cl_int pattern = 0xE; }; @@ -352,3 +405,11 @@ int test_info_prop_array(cl_device_id device, cl_context context, CommandBufferGetCommandBufferInfo<CombufInfoTestMode::CITM_PROP_ARRAY>>( device, context, queue, num_elements); } + +int test_info_context(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest< + CommandBufferGetCommandBufferInfo<CombufInfoTestMode::CITM_CONTEXT>>( + device, context, queue, num_elements); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp index 28d80450..c06bbf76 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp @@ -160,7 +160,7 @@ struct CommandBufferProfiling : public BasicCommandBufferTest // verify the results by comparing timestamps bool all_vals_0 = prof_params.front().value != 0; - for (int i = 1; i < prof_params.size(); i++) + for (size_t i = 1; i < prof_params.size(); i++) { all_vals_0 = (prof_params[i].value != 0) ? false : all_vals_0; if (prof_params[i - 1].value > prof_params[i].value) diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp index d73fc9ce..82ff16f0 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp @@ -70,15 +70,42 @@ struct BarrierWithWaitListKHR : public BasicCommandBufferTest 0, nullptr, out_of_order_command_buffer, 0, nullptr, &event); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector<cl_int> output_data(num_elements); + std::vector<cl_int> output_data_1(num_elements); error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0, - data_size(), output_data.data(), 1, &event, - nullptr); + data_size(), output_data_1.data(), 1, + &event, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < num_elements; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = + clEnqueueFillBuffer(queue, in_mem, &zero_pattern, sizeof(cl_int), 0, + data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBufferKHR failed"); + + error = + clEnqueueFillBuffer(queue, out_mem, &zero_pattern, sizeof(cl_int), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBufferKHR failed"); + + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 0, nullptr, &event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector<cl_int> output_data_2(num_elements); + error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0, + data_size(), output_data_2.data(), 1, + &event, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern, output_data_2[i], i); } return CL_SUCCESS; @@ -106,6 +133,7 @@ struct BarrierWithWaitListKHR : public BasicCommandBufferTest } const cl_int pattern = 0x16; + const cl_int zero_pattern = 0x0; clCommandQueueWrapper out_of_order_queue; clCommandBufferWrapper out_of_order_command_buffer; clEventWrapper event; diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h index 0fd2e4ec..48abe25d 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef _CL_KHR_COMMAND_BUFFER_TEST_BASE_H -#define _CL_KHR_COMMAND_BUFFER_TEST_BASE_H +#ifndef CL_KHR_COMMAND_BUFFER_TEST_BASE_H +#define CL_KHR_COMMAND_BUFFER_TEST_BASE_H #include <CL/cl_ext.h> #include "harness/deviceInfo.h" @@ -174,4 +174,4 @@ public: } -#endif // _CL_KHR_COMMAND_BUFFER_TEST_BASE_H +#endif // CL_KHR_COMMAND_BUFFER_TEST_BASE_H diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp index 102ae761..0a30e76b 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "basic_command_buffer.h" +#include "svm_command_basic.h" #include "harness/typeWrappers.h" #include "procs.h" @@ -38,7 +39,7 @@ struct CopyImageKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillImageKHR(command_buffer, nullptr, src_image, - fill_color, origin, region, 0, + fill_color_1, origin, region, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillImageKHR failed"); @@ -56,13 +57,38 @@ struct CopyImageKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector<cl_char> output_data(data_size); - error = clEnqueueReadImage(queue, dst_image, CL_TRUE, origin, region, 0, - 0, output_data.data(), 0, nullptr, nullptr); + std::vector<cl_char> output_data_1(data_size); + error = + clEnqueueReadImage(queue, dst_image, CL_TRUE, origin, region, 0, 0, + output_data_1.data(), 0, nullptr, nullptr); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillImage(queue, src_image, fill_color_2, origin, + region, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImageKHR failed"); + + error = clEnqueueFillImage(queue, dst_image, fill_color_2, origin, + region, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImageKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector<cl_char> output_data_2(data_size); + error = + clEnqueueReadImage(queue, dst_image, CL_TRUE, origin, region, 0, 0, + output_data_2.data(), 0, nullptr, nullptr); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; @@ -97,8 +123,12 @@ struct CopyImageKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * 4 * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_uint pattern = 0x05; - const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern }; + const cl_uint pattern_1 = 0x05; + const cl_uint fill_color_1[4] = { pattern_1, pattern_1, pattern_1, + pattern_1 }; + const cl_uint pattern_2 = 0x1; + const cl_uint fill_color_2[4] = { pattern_2, pattern_2, pattern_2, + pattern_2 }; const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; clMemWrapper src_image; clMemWrapper dst_image; @@ -111,7 +141,7 @@ struct CopyBufferKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillBufferKHR( - command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0, + command_buffer, nullptr, in_mem, &pattern_1, sizeof(cl_char), 0, data_size(), 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillBufferKHR failed"); @@ -127,20 +157,113 @@ struct CopyBufferKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector<cl_char> output_data(data_size()); + std::vector<cl_char> output_data_1(data_size()); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), + output_data_1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < data_size(); i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillBuffer(queue, in_mem, &pattern_2, sizeof(cl_char), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBufferKHR failed"); + + error = clEnqueueFillBuffer(queue, out_mem, &pattern_2, sizeof(cl_char), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector<cl_char> output_data_2(data_size()); error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), - output_data.data(), 0, nullptr, nullptr); + output_data_2.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < data_size(); i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); + } + + return CL_SUCCESS; + } + + const cl_char pattern_1 = 0x14; + const cl_char pattern_2 = 0x28; +}; + +struct CopySVMBufferKHR : public BasicSVMCommandBufferTest +{ + using BasicSVMCommandBufferTest::BasicSVMCommandBufferTest; + + cl_int Run() override + { + cl_int error = clCommandSVMMemFillKHR( + command_buffer, nullptr, svm_in_mem(), &pattern_1, sizeof(cl_char), + data_size(), 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandSVMMemFillKHR failed"); + + error = clCommandSVMMemcpyKHR(command_buffer, nullptr, svm_out_mem(), + svm_in_mem(), data_size(), 0, nullptr, + nullptr, nullptr); + test_error(error, "clCommandSVMMemcpyKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector<cl_char> output_data_1(data_size()); + error = + clEnqueueSVMMemcpy(queue, CL_TRUE, output_data_1.data(), + svm_out_mem(), data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMemcpy failed"); + + for (size_t i = 0; i < data_size(); i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + error = clEnqueueSVMMemFill(queue, svm_in_mem(), &pattern_2, + sizeof(cl_char), data_size(), 0, nullptr, + nullptr); + test_error(error, "clEnqueueSVMMemFill failed"); + + error = clEnqueueSVMMemFill(queue, svm_out_mem(), &pattern_2, + sizeof(cl_char), data_size(), 0, nullptr, + nullptr); + test_error(error, "clEnqueueSVMMemFill failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector<cl_char> output_data_2(data_size()); + + error = + clEnqueueSVMMemcpy(queue, CL_TRUE, output_data_2.data(), + svm_out_mem(), data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMemcpy failed"); + + for (size_t i = 0; i < data_size(); i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; } - const cl_char pattern = 0x14; + const cl_char pattern_1 = 0x14; + const cl_char pattern_2 = 0x28; }; struct CopyBufferToImageKHR : public BasicCommandBufferTest @@ -150,7 +273,7 @@ struct CopyBufferToImageKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillBufferKHR( - command_buffer, nullptr, buffer, &pattern, sizeof(cl_char), 0, + command_buffer, nullptr, buffer, &pattern_1, sizeof(cl_char), 0, data_size, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillBufferKHR failed"); @@ -168,15 +291,40 @@ struct CopyBufferToImageKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector<cl_char> output_data(data_size); + std::vector<cl_char> output_data_1(data_size); + + error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0, + output_data_1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadImage failed"); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillBuffer(queue, buffer, &pattern_2, sizeof(cl_char), + 0, data_size, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueFillImage(queue, image, &fill_color_2, origin, region, + 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImage failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector<cl_char> output_data_2(data_size); error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0, - output_data.data(), 0, nullptr, nullptr); + output_data_2.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadImage failed"); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; @@ -211,7 +359,14 @@ struct CopyBufferToImageKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * 4 * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_char pattern = 0x11; + const cl_char pattern_1 = 0x11; + const cl_char pattern_2 = 0x22; + + const cl_uint fill_color_2[4] = { static_cast<cl_uint>(pattern_2), + static_cast<cl_uint>(pattern_2), + static_cast<cl_uint>(pattern_2), + static_cast<cl_uint>(pattern_2) }; + const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; clMemWrapper buffer; @@ -225,7 +380,7 @@ struct CopyImageToBufferKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = - clCommandFillImageKHR(command_buffer, nullptr, image, fill_color, + clCommandFillImageKHR(command_buffer, nullptr, image, fill_color_1, origin, region, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillImageKHR failed"); @@ -243,16 +398,39 @@ struct CopyImageToBufferKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector<cl_char> output_data(data_size); + std::vector<cl_char> output_data_1(data_size); + + error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size, + output_data_1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(static_cast<cl_char>(pattern_1), + output_data_1[i], i); + } + + error = clEnqueueFillImage(queue, image, fill_color_2, origin, region, + 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImage failed"); + + error = clEnqueueFillBuffer(queue, buffer, &pattern_2, sizeof(cl_char), + 0, data_size, 0, nullptr, nullptr); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector<cl_char> output_data_2(data_size); error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size, - output_data.data(), 0, nullptr, nullptr); + output_data_2.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(static_cast<cl_char>(pattern), - output_data[i], i); + CHECK_VERIFICATION_ERROR(static_cast<cl_char>(pattern_1), + output_data_2[i], i); } return CL_SUCCESS; @@ -287,8 +465,12 @@ struct CopyImageToBufferKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * 4 * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_uint pattern = 0x12; - const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern }; + const cl_uint pattern_1 = 0x12; + const cl_uint fill_color_1[4] = { pattern_1, pattern_1, pattern_1, + pattern_1 }; + const cl_uint pattern_2 = 0x24; + const cl_uint fill_color_2[4] = { pattern_2, pattern_2, pattern_2, + pattern_2 }; const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; clMemWrapper image; @@ -302,7 +484,7 @@ struct CopyBufferRectKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillBufferKHR( - command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0, + command_buffer, nullptr, in_mem, &pattern_1, sizeof(cl_char), 0, data_size, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillBufferKHR failed"); @@ -319,14 +501,38 @@ struct CopyBufferRectKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector<cl_char> output_data(data_size); + std::vector<cl_char> output_data_1(data_size); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size, + output_data_1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillBuffer(queue, in_mem, &pattern_2, sizeof(cl_char), + 0, data_size, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueFillBuffer(queue, out_mem, &pattern_2, sizeof(cl_char), + 0, data_size, 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector<cl_char> output_data_2(data_size); error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size, - output_data.data(), 0, nullptr, nullptr); + output_data_2.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; @@ -353,7 +559,8 @@ struct CopyBufferRectKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_char pattern = 0x13; + const cl_char pattern_1 = 0x13; + const cl_char pattern_2 = 0x26; clMemWrapper in_mem; clMemWrapper out_mem; @@ -372,6 +579,14 @@ int test_copy_buffer(cl_device_id device, cl_context context, return MakeAndRunTest<CopyBufferKHR>(device, context, queue, num_elements); } +int test_copy_svm_buffer(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest<CopySVMBufferKHR>(device, context, queue, + num_elements); +} + + int test_copy_buffer_to_image(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp index 88e97a27..67809cfb 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "basic_command_buffer.h" +#include "svm_command_basic.h" #include "harness/typeWrappers.h" #include "procs.h" @@ -35,7 +36,7 @@ struct FillImageKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = - clCommandFillImageKHR(command_buffer, nullptr, image, fill_color, + clCommandFillImageKHR(command_buffer, nullptr, image, fill_color_1, origin, region, 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillImageKHR failed"); @@ -47,14 +48,34 @@ struct FillImageKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector<cl_char> output_data(data_size); + std::vector<cl_char> output_data_1(data_size); error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0, - output_data.data(), 0, nullptr, nullptr); + output_data_1.data(), 0, nullptr, nullptr); for (size_t i = 0; i < data_size; i++) { - CHECK_VERIFICATION_ERROR(static_cast<cl_char>(pattern), - output_data[i], i); + CHECK_VERIFICATION_ERROR(static_cast<cl_char>(pattern_1), + output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + error = clEnqueueFillImage(queue, image, fill_color_2, origin, region, + 0, nullptr, nullptr); + test_error(error, "clEnqueueFillImage failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector<cl_char> output_data_2(data_size); + error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0, + output_data_2.data(), 0, nullptr, nullptr); + + for (size_t i = 0; i < data_size; i++) + { + CHECK_VERIFICATION_ERROR(static_cast<cl_char>(pattern_1), + output_data_2[i], i); } return CL_SUCCESS; @@ -85,8 +106,12 @@ struct FillImageKHR : public BasicCommandBufferTest const size_t data_size = img_width * img_height * 4 * sizeof(cl_char); const size_t origin[3] = { 0, 0, 0 }, region[3] = { img_width, img_height, 1 }; - const cl_uint pattern = 0x10; - const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern }; + const cl_uint pattern_1 = 0x10; + const cl_uint fill_color_1[4] = { pattern_1, pattern_1, pattern_1, + pattern_1 }; + const cl_uint pattern_2 = 0x20; + const cl_uint fill_color_2[4] = { pattern_2, pattern_2, pattern_2, + pattern_2 }; const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 }; clMemWrapper image; @@ -99,7 +124,7 @@ struct FillBufferKHR : public BasicCommandBufferTest cl_int Run() override { cl_int error = clCommandFillBufferKHR( - command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0, + command_buffer, nullptr, in_mem, &pattern_1, sizeof(cl_char), 0, data_size(), 0, nullptr, nullptr, nullptr); test_error(error, "clCommandFillBufferKHR failed"); @@ -111,22 +136,100 @@ struct FillBufferKHR : public BasicCommandBufferTest nullptr, nullptr); test_error(error, "clEnqueueCommandBufferKHR failed"); - std::vector<cl_char> output_data(data_size()); + std::vector<cl_char> output_data_1(data_size()); + error = clEnqueueReadBuffer(queue, in_mem, CL_TRUE, 0, data_size(), + output_data_1.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < data_size(); i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + + clEnqueueFillBuffer(queue, in_mem, &pattern_2, sizeof(cl_char), 0, + data_size(), 0, nullptr, nullptr); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector<cl_char> output_data_2(data_size()); error = clEnqueueReadBuffer(queue, in_mem, CL_TRUE, 0, data_size(), - output_data.data(), 0, nullptr, nullptr); + output_data_2.data(), 0, nullptr, nullptr); test_error(error, "clEnqueueReadBuffer failed"); for (size_t i = 0; i < data_size(); i++) { - CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); } return CL_SUCCESS; } - const char pattern = 0x15; + const char pattern_1 = 0x15; + const char pattern_2 = 0x30; }; +struct FillSVMBufferKHR : public BasicSVMCommandBufferTest +{ + using BasicSVMCommandBufferTest::BasicSVMCommandBufferTest; + + cl_int Run() override + { + cl_int error = clCommandSVMMemFillKHR( + command_buffer, nullptr, svm_in_mem(), &pattern_1, sizeof(cl_char), + data_size(), 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandSVMMemFillKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector<cl_char> output_data_1(data_size()); + + error = + clEnqueueSVMMemcpy(queue, CL_TRUE, output_data_1.data(), + svm_in_mem(), data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMemcpy failed"); + + for (size_t i = 0; i < data_size(); i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i); + } + + /* Check second enqueue of command buffer */ + error = clEnqueueSVMMemFill(queue, svm_in_mem(), &pattern_2, + sizeof(cl_char), data_size(), 0, nullptr, + nullptr); + test_error(error, "clEnqueueSVMMemFill failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector<cl_char> output_data_2(data_size()); + + error = + clEnqueueSVMMemcpy(queue, CL_TRUE, output_data_2.data(), + svm_in_mem(), data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueSVMMemcpy failed"); + + for (size_t i = 0; i < data_size(); i++) + { + CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i); + } + + return CL_SUCCESS; + } + + const char pattern_1 = 0x15; + const char pattern_2 = 0x30; +}; }; int test_fill_buffer(cl_device_id device, cl_context context, @@ -135,6 +238,14 @@ int test_fill_buffer(cl_device_id device, cl_context context, return MakeAndRunTest<FillBufferKHR>(device, context, queue, num_elements); } +int test_fill_svm_buffer(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest<FillSVMBufferKHR>(device, context, queue, + num_elements); +} + + int test_fill_image(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { diff --git a/test_conformance/extensions/cl_khr_command_buffer/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/main.cpp index 4eefc8ab..4ecb0806 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/main.cpp +++ b/test_conformance/extensions/cl_khr_command_buffer/main.cpp @@ -26,6 +26,7 @@ test_definition test_list[] = { ADD_TEST(info_ref_count), ADD_TEST(info_state), ADD_TEST(info_prop_array), + ADD_TEST(info_context), ADD_TEST(basic_profiling), ADD_TEST(simultaneous_profiling), ADD_TEST(regular_wait_for_command_buffer), @@ -44,8 +45,10 @@ test_definition test_list[] = { ADD_TEST(simultaneous_queue_substitution), ADD_TEST(fill_image), ADD_TEST(fill_buffer), + ADD_TEST(fill_svm_buffer), ADD_TEST(copy_image), ADD_TEST(copy_buffer), + ADD_TEST(copy_svm_buffer), ADD_TEST(copy_buffer_to_image), ADD_TEST(copy_image_to_buffer), ADD_TEST(copy_buffer_rect), @@ -58,7 +61,9 @@ test_definition test_list[] = { ADD_TEST(event_info_command_queue), ADD_TEST(event_info_execution_status), ADD_TEST(event_info_context), - ADD_TEST(event_info_reference_count) + ADD_TEST(event_info_reference_count), + ADD_TEST(finalize_invalid), + ADD_TEST(finalize_empty) }; int main(int argc, const char *argv[]) diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h index 63e004a7..ce121cea 100644 --- a/test_conformance/extensions/cl_khr_command_buffer/procs.h +++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _CL_KHR_COMMAND_BUFFER_PROCS_H -#define _CL_KHR_COMMAND_BUFFER_PROCS_H +#ifndef CL_KHR_COMMAND_BUFFER_PROCS_H +#define CL_KHR_COMMAND_BUFFER_PROCS_H #include <CL/cl.h> @@ -41,6 +41,8 @@ extern int test_info_state(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_info_prop_array(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_info_context(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); extern int test_basic_set_kernel_arg(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_pending_set_kernel_arg(cl_device_id device, cl_context context, @@ -101,10 +103,14 @@ extern int test_fill_image(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_fill_buffer(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_fill_svm_buffer(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); extern int test_copy_image(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_copy_buffer(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_copy_svm_buffer(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); extern int test_copy_buffer_to_image(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); extern int test_copy_image_to_buffer(cl_device_id device, cl_context context, @@ -130,5 +136,9 @@ extern int test_event_info_reference_count(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int test_finalize_invalid(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_finalize_empty(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); -#endif /*_CL_KHR_COMMAND_BUFFER_PROCS_H*/ +#endif // CL_KHR_COMMAND_BUFFER_PROCS_H diff --git a/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.cpp b/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.cpp new file mode 100644 index 00000000..1fc48ce5 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.cpp @@ -0,0 +1,94 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "svm_command_basic.h" + +//-------------------------------------------------------------------------- + +bool BasicSVMCommandBufferTest::Skip() +{ + if (BasicCommandBufferTest::Skip()) return true; + + Version version = get_device_cl_version(device); + if (version < Version(2, 0)) + { + log_info("test requires OpenCL 2.x/3.0 device"); + return true; + } + + cl_device_svm_capabilities svm_capabilities; + cl_int error = + clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES, + sizeof(svm_capabilities), &svm_capabilities, NULL); + if (error != CL_SUCCESS) + { + print_error(error, "Unable to query CL_DEVICE_SVM_CAPABILITIES"); + return true; + } + + if (svm_capabilities == 0) + { + log_info("Device property CL_DEVICE_SVM_COARSE_GRAIN_BUFFER not " + "supported \n"); + return true; + } + + if (init_extension_functions() != CL_SUCCESS) + { + log_error("Unable to initialise extension functions"); + return true; + } + + return false; +} + +//-------------------------------------------------------------------------- + +cl_int BasicSVMCommandBufferTest::SetUpKernelArgs(void) +{ + size_t size = sizeof(cl_int) * num_elements * buffer_size_multiplier; + svm_in_mem = clSVMWrapper(context, size); + if (svm_in_mem() == nullptr) + { + log_error("Unable to allocate SVM memory"); + return CL_OUT_OF_RESOURCES; + } + svm_out_mem = clSVMWrapper(context, size); + if (svm_out_mem() == nullptr) + { + log_error("Unable to allocate SVM memory"); + return CL_OUT_OF_RESOURCES; + } + return CL_SUCCESS; +} + +//-------------------------------------------------------------------------- + +cl_int BasicSVMCommandBufferTest::init_extension_functions() +{ + cl_int error = BasicCommandBufferTest::init_extension_functions(); + test_error(error, "Unable to initialise extension functions"); + + cl_platform_id platform; + error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), + &platform, nullptr); + test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); + + GET_EXTENSION_ADDRESS(clCommandSVMMemFillKHR); + GET_EXTENSION_ADDRESS(clCommandSVMMemcpyKHR); + + return CL_SUCCESS; +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.h new file mode 100644 index 00000000..f6b6b427 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.h @@ -0,0 +1,42 @@ +// +// Copyright (c) 2023 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef CL_KHR_SVM_COMMAND_BASIC_H +#define CL_KHR_SVM_COMMAND_BASIC_H + +#include "basic_command_buffer.h" + + +struct BasicSVMCommandBufferTest : BasicCommandBufferTest +{ + BasicSVMCommandBufferTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicCommandBufferTest(device, context, queue) + {} + + virtual bool Skip() override; + virtual cl_int SetUpKernelArgs(void) override; + +protected: + cl_int init_extension_functions(); + + clCommandSVMMemFillKHR_fn clCommandSVMMemFillKHR = nullptr; + clCommandSVMMemcpyKHR_fn clCommandSVMMemcpyKHR = nullptr; + + clSVMWrapper svm_in_mem, svm_out_mem; +}; + +#endif diff --git a/test_conformance/extensions/cl_khr_external_semaphore/procs.h b/test_conformance/extensions/cl_khr_external_semaphore/procs.h index 753c8fe2..7e1c4caf 100644 --- a/test_conformance/extensions/cl_khr_external_semaphore/procs.h +++ b/test_conformance/extensions/cl_khr_external_semaphore/procs.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H -#define _CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H +#ifndef CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H +#define CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H #include <CL/cl.h> @@ -79,4 +79,4 @@ extern int test_external_semaphores_invalid_command(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -#endif /* CL_KHR_EXTERNAL_SEMAPHORE */ +#endif // CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H diff --git a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp index a7ed307e..89ab17b3 100644 --- a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp +++ b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp @@ -120,9 +120,11 @@ int test_external_semaphores_queries(cl_device_id deviceID, cl_context context, SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_TYPE_KHR, cl_semaphore_type_khr, CL_SEMAPHORE_TYPE_BINARY_KHR); - SEMAPHORE_PARAM_TEST(CL_DEVICE_HANDLE_LIST_KHR, cl_uint, 1); + SEMAPHORE_PARAM_TEST(CL_DEVICE_HANDLE_LIST_KHR, cl_device_id, deviceID); - SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, cl_uint, 1); + SEMAPHORE_PARAM_TEST( + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, cl_uint, + getCLSemaphoreTypeFromVulkanType(vkExternalSemaphoreHandleType)); // Confirm that querying CL_SEMAPHORE_CONTEXT_KHR returns the right context SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_CONTEXT_KHR, cl_context, context); @@ -290,7 +292,7 @@ static int semaphore_external_cross_queue_helper(cl_device_id deviceID, nullptr, 0, nullptr, &wait_event); test_error(err, "Could not wait semaphore"); - // Finish queue_1 and queue_2 + // Finish queue_1 and queue_2 err = clFinish(queue_1); test_error(err, "Could not finish queue"); @@ -304,7 +306,7 @@ static int semaphore_external_cross_queue_helper(cl_device_id deviceID, return TEST_PASS; } -// Confirm that a signal followed by a wait will complete successfully +// Confirm that a signal followed by a wait will complete successfully int test_external_semaphores_simple_1(cl_device_id deviceID, cl_context context, cl_command_queue defaultQueue, int num_elements) @@ -931,420 +933,3 @@ int test_external_semaphores_multi_wait(cl_device_id deviceID, return TEST_PASS; } - -// Confirm that it is possible to enqueue a signal of wait and signal in any -// order as soon as the submission order (after deferred dependencies) is -// correct. Case: first one deferred wait, then one non deferred signal. -int test_external_semaphores_order_1(cl_device_id deviceID, cl_context context, - cl_command_queue defaultQueue, - int num_elements) -{ - if (!is_extension_available(deviceID, "cl_khr_external_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - if (init_vuikan_device()) - { - log_info("Cannot initialise Vulkan. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - VulkanDevice vkDevice; - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - - const std::vector<VulkanExternalMemoryHandleType> - vkExternalMemoryHandleTypeList = - getSupportedVulkanExternalMemoryHandleTypeList(); - VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = - getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; - VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); - - clExternalSemaphore sema_ext(vkVk2CLSemaphore, context, - vkExternalSemaphoreHandleType, deviceID); - - cl_int err = CL_SUCCESS; - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create user event - clEventWrapper user_event = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Wait semaphore (dependency on user_event) - clEventWrapper wait_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event, &wait_event); - test_error(err, "Could not wait semaphore"); - - // Signal semaphore - clEventWrapper signal_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 0, nullptr, &signal_event); - test_error(err, "Could not signal semaphore"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure signal event is completed while wait event is not - test_assert_event_complete(signal_event); - test_assert_event_inprogress(wait_event); - - // Complete user_event - err = clSetUserEventStatus(user_event, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(signal_event); - test_assert_event_complete(wait_event); - - return TEST_PASS; -} - -// Confirm that it is possible to enqueue a signal of wait and signal in any -// order as soon as the submission order (after deferred dependencies) is -// correct. Case: first two deferred signals, then one deferred wait. Unblock -// signal, then unblock wait. When wait completes, unblock the other signal. -int test_external_semaphores_order_2(cl_device_id deviceID, cl_context context, - cl_command_queue defaultQueue, - int num_elements) -{ - if (!is_extension_available(deviceID, "cl_khr_external_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - if (init_vuikan_device()) - { - log_info("Cannot initialise Vulkan. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - VulkanDevice vkDevice; - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - - const std::vector<VulkanExternalMemoryHandleType> - vkExternalMemoryHandleTypeList = - getSupportedVulkanExternalMemoryHandleTypeList(); - VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = - getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; - VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); - - clExternalSemaphore sema_ext(vkVk2CLSemaphore, context, - vkExternalSemaphoreHandleType, deviceID); - - cl_int err = CL_SUCCESS; - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create user events - clEventWrapper user_event_1 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_2 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_3 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Signal semaphore (dependency on user_event_1) - clEventWrapper signal_1_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_1, - &signal_1_event); - test_error(err, "Could not signal semaphore"); - - // Signal semaphore (dependency on user_event_2) - clEventWrapper signal_2_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_2, - &signal_2_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore (dependency on user_event_3) - clEventWrapper wait_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_3, &wait_event); - test_error(err, "Could not wait semaphore"); - - // Complete user_event_1 - err = clSetUserEventStatus(user_event_1, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Complete user_event_3 - err = clSetUserEventStatus(user_event_3, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure all events are completed except for second signal - test_assert_event_complete(signal_1_event); - test_assert_event_inprogress(signal_2_event); - test_assert_event_complete(wait_event); - - // Complete user_event_2 - err = clSetUserEventStatus(user_event_2, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(signal_1_event); - test_assert_event_complete(signal_2_event); - test_assert_event_complete(wait_event); - - return TEST_PASS; -} - -// Confirm that it is possible to enqueue a signal of wait and signal in any -// order as soon as the submission order (after deferred dependencies) is -// correct. Case: first two deferred signals, then two deferred waits. Unblock -// one signal and one wait (both blocked by the same user event). When wait -// completes, unblock the other signal. Then unblock the other wait. -int test_external_semaphores_order_3(cl_device_id deviceID, cl_context context, - cl_command_queue defaultQueue, - int num_elements) -{ - if (!is_extension_available(deviceID, "cl_khr_external_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - if (init_vuikan_device()) - { - log_info("Cannot initialise Vulkan. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - VulkanDevice vkDevice; - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - - const std::vector<VulkanExternalMemoryHandleType> - vkExternalMemoryHandleTypeList = - getSupportedVulkanExternalMemoryHandleTypeList(); - VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = - getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; - VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); - - clExternalSemaphore sema_ext(vkVk2CLSemaphore, context, - vkExternalSemaphoreHandleType, deviceID); - - cl_int err = CL_SUCCESS; - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create user events - clEventWrapper user_event_1 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_2 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_3 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Signal semaphore (dependency on user_event_1) - clEventWrapper signal_1_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_1, - &signal_1_event); - test_error(err, "Could not signal semaphore"); - - // Signal semaphore (dependency on user_event_2) - clEventWrapper signal_2_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_2, - &signal_2_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore (dependency on user_event_3) - clEventWrapper wait_1_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_3, &wait_1_event); - test_error(err, "Could not wait semaphore"); - - // Wait semaphore (dependency on user_event_2) - clEventWrapper wait_2_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(), - nullptr, 1, &user_event_2, &wait_2_event); - test_error(err, "Could not wait semaphore"); - - // Complete user_event_2 - err = clSetUserEventStatus(user_event_2, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure only second signal and second wait completed - cl_event event_list[] = { signal_2_event, wait_2_event }; - err = clWaitForEvents(2, event_list); - test_error(err, "Could not wait for events"); - - test_assert_event_inprogress(signal_1_event); - test_assert_event_inprogress(wait_1_event); - - // Complete user_event_1 - err = clSetUserEventStatus(user_event_1, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Complete user_event_3 - err = clSetUserEventStatus(user_event_3, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(signal_1_event); - test_assert_event_complete(signal_2_event); - test_assert_event_complete(wait_1_event); - test_assert_event_complete(wait_2_event); - - return TEST_PASS; -} - -// Test that an invalid semaphore command results in the invalidation of the -// command's event and the dependencies' events -int test_external_semaphores_invalid_command(cl_device_id deviceID, - cl_context context, - cl_command_queue defaultQueue, - int num_elements) -{ - if (!is_extension_available(deviceID, "cl_khr_external_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - if (init_vuikan_device()) - { - log_info("Cannot initialise Vulkan. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - VulkanDevice vkDevice; - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - - const std::vector<VulkanExternalMemoryHandleType> - vkExternalMemoryHandleTypeList = - getSupportedVulkanExternalMemoryHandleTypeList(); - VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = - getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; - VulkanSemaphore vkVk2CLSemaphore1(vkDevice, vkExternalSemaphoreHandleType); - VulkanSemaphore vkVk2CLSemaphore2(vkDevice, vkExternalSemaphoreHandleType); - - clExternalSemaphore sema_ext_1(vkVk2CLSemaphore1, context, - vkExternalSemaphoreHandleType, deviceID); - clExternalSemaphore sema_ext_2(vkVk2CLSemaphore2, context, - vkExternalSemaphoreHandleType, deviceID); - - cl_int err = CL_SUCCESS; - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create user events - clEventWrapper user_event_1 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_2 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Signal semaphore_1 (dependency on user_event_1) - clEventWrapper signal_1_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_1.getCLSemaphore(), - nullptr, 1, &user_event_1, - &signal_1_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore_1 and semaphore_2 (dependency on user_event_1) - clEventWrapper wait_event; - cl_semaphore_khr sema_list[] = { sema_ext_1.getCLSemaphore(), - sema_ext_2.getCLSemaphore() }; - err = clEnqueueWaitSemaphoresKHR(queue, 2, sema_list, nullptr, 1, - &user_event_1, &wait_event); - test_error(err, "Could not wait semaphore"); - - // Signal semaphore_1 (dependency on wait_event and user_event_2) - clEventWrapper signal_2_event; - cl_event wait_list[] = { user_event_2, wait_event }; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_1.getCLSemaphore(), - nullptr, 2, wait_list, &signal_2_event); - test_error(err, "Could not signal semaphore"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure all events are not completed - test_assert_event_inprogress(signal_1_event); - test_assert_event_inprogress(signal_2_event); - test_assert_event_inprogress(wait_event); - - // Complete user_event_1 (expect failure as waiting on semaphore_2 is not - // allowed (unsignaled) - err = clSetUserEventStatus(user_event_1, CL_COMPLETE); - test_assert_error(err != CL_SUCCESS, - "signal_2_event completed unexpectedly"); - - // Ensure signal_1 is completed while others failed (the second signal - // should fail as it depends on wait) - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - test_assert_event_complete(signal_1_event); - test_assert_event_terminated(wait_event); - test_assert_event_terminated(signal_2_event); - - return TEST_PASS; -} diff --git a/test_conformance/extensions/cl_khr_semaphore/main.cpp b/test_conformance/extensions/cl_khr_semaphore/main.cpp index ab9699b0..0ae7206a 100644 --- a/test_conformance/extensions/cl_khr_semaphore/main.cpp +++ b/test_conformance/extensions/cl_khr_semaphore/main.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2023 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -34,11 +34,7 @@ test_definition test_list[] = { ADD_TEST_VERSION(semaphores_multi_signal, Version(1, 2)), ADD_TEST_VERSION(semaphores_multi_wait, Version(1, 2)), ADD_TEST_VERSION(semaphores_queries, Version(1, 2)), - ADD_TEST_VERSION(semaphores_order_1, Version(1, 2)), - ADD_TEST_VERSION(semaphores_order_2, Version(1, 2)), - ADD_TEST_VERSION(semaphores_order_3, Version(1, 2)), ADD_TEST_VERSION(semaphores_import_export_fd, Version(1, 2)), - ADD_TEST_VERSION(semaphores_invalid_command, Version(1, 2)), }; const int test_num = ARRAY_SIZE(test_list); diff --git a/test_conformance/extensions/cl_khr_semaphore/procs.h b/test_conformance/extensions/cl_khr_semaphore/procs.h index 06651af4..f7c1aaa3 100644 --- a/test_conformance/extensions/cl_khr_semaphore/procs.h +++ b/test_conformance/extensions/cl_khr_semaphore/procs.h @@ -1,5 +1,5 @@ // -// Copyright (c) 2017 The Khronos Group Inc. +// Copyright (c) 2023 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -41,17 +41,7 @@ extern int test_semaphores_multi_wait(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_semaphores_queries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_semaphores_order_1(cl_device_id deviceID, cl_context context, - cl_command_queue queue, int num_elements); -extern int test_semaphores_order_2(cl_device_id deviceID, cl_context context, - cl_command_queue queue, int num_elements); -extern int test_semaphores_order_3(cl_device_id deviceID, cl_context context, - cl_command_queue queue, int num_elements); extern int test_semaphores_import_export_fd(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_semaphores_invalid_command(cl_device_id deviceID, - cl_context context, - cl_command_queue queue, - int num_elements); diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp index 7d03bff3..36bb8ad5 100644 --- a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp +++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 2022 The Khronos Group Inc. +// Copyright (c) 2023 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -646,303 +646,6 @@ int test_semaphores_queries(cl_device_id deviceID, cl_context context, return TEST_PASS; } -// Confirm that it is possible to enqueue a signal of wait and signal in any -// order as soon as the submission order (after deferred dependencies) is -// correct. Case: first one deferred wait, then one non deferred signal. -int test_semaphores_order_1(cl_device_id deviceID, cl_context context, - cl_command_queue defaultQueue, int num_elements) -{ - cl_int err; - - if (!is_extension_available(deviceID, "cl_khr_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - GET_PFN(deviceID, clReleaseSemaphoreKHR); - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create semaphore - cl_semaphore_properties_khr sema_props[] = { - static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR), - static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR), - 0 - }; - cl_semaphore_khr sema = - clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err); - test_error(err, "Could not create semaphore"); - - // Create user event - clEventWrapper user_event = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Wait semaphore (dependency on user_event) - clEventWrapper wait_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event, - &wait_event); - test_error(err, "Could not wait semaphore"); - - // Signal semaphore - clEventWrapper signal_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 0, nullptr, - &signal_event); - test_error(err, "Could not signal semaphore"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure signal event is completed while wait event is not - test_assert_event_complete(signal_event); - test_assert_event_inprogress(wait_event); - - // Complete user_event - err = clSetUserEventStatus(user_event, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(signal_event); - test_assert_event_complete(wait_event); - - // Release semaphore - err = clReleaseSemaphoreKHR(sema); - test_error(err, "Could not release semaphore"); - - return TEST_PASS; -} - -// Confirm that it is possible to enqueue a signal of wait and signal in any -// order as soon as the submission order (after deferred dependencies) is -// correct. Case: first two deferred signals, then one deferred wait. Unblock -// signal, then unblock wait. When wait completes, unblock the other signal. -int test_semaphores_order_2(cl_device_id deviceID, cl_context context, - cl_command_queue defaultQueue, int num_elements) -{ - cl_int err; - - if (!is_extension_available(deviceID, "cl_khr_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - GET_PFN(deviceID, clReleaseSemaphoreKHR); - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create semaphore - cl_semaphore_properties_khr sema_props[] = { - static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR), - static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR), - 0 - }; - cl_semaphore_khr sema = - clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err); - test_error(err, "Could not create semaphore"); - - // Create user events - clEventWrapper user_event_1 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_2 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_3 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Signal semaphore (dependency on user_event_1) - clEventWrapper signal_1_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1, - &user_event_1, &signal_1_event); - test_error(err, "Could not signal semaphore"); - - // Signal semaphore (dependency on user_event_2) - clEventWrapper signal_2_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1, - &user_event_2, &signal_2_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore (dependency on user_event_3) - clEventWrapper wait_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event_3, - &wait_event); - test_error(err, "Could not wait semaphore"); - - // Complete user_event_1 - err = clSetUserEventStatus(user_event_1, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Complete user_event_3 - err = clSetUserEventStatus(user_event_3, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure all events are completed except for second signal - test_assert_event_complete(signal_1_event); - test_assert_event_inprogress(signal_2_event); - test_assert_event_complete(wait_event); - - // Complete user_event_2 - err = clSetUserEventStatus(user_event_2, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(signal_1_event); - test_assert_event_complete(signal_2_event); - test_assert_event_complete(wait_event); - - // Release semaphore - err = clReleaseSemaphoreKHR(sema); - test_error(err, "Could not release semaphore"); - - return TEST_PASS; -} - -// Confirm that it is possible to enqueue a signal of wait and signal in any -// order as soon as the submission order (after deferred dependencies) is -// correct. Case: first two deferred signals, then two deferred waits. Unblock -// one signal and one wait (both blocked by the same user event). When wait -// completes, unblock the other signal. Then unblock the other wait. -int test_semaphores_order_3(cl_device_id deviceID, cl_context context, - cl_command_queue defaultQueue, int num_elements) -{ - cl_int err; - - if (!is_extension_available(deviceID, "cl_khr_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - GET_PFN(deviceID, clReleaseSemaphoreKHR); - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create semaphore - cl_semaphore_properties_khr sema_props[] = { - static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR), - static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR), - 0 - }; - cl_semaphore_khr sema = - clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err); - test_error(err, "Could not create semaphore"); - - // Create user events - clEventWrapper user_event_1 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_2 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_3 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Signal semaphore (dependency on user_event_1) - clEventWrapper signal_1_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1, - &user_event_1, &signal_1_event); - test_error(err, "Could not signal semaphore"); - - // Signal semaphore (dependency on user_event_2) - clEventWrapper signal_2_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1, - &user_event_2, &signal_2_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore (dependency on user_event_3) - clEventWrapper wait_1_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event_3, - &wait_1_event); - test_error(err, "Could not wait semaphore"); - - // Wait semaphore (dependency on user_event_2) - clEventWrapper wait_2_event; - err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event_2, - &wait_2_event); - test_error(err, "Could not wait semaphore"); - - // Complete user_event_2 - err = clSetUserEventStatus(user_event_2, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure only second signal and second wait completed - cl_event event_list[] = { signal_2_event, wait_2_event }; - err = clWaitForEvents(2, event_list); - test_error(err, "Could not wait for events"); - - test_assert_event_inprogress(signal_1_event); - test_assert_event_inprogress(wait_1_event); - - // Complete user_event_1 - err = clSetUserEventStatus(user_event_1, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Complete user_event_3 - err = clSetUserEventStatus(user_event_3, CL_COMPLETE); - test_error(err, "Could not set user event to CL_COMPLETE"); - - // Finish - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - // Ensure all events are completed - test_assert_event_complete(signal_1_event); - test_assert_event_complete(signal_2_event); - test_assert_event_complete(wait_1_event); - test_assert_event_complete(wait_2_event); - - // Release semaphore - err = clReleaseSemaphoreKHR(sema); - test_error(err, "Could not release semaphore"); - - return TEST_PASS; -} - // Test it is possible to export a semaphore to a sync fd and import the same // sync fd to a new semaphore int test_semaphores_import_export_fd(cl_device_id deviceID, cl_context context, @@ -985,6 +688,8 @@ int test_semaphores_import_export_fd(cl_device_id deviceID, cl_context context, CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR), static_cast<cl_semaphore_properties_khr>( CL_SEMAPHORE_HANDLE_SYNC_FD_KHR), + static_cast<cl_semaphore_properties_khr>( + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR), 0 }; cl_semaphore_khr sema_1 = @@ -1040,106 +745,4 @@ int test_semaphores_import_export_fd(cl_device_id deviceID, cl_context context, err = clReleaseSemaphoreKHR(sema_2); test_error(err, "Could not release semaphore"); return TEST_PASS; -} - -// Test that an invalid semaphore command results in the invalidation of the -// command's event and the dependencies' events -int test_semaphores_invalid_command(cl_device_id deviceID, cl_context context, - cl_command_queue defaultQueue, - int num_elements) -{ - cl_int err; - - if (!is_extension_available(deviceID, "cl_khr_semaphore")) - { - log_info("cl_khr_semaphore is not supported on this platoform. " - "Skipping test.\n"); - return TEST_SKIPPED_ITSELF; - } - - // Obtain pointers to semaphore's API - GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR); - GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR); - GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR); - GET_PFN(deviceID, clReleaseSemaphoreKHR); - - // Create ooo queue - clCommandQueueWrapper queue = clCreateCommandQueue( - context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err); - test_error(err, "Could not create command queue"); - - // Create semaphores - cl_semaphore_properties_khr sema_props[] = { - static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR), - static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR), - 0 - }; - cl_semaphore_khr sema_1 = - clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err); - test_error(err, "Could not create semaphore"); - - cl_semaphore_khr sema_2 = - clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err); - test_error(err, "Could not create semaphore"); - - // Create user events - clEventWrapper user_event_1 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - clEventWrapper user_event_2 = clCreateUserEvent(context, &err); - test_error(err, "Could not create user event"); - - // Signal semaphore_1 (dependency on user_event_1) - clEventWrapper signal_1_event; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_1, nullptr, 1, - &user_event_1, &signal_1_event); - test_error(err, "Could not signal semaphore"); - - // Wait semaphore_1 and semaphore_2 (dependency on user_event_1) - clEventWrapper wait_event; - cl_semaphore_khr sema_list[] = { sema_1, sema_2 }; - err = clEnqueueWaitSemaphoresKHR(queue, 2, sema_list, nullptr, 1, - &user_event_1, &wait_event); - test_error(err, "Could not wait semaphore"); - - // Signal semaphore_1 (dependency on wait_event and user_event_2) - clEventWrapper signal_2_event; - cl_event wait_list[] = { user_event_2, wait_event }; - err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_1, nullptr, 2, wait_list, - &signal_2_event); - test_error(err, "Could not signal semaphore"); - - // Flush and delay - err = clFlush(queue); - test_error(err, "Could not flush queue"); - std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S)); - - // Ensure all events are not completed - test_assert_event_inprogress(signal_1_event); - test_assert_event_inprogress(signal_2_event); - test_assert_event_inprogress(wait_event); - - // Complete user_event_1 (expect failure as waiting on semaphore_2 is not - // allowed (unsignaled) - err = clSetUserEventStatus(user_event_1, CL_COMPLETE); - test_assert_error(err != CL_SUCCESS, - "signal_2_event completed unexpectedly"); - - // Ensure signal_1 is completed while others failed (the second signal - // should fail as it depends on wait) - err = clFinish(queue); - test_error(err, "Could not finish queue"); - - test_assert_event_complete(signal_1_event); - test_assert_event_terminated(wait_event); - test_assert_event_terminated(signal_2_event); - - // Release semaphore - err = clReleaseSemaphoreKHR(sema_1); - test_error(err, "Could not release semaphore"); - - err = clReleaseSemaphoreKHR(sema_2); - test_error(err, "Could not release semaphore"); - - return TEST_PASS; }
\ No newline at end of file diff --git a/test_conformance/geometrics/CMakeLists.txt b/test_conformance/geometrics/CMakeLists.txt index 3fee05fb..8a6f25c6 100644 --- a/test_conformance/geometrics/CMakeLists.txt +++ b/test_conformance/geometrics/CMakeLists.txt @@ -6,5 +6,7 @@ set(${MODULE_NAME}_SOURCES test_geometrics.cpp ) +set_gnulike_module_compile_flags("-Wno-sign-compare") + include(../CMakeCommon.txt) diff --git a/test_conformance/gl/test_images_write_common.cpp b/test_conformance/gl/test_images_write_common.cpp index 4d721296..69d00a1a 100644 --- a/test_conformance/gl/test_images_write_common.cpp +++ b/test_conformance/gl/test_images_write_common.cpp @@ -571,6 +571,7 @@ static int test_image_format_write(cl_context context, cl_command_queue queue, "%s (%s):%d", GetGLTargetName(target), __FUNCTION__, __FILE__, __LINE__); + return -1; } // If there was a problem during creation, make sure it isn't a known diff --git a/test_conformance/images/clCopyImage/test_copy_generic.cpp b/test_conformance/images/clCopyImage/test_copy_generic.cpp index 3e0b60d9..888ca6ec 100644 --- a/test_conformance/images/clCopyImage/test_copy_generic.cpp +++ b/test_conformance/images/clCopyImage/test_copy_generic.cpp @@ -519,32 +519,53 @@ int test_copy_image_generic( cl_context context, cl_command_queue queue, image_d if( gDebugTrace ) log_info( " - Scanline verification...\n" ); - size_t thirdDim; - size_t secondDim; - if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY) - { - secondDim = dstImageInfo->arraySize; - thirdDim = 1; - } - else if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY) + size_t thirdDim = 1; + size_t secondDim = 1; + + switch (dstImageInfo->type) { - secondDim = dstImageInfo->height; - if( gTestMipmaps ) - secondDim = (dstImageInfo->height >> dst_lod) ? (dstImageInfo->height >> dst_lod):1; - thirdDim = dstImageInfo->arraySize; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: { + secondDim = dstImageInfo->arraySize; + break; + } + case CL_MEM_OBJECT_IMAGE2D_ARRAY: { + secondDim = dstImageInfo->height; + thirdDim = dstImageInfo->arraySize; + break; + } + case CL_MEM_OBJECT_IMAGE3D: { + secondDim = dstImageInfo->height; + thirdDim = dstImageInfo->depth; + break; + } + case CL_MEM_OBJECT_IMAGE2D: { + secondDim = dstImageInfo->height; + break; + } + case CL_MEM_OBJECT_IMAGE1D: { + break; + } + default: { + log_error("ERROR: Unsupported Image type. \n"); + return error; + break; + } } - else + if (gTestMipmaps) { - secondDim = dstImageInfo->height; - thirdDim = dstImageInfo->depth; - if( gTestMipmaps ) + switch (dstImageInfo->type) { - secondDim = (dstImageInfo->height >> dst_lod) ? (dstImageInfo->height >> dst_lod):1; - if(dstImageInfo->type == CL_MEM_OBJECT_IMAGE3D) + case CL_MEM_OBJECT_IMAGE3D: thirdDim = (dstImageInfo->depth >> dst_lod) ? (dstImageInfo->depth >> dst_lod):1; + /* Fallthrough */ + case CL_MEM_OBJECT_IMAGE2D: + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + secondDim = (dstImageInfo->height >> dst_lod) + ? (dstImageInfo->height >> dst_lod) + : 1; + break; } } - for( size_t z = 0; z < thirdDim; z++ ) { for( size_t y = 0; y < secondDim; y++ ) diff --git a/test_conformance/images/clCopyImage/test_loops.cpp b/test_conformance/images/clCopyImage/test_loops.cpp index 6ee1e536..e839cfdf 100644 --- a/test_conformance/images/clCopyImage/test_loops.cpp +++ b/test_conformance/images/clCopyImage/test_loops.cpp @@ -41,60 +41,52 @@ int test_image_type( cl_device_id device, cl_context context, cl_command_queue q } } - if( testMethod == k1D ) + switch (testMethod) { - name = "1D -> 1D"; - imageType = CL_MEM_OBJECT_IMAGE1D; - } - else if( testMethod == k2D ) - { - name = "2D -> 2D"; - imageType = CL_MEM_OBJECT_IMAGE2D; - } - else if( testMethod == k3D ) - { - name = "3D -> 3D"; - imageType = CL_MEM_OBJECT_IMAGE3D; - } - else if( testMethod == k1DArray ) - { - name = "1D array -> 1D array"; - imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY; - } - else if( testMethod == k2DArray ) - { - name = "2D array -> 2D array"; - imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; - } - else if( testMethod == k2DTo3D ) - { - name = "2D -> 3D"; - imageType = CL_MEM_OBJECT_IMAGE3D; - } - else if( testMethod == k3DTo2D ) - { - name = "3D -> 2D"; - imageType = CL_MEM_OBJECT_IMAGE3D; - } - else if( testMethod == k2DArrayTo2D ) - { - name = "2D array -> 2D"; - imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; - } - else if( testMethod == k2DTo2DArray ) - { - name = "2D -> 2D array"; - imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; - } - else if( testMethod == k2DArrayTo3D ) - { - name = "2D array -> 3D"; - imageType = CL_MEM_OBJECT_IMAGE3D; - } - else if( testMethod == k3DTo2DArray ) - { - name = "3D -> 2D array"; - imageType = CL_MEM_OBJECT_IMAGE3D; + case k1D: + name = "1D -> 1D"; + imageType = CL_MEM_OBJECT_IMAGE1D; + break; + case k2D: + name = "2D -> 2D"; + imageType = CL_MEM_OBJECT_IMAGE2D; + break; + case k3D: + name = "3D -> 3D"; + imageType = CL_MEM_OBJECT_IMAGE3D; + break; + case k1DArray: + name = "1D array -> 1D array"; + imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY; + break; + case k2DArray: + name = "2D array -> 2D array"; + imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + break; + case k2DTo3D: + name = "2D -> 3D"; + imageType = CL_MEM_OBJECT_IMAGE3D; + break; + case k3DTo2D: + name = "3D -> 2D"; + imageType = CL_MEM_OBJECT_IMAGE3D; + break; + case k2DArrayTo2D: + name = "2D array -> 2D"; + imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + break; + case k2DTo2DArray: + name = "2D -> 2D array"; + imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + break; + case k2DArrayTo3D: + name = "2D array -> 3D"; + imageType = CL_MEM_OBJECT_IMAGE3D; + break; + case k3DTo2DArray: + name = "3D -> 2D array"; + imageType = CL_MEM_OBJECT_IMAGE3D; + break; } if(gTestMipmaps) diff --git a/test_conformance/images/clFillImage/test_loops.cpp b/test_conformance/images/clFillImage/test_loops.cpp index 759f48d2..126ea0eb 100644 --- a/test_conformance/images/clFillImage/test_loops.cpp +++ b/test_conformance/images/clFillImage/test_loops.cpp @@ -33,35 +33,34 @@ int test_image_type( cl_device_id device, cl_context context, cl_command_queue q cl_mem_object_type imageType; test_func test_fn; - if ( testMethod == k1D ) + switch (testMethod) { - name = "1D Image Fill"; - imageType = CL_MEM_OBJECT_IMAGE1D; - test_fn = &test_fill_image_set_1D; - } - else if ( testMethod == k2D ) - { - name = "2D Image Fill"; - imageType = CL_MEM_OBJECT_IMAGE2D; - test_fn = &test_fill_image_set_2D; - } - else if ( testMethod == k1DArray ) - { - name = "1D Image Array Fill"; - imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY; - test_fn = &test_fill_image_set_1D_array; - } - else if ( testMethod == k2DArray ) - { - name = "2D Image Array Fill"; - imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; - test_fn = &test_fill_image_set_2D_array; - } - else if ( testMethod == k3D ) - { - name = "3D Image Fill"; - imageType = CL_MEM_OBJECT_IMAGE3D; - test_fn = &test_fill_image_set_3D; + case k1D: + name = "1D Image Fill"; + imageType = CL_MEM_OBJECT_IMAGE1D; + test_fn = &test_fill_image_set_1D; + break; + case k2D: + name = "2D Image Fill"; + imageType = CL_MEM_OBJECT_IMAGE2D; + test_fn = &test_fill_image_set_2D; + break; + case k1DArray: + name = "1D Image Array Fill"; + imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY; + test_fn = &test_fill_image_set_1D_array; + break; + case k2DArray: + name = "2D Image Array Fill"; + imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY; + test_fn = &test_fill_image_set_2D_array; + break; + case k3D: + name = "3D Image Fill"; + imageType = CL_MEM_OBJECT_IMAGE3D; + test_fn = &test_fill_image_set_3D; + break; + default: log_error("Unhandled method\n"); return -1; } log_info( "Running %s tests...\n", name ); diff --git a/test_conformance/images/kernel_read_write/CMakeLists.txt b/test_conformance/images/kernel_read_write/CMakeLists.txt index b5527c74..d7e7eded 100644 --- a/test_conformance/images/kernel_read_write/CMakeLists.txt +++ b/test_conformance/images/kernel_read_write/CMakeLists.txt @@ -21,7 +21,7 @@ set(${MODULE_NAME}_SOURCES # Make unused variables not fatal in this module; see # https://github.com/KhronosGroup/OpenCL-CTS/issues/1484 -set_gnulike_module_compile_flags("-Wno-error=unused-variable -Wno-unused-but-set-variable") +set_gnulike_module_compile_flags("-Wno-error=unused-variable -Wno-unused-but-set-variable -Wno-sign-compare") include(../../CMakeCommon.txt) diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp index c6646330..887c9dca 100644 --- a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp +++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp @@ -14,8 +14,8 @@ // limitations under the License. // -#ifndef _TEST_CL_EXT_IMAGE_BUFFER -#define _TEST_CL_EXT_IMAGE_BUFFER +#ifndef TEST_CL_EXT_IMAGE_BUFFER +#define TEST_CL_EXT_IMAGE_BUFFER #define TEST_IMAGE_SIZE 20 @@ -48,8 +48,10 @@ static inline size_t get_format_size(cl_context context, cl_image_desc image_desc = { 0 }; image_desc.image_type = imageType; - /* Size 1 only to query element size */ - image_desc.image_width = 1; + /* We use a width of 4 to query element size, as this is + the smallest possible value that satisfies the requirements + of all image formats (including extensions). */ + image_desc.image_width = 4; if (CL_MEM_OBJECT_IMAGE1D_BUFFER != imageType && CL_MEM_OBJECT_IMAGE1D != imageType) { @@ -121,4 +123,4 @@ static inline void image_desc_init(cl_image_desc* desc, } } -#endif /* _TEST_CL_EXT_IMAGE_BUFFER */
\ No newline at end of file +#endif // TEST_CL_EXT_IMAGE_BUFFER diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt index 32814026..a221f05a 100644 --- a/test_conformance/math_brute_force/CMakeLists.txt +++ b/test_conformance/math_brute_force/CMakeLists.txt @@ -45,4 +45,6 @@ set(${MODULE_NAME}_SOURCES # warnings), but other tests not (yet); so enable -Wall locally. set_gnulike_module_compile_flags("-Wall -Wno-strict-aliasing -Wno-unknown-pragmas") +add_cxx_flag_if_supported(-ffp-contract=off) + include(../CMakeCommon.txt) diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp index 3d6ce152..953c33bb 100644 --- a/test_conformance/math_brute_force/i_unary_double.cpp +++ b/test_conformance/math_brute_force/i_unary_double.cpp @@ -50,11 +50,6 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - // This test is not using ThreadPool so we need to disable FTZ here - // for reference computations - FPU_mode_type oldMode; - DisableFTZ(&oldMode); - Force64BitFPUPrecision(); // Init the kernels @@ -227,6 +222,5 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); exit: - RestoreFPState(&oldMode); return error; } diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp index 94ebc66a..0ce37cc8 100644 --- a/test_conformance/math_brute_force/i_unary_float.cpp +++ b/test_conformance/math_brute_force/i_unary_float.cpp @@ -49,11 +49,6 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - // This test is not using ThreadPool so we need to disable FTZ here - // for reference computations - FPU_mode_type oldMode; - DisableFTZ(&oldMode); - Force64BitFPUPrecision(); // Init the kernels @@ -225,6 +220,5 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) vlog("\n"); exit: - RestoreFPState(&oldMode); return error; } diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index 53679788..34f49a5a 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -81,7 +81,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) int ftz = job->ftz; bool relaxedMode = job->relaxedMode; cl_int error = CL_SUCCESS; - cl_int ret = CL_SUCCESS; const char *name = job->f->name; int signbit_test = 0; @@ -245,8 +244,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (q[j] > t[j]) err = q[j] - t[j]; vlog_error("\nERROR: %s: %d ulp error at %a: *%d vs. %d\n", name, err, ((float *)s)[j], t[j], q[j]); - error = -1; - goto exit; + return -1; } @@ -272,15 +270,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) vlog_error( "\nERROR: %s%s: %d ulp error at %a: *%d vs. %d\n", name, sizeNames[k], err, ((float *)s)[j], -t[j], q[j]); - error = -1; - goto exit; + return -1; } } } } -exit: - ret = error; for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) { if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], @@ -315,7 +310,7 @@ exit: fflush(stdout); } - return ret; + return CL_SUCCESS; } } // anonymous namespace diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp index 74c5a160..8d423408 100644 --- a/test_conformance/math_brute_force/unary_two_results_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_float.cpp @@ -189,12 +189,11 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) // Get that moving if ((error = clFlush(gQueue))) vlog("clFlush failed\n"); - FPU_mode_type oldMode; + FPU_mode_type oldMode = 0; RoundingMode oldRoundMode = kRoundToNearestEven; if (isFract) { // Calculate the correctly rounded reference result - memset(&oldMode, 0, sizeof(oldMode)); if (ftz || relaxedMode) ForceFTZ(&oldMode); // Set the rounding mode to match the device diff --git a/test_conformance/mem_host_flags/C_host_memory_block.h b/test_conformance/mem_host_flags/C_host_memory_block.h index 78692d17..0784c2c2 100644 --- a/test_conformance/mem_host_flags/C_host_memory_block.h +++ b/test_conformance/mem_host_flags/C_host_memory_block.h @@ -24,14 +24,14 @@ template <class T> class C_host_memory_block { public: - int num_elements; + size_t num_elements; int element_size; T *pData; C_host_memory_block(); ~C_host_memory_block(); - void Init(int num_elem, T &value); - void Init(int num_elem); + void Init(size_t num_elem, T &value); + void Init(size_t num_elem); void Set_to(T &val); void Set_to_zero(); bool Equal_to(T &val); @@ -40,7 +40,7 @@ public: bool Equal_rect(C_host_memory_block<T> &another, size_t *host_origin, size_t *region, size_t host_row_pitch, size_t host_slice_pitch); - bool Equal(T *pData, int num_elements); + bool Equal(T *pData, size_t num_elements); bool Equal_rect_from_orig(C_host_memory_block<T> &another, size_t *soffset, size_t *region, size_t host_row_pitch, @@ -63,20 +63,20 @@ template <class T> C_host_memory_block<T>::~C_host_memory_block() num_elements = 0; } -template <class T> void C_host_memory_block<T>::Init(int num_elem, T &value) +template <class T> void C_host_memory_block<T>::Init(size_t num_elem, T &value) { if (pData != NULL) delete[] pData; pData = new T[num_elem]; - for (int i = 0; i < num_elem; i++) pData[i] = value; + for (size_t i = 0; i < num_elem; i++) pData[i] = value; num_elements = num_elem; } -template <class T> void C_host_memory_block<T>::Init(int num_elem) +template <class T> void C_host_memory_block<T>::Init(size_t num_elem) { if (pData != NULL) delete[] pData; pData = new T[num_elem]; - for (int i = 0; i < num_elem; i++) pData[i] = (T)i; + for (size_t i = 0; i < num_elem; i++) pData[i] = (T)i; num_elements = num_elem; } @@ -88,14 +88,14 @@ template <class T> void C_host_memory_block<T>::Set_to_zero() template <class T> void C_host_memory_block<T>::Set_to(T &val) { - for (int i = 0; i < num_elements; i++) pData[i] = val; + for (size_t i = 0; i < num_elements; i++) pData[i] = val; } template <class T> bool C_host_memory_block<T>::Equal_to(T &val) { - int count = 0; + size_t count = 0; - for (int i = 0; i < num_elements; i++) + for (size_t i = 0; i < num_elements; i++) { if (pData[i] == val) count++; } @@ -106,9 +106,9 @@ template <class T> bool C_host_memory_block<T>::Equal_to(T &val) template <class T> bool C_host_memory_block<T>::Equal(C_host_memory_block<T> &another) { - int count = 0; + size_t count = 0; - for (int i = 0; i < num_elements; i++) + for (size_t i = 0; i < num_elements; i++) { if (pData[i] == another.pData[i]) count++; } @@ -117,13 +117,13 @@ bool C_host_memory_block<T>::Equal(C_host_memory_block<T> &another) } template <class T> -bool C_host_memory_block<T>::Equal(T *pIn_Data, int Innum_elements) +bool C_host_memory_block<T>::Equal(T *pIn_Data, size_t Innum_elements) { if (this->num_elements != Innum_elements) return false; - int count = 0; + size_t count = 0; - for (int i = 0; i < num_elements; i++) + for (size_t i = 0; i < num_elements; i++) { if (pData[i] == pIn_Data[i]) count++; } @@ -134,7 +134,7 @@ bool C_host_memory_block<T>::Equal(T *pIn_Data, int Innum_elements) template <class T> size_t C_host_memory_block<T>::Count(T &val) { size_t count = 0; - for (int i = 0; i < num_elements; i++) + for (size_t i = 0; i < num_elements; i++) { if (pData[i] == val) count++; } diff --git a/test_conformance/mem_host_flags/checker.h b/test_conformance/mem_host_flags/checker.h index 835f120b..0bb826f4 100644 --- a/test_conformance/mem_host_flags/checker.h +++ b/test_conformance/mem_host_flags/checker.h @@ -219,7 +219,7 @@ cl_int cBuffer_checker<T>::SetupASSubBuffer(cl_mem_flags parent_buffer_flag) err = CL_SUCCESS; } - cl_mem_flags f; + cl_mem_flags f = 0; if (parent_buffer_flag & CL_MEM_HOST_READ_ONLY) f = CL_MEM_HOST_READ_ONLY; else if (parent_buffer_flag & CL_MEM_HOST_WRITE_ONLY) diff --git a/test_conformance/non_uniform_work_group/CMakeLists.txt b/test_conformance/non_uniform_work_group/CMakeLists.txt index f78dd195..30c3a846 100644 --- a/test_conformance/non_uniform_work_group/CMakeLists.txt +++ b/test_conformance/non_uniform_work_group/CMakeLists.txt @@ -10,8 +10,6 @@ set(${MODULE_NAME}_SOURCES tools.cpp ) -set_gnulike_module_compile_flags("-Wno-unused-but-set-variable") - include(../CMakeCommon.txt) # end of file # diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp index a4a6a744..44781ca8 100644 --- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp +++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp @@ -448,13 +448,8 @@ void TestNonUniformWorkGroup::verifyData (DataContainerAttrib * reference, DataC } void TestNonUniformWorkGroup::calculateExpectedValues () { - size_t nonRemainderGlobalSize[MAX_DIMS]; size_t numberOfPossibleRegions[MAX_DIMS]; - nonRemainderGlobalSize[0] = _globalSize[0] - (_globalSize[0] % _enqueuedLocalSize[0]); - nonRemainderGlobalSize[1] = _globalSize[1] - (_globalSize[1] % _enqueuedLocalSize[1]); - nonRemainderGlobalSize[2] = _globalSize[2] - (_globalSize[2] % _enqueuedLocalSize[2]); - numberOfPossibleRegions[0] = (_globalSize[0]>1)?2:1; numberOfPossibleRegions[1] = (_globalSize[1]>1)?2:1; numberOfPossibleRegions[2] = (_globalSize[2]>1)?2:1; @@ -502,6 +497,11 @@ size_t TestNonUniformWorkGroup::getMaxLocalWorkgroupSize (const cl_device_id &de if (TestNonUniformWorkGroup::_maxLocalWorkgroupSize == 0) { err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(TestNonUniformWorkGroup::_maxLocalWorkgroupSize), &TestNonUniformWorkGroup::_maxLocalWorkgroupSize, NULL); + if (err) + { + log_error("clGetDeviceInfo failed\n"); + return 0; + } } return TestNonUniformWorkGroup::_maxLocalWorkgroupSize; diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h index 414d1004..f5846061 100644 --- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h +++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _TESTNONUNIFORMWORKGROUP_H -#define _TESTNONUNIFORMWORKGROUP_H +#ifndef TESTNONUNIFORMWORKGROUP_H +#define TESTNONUNIFORMWORKGROUP_H #include "procs.h" #include <vector> @@ -147,5 +147,4 @@ private: unsigned int _overallCounter; }; -#endif // _TESTNONUNIFORMWORKGROUP_H - +#endif // TESTNONUNIFORMWORKGROUP_H diff --git a/test_conformance/non_uniform_work_group/tools.h b/test_conformance/non_uniform_work_group/tools.h index 2e63c3dd..ba01fc99 100644 --- a/test_conformance/non_uniform_work_group/tools.h +++ b/test_conformance/non_uniform_work_group/tools.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _TOOLS_H -#define _TOOLS_H +#ifndef TOOLS_H +#define TOOLS_H #include "procs.h" #include <vector> @@ -106,4 +106,4 @@ namespace Error { }; } -#endif // _TOOLS_H +#endif // TOOLS_H diff --git a/test_conformance/pipes/kernels.h b/test_conformance/pipes/kernels.h index a2fb70c0..a897e5e8 100644 --- a/test_conformance/pipes/kernels.h +++ b/test_conformance/pipes/kernels.h @@ -13,8 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#ifndef _KERNELS_H_ -#define _KERNELS_H_ +#ifndef KERNELS_H_ +#define KERNELS_H_ static const char* pipe_readwrite_struct_kernel_code = { "typedef struct{\n" @@ -127,4 +127,4 @@ static const char* pipe_convenience_readwrite_struct_kernel_code = { " read_pipe(in_pipe, &dst[gid]);\n" "}\n" }; -#endif //_KERNELS_H_ +#endif // KERNELS_H_ diff --git a/test_conformance/pipes/test_pipe_limits.cpp b/test_conformance/pipes/test_pipe_limits.cpp index e1048f5f..76b80b15 100644 --- a/test_conformance/pipes/test_pipe_limits.cpp +++ b/test_conformance/pipes/test_pipe_limits.cpp @@ -274,8 +274,7 @@ int test_pipe_max_packet_size(cl_device_id deviceID, cl_context context, cl_comm size_t global_work_size[3]; cl_int err; size_t size; - int num_pipe_elements = 1024; - int i; + cl_uint num_pipe_elements = 1024; cl_uint max_pipe_packet_size; clEventWrapper producer_sync_event = NULL; clEventWrapper consumer_sync_event = NULL; @@ -287,7 +286,7 @@ int test_pipe_max_packet_size(cl_device_id deviceID, cl_context context, cl_comm size_t min_alignment = get_min_alignment(context); - global_work_size[0] = (cl_uint)num_pipe_elements; + global_work_size[0] = num_pipe_elements; std::stringstream source; @@ -312,7 +311,8 @@ int test_pipe_max_packet_size(cl_device_id deviceID, cl_context context, cl_comm inptr = (cl_char *)align_malloc(size, min_alignment); - for(i = 0; i < size; i++){ + for (size_t i = 0; i < size; i++) + { inptr[i] = (char)genrand_int32(d); } BufferInPtr.reset(inptr, nullptr, 0, size, true); @@ -412,7 +412,7 @@ int test_pipe_max_active_reservations(cl_device_id deviceID, cl_context context, clMemWrapper buf_reserve_id_t_size_aligned; cl_int *inptr; void *outptr; - int size, i; + int size; clProgramWrapper program; clKernelWrapper kernel[3]; size_t global_work_size[3]; @@ -565,7 +565,8 @@ int test_pipe_max_active_reservations(cl_device_id deviceID, cl_context context, size = sizeof(cl_int) * max_active_reservations; inptr = (cl_int *)align_malloc(size, min_alignment); - for(i = 0; i < max_active_reservations; i++){ + for (cl_uint i = 0; i < max_active_reservations; i++) + { inptr[i] = (int)genrand_int32(d); } BufferInPtr.reset(inptr, nullptr, 0, size, true); diff --git a/test_conformance/relationals/test_comparisons_fp.cpp b/test_conformance/relationals/test_comparisons_fp.cpp index 580b7422..73ff3dd9 100644 --- a/test_conformance/relationals/test_comparisons_fp.cpp +++ b/test_conformance/relationals/test_comparisons_fp.cpp @@ -14,12 +14,16 @@ // limitations under the License. // +#include <cstdint> +#include <functional> #include <iostream> #include <map> #include <memory> #include <stdexcept> #include <vector> +#include "harness/stringHelpers.h" + #include <CL/cl_half.h> #include "test_comparisons_fp.h" @@ -81,29 +85,6 @@ extension, // clang-format on -std::string concat_kernel(const char* sstr[], int num) -{ - std::string res; - for (int i = 0; i < num; i++) res += std::string(sstr[i]); - return res; -} - -template <typename... Args> -std::string string_format(const std::string& format, Args... args) -{ - int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) - + 1; // Extra space for '\0' - if (size_s <= 0) - { - throw std::runtime_error("Error during formatting."); - } - auto size = static_cast<size_t>(size_s); - std::unique_ptr<char[]> buf(new char[size]); - std::snprintf(buf.get(), size, format.c_str(), args...); - return std::string(buf.get(), - buf.get() + size - 1); // We don't want the '\0' inside -} - template <typename T, typename F> bool verify(const T& A, const T& B) { return F()(A, B); @@ -224,14 +205,14 @@ int RelationalsFPTest::test_equiv_kernel(unsigned int vecSize, auto str = concat_kernel(equivTestKerPat_3, sizeof(equivTestKerPat_3) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str(), opName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str(), opName.c_str()); } else { auto str = concat_kernel(equivTestKerPatLessGreater_3, sizeof(equivTestKerPatLessGreater_3) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str()); } } else @@ -241,14 +222,14 @@ int RelationalsFPTest::test_equiv_kernel(unsigned int vecSize, auto str = concat_kernel(equivTestKernPat, sizeof(equivTestKernPat) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str(), opName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str(), opName.c_str()); } else { auto str = concat_kernel(equivTestKernPatLessGreater, sizeof(equivTestKernPatLessGreater) / sizeof(const char*)); - kernelSource = string_format(str, fnName.c_str()); + kernelSource = str_sprintf(str, fnName.c_str()); } } diff --git a/test_conformance/relationals/test_comparisons_fp.h b/test_conformance/relationals/test_comparisons_fp.h index 7faca1c5..3401163e 100644 --- a/test_conformance/relationals/test_comparisons_fp.h +++ b/test_conformance/relationals/test_comparisons_fp.h @@ -14,8 +14,8 @@ // limitations under the License. // -#ifndef _TEST_COMPARISONS_FP_H -#define _TEST_COMPARISONS_FP_H +#ifndef TEST_COMPARISONS_FP_H +#define TEST_COMPARISONS_FP_H #include <map> #include <memory> @@ -32,6 +32,7 @@ template <typename T> using VerifyFunc = bool (*)(const T &, const T &); struct RelTestBase { explicit RelTestBase(const ExplicitTypes &dt): dataType(dt) {} + virtual ~RelTestBase() = default; ExplicitTypes dataType; }; @@ -224,4 +225,4 @@ int MakeAndRunTest(cl_device_id device, cl_context context, return TEST_PASS; } -#endif // _TEST_COMPARISONS_FP_H +#endif // TEST_COMPARISONS_FP_H diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index 7fa3bc08..72be08c7 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -14,11 +14,16 @@ // limitations under the License. // #include "harness/compat.h" +#include "harness/typeWrappers.h" #include <assert.h> #include <stdio.h> #include <time.h> #include <string.h> + +#include <cinttypes> +#include <vector> + #if ! defined( _WIN32) #if defined(__APPLE__) #include <sys/sysctl.h> @@ -42,11 +47,14 @@ static void initSrcBuffer(void* src1, Type stype, MTdata); // initialize the valued used to compare with in the select with // vlaues [start, count) -static void initCmpBuffer(void* cmp, Type cmptype, uint64_t start, size_t count); +static void initCmpBuffer(void *cmp, Type cmptype, uint64_t start, + const size_t count); // make a program that uses select for the given stype (src/dest type), // ctype (comparison type), veclen (vector length) -static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context context, Type stype, Type ctype, size_t veclen ); +static cl_program makeSelectProgram(cl_kernel *kernel_ptr, cl_context context, + Type stype, Type ctype, + const size_t veclen); // Creates and execute the select test for the given device, context, // stype (source/dest type), cmptype (comparison type), using max_tg_size @@ -66,6 +74,16 @@ static void printUsage( void ); #define BUFFER_SIZE (1024*1024) #define KPAGESIZE 4096 +#define test_error_count(errCode, msg) \ + { \ + auto errCodeResult = errCode; \ + if (errCodeResult != CL_SUCCESS) \ + { \ + gFailCount++; \ + print_error(errCodeResult, msg); \ + return errCode; \ + } \ + } // When we indicate non wimpy mode, the types that are 32 bits value will // test their entire range and 64 bits test will test the 32 bit @@ -74,12 +92,6 @@ static void printUsage( void ); static bool s_wimpy_mode = false; static int s_wimpy_reduction_factor = 256; -// Tests are broken into the major test which is based on the -// src and cmp type and their corresponding vector types and -// sub tests which is for each individual test. The following -// tracks the subtests -int s_test_cnt = 0; - //----------------------------------------- // Static helper functions //----------------------------------------- @@ -112,36 +124,37 @@ static void initSrcBuffer(void* src1, Type stype, MTdata d) s1[i] = genrand_int32(d); } -static void initCmpBuffer(void* cmp, Type cmptype, uint64_t start, size_t count) { - int i; +static void initCmpBuffer(void *cmp, Type cmptype, uint64_t start, + const size_t count) + +{ assert(cmptype != kfloat); switch (type_size[cmptype]) { case 1: { uint8_t* ub = (uint8_t *)cmp; - for (i=0; i < count; ++i) - ub[i] = (uint8_t)start++; + for (size_t i = 0; i < count; ++i) ub[i] = (uint8_t)start++; break; } case 2: { uint16_t* us = (uint16_t *)cmp; - for (i=0; i < count; ++i) - us[i] = (uint16_t)start++; + for (size_t i = 0; i < count; ++i) us[i] = (uint16_t)start++; break; } case 4: { if (!s_wimpy_mode) { uint32_t* ui = (uint32_t *)cmp; - for (i=0; i < count; ++i) - ui[i] = (uint32_t)start++; + for (size_t i = 0; i < count; ++i) ui[i] = (uint32_t)start++; } else { // The short test doesn't iterate over the entire 32 bit space so // we alternate between positive and negative values int32_t* ui = (int32_t *)cmp; - int32_t sign = 1; - for (i=0; i < count; ++i, ++start) { - ui[i] = (int32_t)start*sign; - sign = sign * -1; + int32_t neg_start = (int32_t)start * -1; + for (size_t i = 0; i < count; i++) + { + ++start; + --neg_start; + ui[i] = (int32_t)((i % 2) ? start : neg_start); } } break; @@ -150,10 +163,12 @@ static void initCmpBuffer(void* cmp, Type cmptype, uint64_t start, size_t count) // We don't iterate over the entire space of 64 bit so for the // selects, we want to test positive and negative values int64_t* ll = (int64_t *)cmp; - int64_t sign = 1; - for (i=0; i < count; ++i, ++start) { - ll[i] = start*sign; - sign = sign * -1; + int64_t neg_start = (int64_t)start * -1; + for (size_t i = 0; i < count; i++) + { + ++start; + --neg_start; + ll[i] = (int64_t)((i % 2) ? start : neg_start); } break; } @@ -165,7 +180,9 @@ static void initCmpBuffer(void* cmp, Type cmptype, uint64_t start, size_t count) // Make the various incarnations of the program we want to run // stype: source and destination type for the select // ctype: compare type -static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context context, Type srctype, Type cmptype, size_t vec_len) +static cl_program makeSelectProgram(cl_kernel *kernel_ptr, + const cl_context context, Type srctype, + Type cmptype, const size_t vec_len) { char testname[256]; char stypename[32]; @@ -237,6 +254,9 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont if (srctype == kdouble) strcpy( extension, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" ); + if (srctype == khalf) + strcpy(extension, "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"); + // create type name and testname switch( vec_len ) { @@ -288,39 +308,38 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont return program; } - #define VECTOR_SIZE_COUNT 6 static int doTest(cl_command_queue queue, cl_context context, Type stype, Type cmptype, cl_device_id device) { int err = CL_SUCCESS; - int s_test_fail = 0; - MTdataHolder d; + MTdataHolder d(gRandomSeed); const size_t element_count[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 }; - cl_mem src1 = NULL; - cl_mem src2 = NULL; - cl_mem cmp = NULL; - cl_mem dest = NULL; - void *ref = NULL; - void *sref = NULL; + clMemWrapper src1, src2, cmp, dest; cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE; - size_t block_elements = BUFFER_SIZE / type_size[stype]; + const size_t block_elements = BUFFER_SIZE / type_size[stype]; size_t step = s_wimpy_mode ? s_wimpy_reduction_factor : 1; cl_ulong cmp_stride = block_elements * step; // It is more efficient to create the tests all at once since we // use the same test data on each of the vector sizes - int vecsize; - cl_program programs[VECTOR_SIZE_COUNT]; - cl_kernel kernels[VECTOR_SIZE_COUNT]; + clProgramWrapper programs[VECTOR_SIZE_COUNT]; + clKernelWrapper kernels[VECTOR_SIZE_COUNT]; - if(stype == kdouble && ! is_extension_available( device, "cl_khr_fp64" )) + if (stype == kdouble && !is_extension_available(device, "cl_khr_fp64")) { log_info("Skipping double because cl_khr_fp64 extension is not supported.\n"); return 0; } + if (stype == khalf && !is_extension_available(device, "cl_khr_fp16")) + { + log_info( + "Skipping half because cl_khr_fp16 extension is not supported.\n"); + return 0; + } + if (gIsEmbedded) { if (( stype == klong || stype == kulong ) && ! is_extension_available( device, "cles_khr_int64" )) @@ -336,29 +355,51 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c } } - for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) - { - programs[vecsize] = makeSelectProgram(&kernels[vecsize], context, stype, cmptype, element_count[vecsize] ); - if (!programs[vecsize] || !kernels[vecsize]) { - ++s_test_fail; - ++s_test_cnt; - return -1; - } - } - - ref = malloc( BUFFER_SIZE ); - if( NULL == ref ){ log_error("Error: could not allocate ref buffer\n" ); goto exit; } - sref = malloc( BUFFER_SIZE ); - if( NULL == sref ){ log_error("Error: could not allocate ref buffer\n" ); goto exit; } src1 = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate src1 buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate src1 buffer\n"); src2 = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate src2 buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate src2 buffer\n"); cmp = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate cmp buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate cmp buffer\n"); dest = clCreateBuffer( context, CL_MEM_WRITE_ONLY, BUFFER_SIZE, NULL, &err ); - if( err ) { log_error( "Error: could not allocate dest buffer\n" ); ++s_test_fail; goto exit; } + test_error_count(err, "Error: could not allocate dest buffer\n"); + + programs[0] = makeSelectProgram(&kernels[0], context, stype, cmptype, + element_count[0]); + programs[1] = makeSelectProgram(&kernels[1], context, stype, cmptype, + element_count[1]); + programs[2] = makeSelectProgram(&kernels[2], context, stype, cmptype, + element_count[2]); + programs[3] = makeSelectProgram(&kernels[3], context, stype, cmptype, + element_count[3]); + programs[4] = makeSelectProgram(&kernels[4], context, stype, cmptype, + element_count[4]); + programs[5] = makeSelectProgram(&kernels[5], context, stype, cmptype, + element_count[5]); + + for (size_t vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) + { + if (!programs[vecsize] || !kernels[vecsize]) + { + return -1; + } + + err = clSetKernelArg(kernels[vecsize], 0, sizeof dest, &dest); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); + err = clSetKernelArg(kernels[vecsize], 1, sizeof src1, &src1); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); + err = clSetKernelArg(kernels[vecsize], 2, sizeof src2, &src2); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); + err = clSetKernelArg(kernels[vecsize], 3, sizeof cmp, &cmp); + test_error_count(err, "Error: Cannot set kernel arg dest!\n"); + } + std::vector<char> ref(BUFFER_SIZE); + std::vector<char> sref(BUFFER_SIZE); + std::vector<char> src1_host(BUFFER_SIZE); + std::vector<char> src2_host(BUFFER_SIZE); + std::vector<char> cmp_host(BUFFER_SIZE); + std::vector<char> dest_host(BUFFER_SIZE); // We block the test as we are running over the range of compare values // "block the test" means "break the test into blocks" @@ -368,81 +409,63 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c cmp_stride = block_elements * step * (0xffffffffffffffffULL / 0x100000000ULL + 1); log_info("Testing..."); - d = MTdataHolder(gRandomSeed); uint64_t i; + + initSrcBuffer(src1_host.data(), stype, d); + initSrcBuffer(src2_host.data(), stype, d); for (i=0; i < blocks; i+=step) { - void *s1 = clEnqueueMapBuffer( queue, src1, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map src1" ); goto exit; } - // Setup the input data to change for each block - initSrcBuffer( s1, stype, d); - - void *s2 = clEnqueueMapBuffer( queue, src2, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map src2" ); goto exit; } - // Setup the input data to change for each block - initSrcBuffer( s2, stype, d); - - void *s3 = clEnqueueMapBuffer( queue, cmp, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map cmp" ); goto exit; } - // Setup the input data to change for each block - initCmpBuffer(s3, cmptype, i * cmp_stride, block_elements); - - // Create the reference result - Select sfunc = (cmptype == ctype[stype][0]) ? vrefSelects[stype][0] : vrefSelects[stype][1]; - (*sfunc)(ref, s1, s2, s3, block_elements); - - sfunc = (cmptype == ctype[stype][0]) ? refSelects[stype][0] : refSelects[stype][1]; - (*sfunc)(sref, s1, s2, s3, block_elements); - - if( (err = clEnqueueUnmapMemObject( queue, src1, s1, 0, NULL, NULL ))) - { log_error( "Error: coult not unmap src1\n" ); ++s_test_fail; goto exit; } - if( (err = clEnqueueUnmapMemObject( queue, src2, s2, 0, NULL, NULL ))) - { log_error( "Error: coult not unmap src2\n" ); ++s_test_fail; goto exit; } - if( (err = clEnqueueUnmapMemObject( queue, cmp, s3, 0, NULL, NULL ))) - { log_error( "Error: coult not unmap cmp\n" ); ++s_test_fail; goto exit; } - - for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) + initCmpBuffer(cmp_host.data(), cmptype, i * cmp_stride, block_elements); + + err = clEnqueueWriteBuffer(queue, src1, CL_FALSE, 0, BUFFER_SIZE, + src1_host.data(), 0, NULL, NULL); + test_error_count(err, "Error: Could not write src1"); + + err = clEnqueueWriteBuffer(queue, src2, CL_FALSE, 0, BUFFER_SIZE, + src2_host.data(), 0, NULL, NULL); + test_error_count(err, "Error: Could not write src2"); + + err = clEnqueueWriteBuffer(queue, cmp, CL_FALSE, 0, BUFFER_SIZE, + cmp_host.data(), 0, NULL, NULL); + test_error_count(err, "Error: Could not write cmp"); + + Select sfunc = (cmptype == ctype[stype][0]) ? vrefSelects[stype][0] + : vrefSelects[stype][1]; + (*sfunc)(ref.data(), src1_host.data(), src2_host.data(), + cmp_host.data(), block_elements); + + sfunc = (cmptype == ctype[stype][0]) ? refSelects[stype][0] + : refSelects[stype][1]; + (*sfunc)(sref.data(), src1_host.data(), src2_host.data(), + cmp_host.data(), block_elements); + + for (int vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize) { size_t vector_size = element_count[vecsize] * type_size[stype]; size_t vector_count = (BUFFER_SIZE + vector_size - 1) / vector_size; - if((err = clSetKernelArg(kernels[vecsize], 0, sizeof dest, &dest) )) - { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - if((err = clSetKernelArg(kernels[vecsize], 1, sizeof src1, &src1) )) - { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - if((err = clSetKernelArg(kernels[vecsize], 2, sizeof src2, &src2) )) - { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - if((err = clSetKernelArg(kernels[vecsize], 3, sizeof cmp, &cmp) )) - { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; } - + const cl_int pattern = -1; + err = clEnqueueFillBuffer(queue, dest, &pattern, sizeof(cl_int), 0, + BUFFER_SIZE, 0, nullptr, nullptr); + test_error_count(err, "clEnqueueFillBuffer failed"); - // Wipe destination - void *d = clEnqueueMapBuffer( queue, dest, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map dest" ); ++s_test_fail; goto exit; } - memset( d, -1, BUFFER_SIZE ); - if( (err = clEnqueueUnmapMemObject( queue, dest, d, 0, NULL, NULL ) ) ){ log_error( "Error: Could not unmap dest" ); ++s_test_fail; goto exit; } err = clEnqueueNDRangeKernel(queue, kernels[vecsize], 1, NULL, &vector_count, NULL, 0, NULL, NULL); - if (err != CL_SUCCESS) { - log_error("clEnqueueNDRangeKernel failed errcode:%d\n", err); - ++s_test_fail; - goto exit; - } - - d = clEnqueueMapBuffer( queue, dest, CL_TRUE, CL_MAP_READ, 0, BUFFER_SIZE, 0, NULL, NULL, &err ); - if( err ){ log_error( "Error: Could not map dest # 2" ); ++s_test_fail; goto exit; } + test_error_count(err, "clEnqueueNDRangeKernel failed errcode\n"); - if ((*checkResults[stype])(d, vecsize == 0 ? sref : ref, block_elements, element_count[vecsize])!=0){ - log_error("vec_size:%d indx: 0x%16.16llx\n", (int)element_count[vecsize], i); - ++s_test_fail; - goto exit; - } + err = clEnqueueReadBuffer(queue, dest, CL_TRUE, 0, BUFFER_SIZE, + dest_host.data(), 0, NULL, NULL); + test_error_count( + err, "Error: Reading buffer from dest to dest_host failed\n"); - if( (err = clEnqueueUnmapMemObject( queue, dest, d, 0, NULL, NULL ) ) ) + if ((*checkResults[stype])(dest_host.data(), + vecsize == 0 ? sref.data() : ref.data(), + block_elements, element_count[vecsize]) + != 0) { - log_error( "Error: Could not unmap dest" ); - ++s_test_fail; - goto exit; + log_error("vec_size:%d indx: 0x%16.16" PRIx64 "\n", + (int)element_count[vecsize], i); + return TEST_FAIL; } } // for vecsize } // for i @@ -452,24 +475,6 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c else log_info(" Wimpy Passed\n\n"); -exit: - if( src1 ) clReleaseMemObject( src1 ); - if( src2 ) clReleaseMemObject( src2 ); - if( cmp ) clReleaseMemObject( cmp ); - if( dest) clReleaseMemObject( dest ); - if( ref ) free(ref ); - if( sref ) free(sref ); - - for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; vecsize++) { - clReleaseKernel(kernels[vecsize]); - clReleaseProgram(programs[vecsize]); - } - ++s_test_cnt; - if (s_test_fail) - { - err = TEST_FAIL; - gFailCount++; - } return err; } @@ -505,6 +510,16 @@ int test_select_short_short(cl_device_id deviceID, cl_context context, cl_comman { return doTest(queue, context, kshort, kshort, deviceID); } +int test_select_half_ushort(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, khalf, kushort, deviceID); +} +int test_select_half_short(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + return doTest(queue, context, khalf, kshort, deviceID); +} int test_select_uint_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { return doTest(queue, context, kuint, kuint, deviceID); @@ -555,26 +570,17 @@ int test_select_double_long(cl_device_id deviceID, cl_context context, cl_comman } test_definition test_list[] = { - ADD_TEST( select_uchar_uchar ), - ADD_TEST( select_uchar_char ), - ADD_TEST( select_char_uchar ), - ADD_TEST( select_char_char ), - ADD_TEST( select_ushort_ushort ), - ADD_TEST( select_ushort_short ), - ADD_TEST( select_short_ushort ), - ADD_TEST( select_short_short ), - ADD_TEST( select_uint_uint ), - ADD_TEST( select_uint_int ), - ADD_TEST( select_int_uint ), - ADD_TEST( select_int_int ), - ADD_TEST( select_float_uint ), - ADD_TEST( select_float_int ), - ADD_TEST( select_ulong_ulong ), - ADD_TEST( select_ulong_long ), - ADD_TEST( select_long_ulong ), - ADD_TEST( select_long_long ), - ADD_TEST( select_double_ulong ), - ADD_TEST( select_double_long ), + ADD_TEST(select_uchar_uchar), ADD_TEST(select_uchar_char), + ADD_TEST(select_char_uchar), ADD_TEST(select_char_char), + ADD_TEST(select_ushort_ushort), ADD_TEST(select_ushort_short), + ADD_TEST(select_short_ushort), ADD_TEST(select_short_short), + ADD_TEST(select_half_ushort), ADD_TEST(select_half_short), + ADD_TEST(select_uint_uint), ADD_TEST(select_uint_int), + ADD_TEST(select_int_uint), ADD_TEST(select_int_int), + ADD_TEST(select_float_uint), ADD_TEST(select_float_int), + ADD_TEST(select_ulong_ulong), ADD_TEST(select_ulong_long), + ADD_TEST(select_long_ulong), ADD_TEST(select_long_long), + ADD_TEST(select_double_ulong), ADD_TEST(select_double_long), }; const int test_num = ARRAY_SIZE( test_list ); diff --git a/test_conformance/select/test_select.h b/test_conformance/select/test_select.h index c51ae13c..5cd78602 100644 --- a/test_conformance/select/test_select.h +++ b/test_conformance/select/test_select.h @@ -28,18 +28,20 @@ #endif // Defines the set of types we support (no support for double) -typedef enum { +typedef enum +{ kuchar = 0, kchar = 1, kushort = 2, kshort = 3, - kuint = 4, - kint = 5, - kfloat = 6, - kulong = 7, - klong = 8, - kdouble = 9, - kTypeCount // always goes last + khalf = 4, + kuint = 5, + kint = 6, + kfloat = 7, + kulong = 8, + klong = 9, + kdouble = 10, + kTypeCount // always goes last } Type; @@ -56,7 +58,8 @@ extern const size_t type_size[kTypeCount]; extern const Type ctype[kTypeCount][2]; // Reference functions for the primitive (non vector) type -typedef void (*Select)(void *dest, void *src1, void *src2, void *cmp, size_t c); +typedef void (*Select)(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t c); extern Select refSelects[kTypeCount][2]; // Reference functions for the primtive type but uses the vector @@ -64,7 +67,8 @@ extern Select refSelects[kTypeCount][2]; extern Select vrefSelects[kTypeCount][2]; // Check functions for each output type -typedef size_t (*CheckResults)(void *out1, void *out2, size_t count, size_t vectorSize); +typedef size_t (*CheckResults)(const void *const out1, const void *const out2, + size_t count, size_t vectorSize); extern CheckResults checkResults[kTypeCount]; // Helpful macros diff --git a/test_conformance/select/util_select.cpp b/test_conformance/select/util_select.cpp index f9641e99..078ff64a 100644 --- a/test_conformance/select/util_select.cpp +++ b/test_conformance/select/util_select.cpp @@ -13,10 +13,10 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#include "harness/compat.h" #include "harness/errorHelpers.h" #include <stdio.h> +#include <cinttypes> #include "test_select.h" @@ -25,29 +25,28 @@ //----------------------------------------- -const char *type_name[kTypeCount] = { - "uchar", "char", - "ushort", "short", - "uint", "int", - "float", "ulong", "long", "double" }; +const char *type_name[kTypeCount] = { "uchar", "char", "ushort", "short", + "half", "uint", "int", "float", + "ulong", "long", "double" }; const size_t type_size[kTypeCount] = { - sizeof(cl_uchar), sizeof(cl_char), - sizeof(cl_ushort), sizeof(cl_short), - sizeof(cl_uint), sizeof(cl_int), - sizeof(cl_float), sizeof(cl_ulong), sizeof(cl_long), sizeof( cl_double ) }; + sizeof(cl_uchar), sizeof(cl_char), sizeof(cl_ushort), sizeof(cl_short), + sizeof(cl_half), sizeof(cl_uint), sizeof(cl_int), sizeof(cl_float), + sizeof(cl_ulong), sizeof(cl_long), sizeof(cl_double) +}; const Type ctype[kTypeCount][2] = { - { kuchar, kchar }, // uchar - { kuchar, kchar }, // char - { kushort, kshort}, // ushort - { kushort, kshort}, // short - { kuint, kint }, // uint - { kuint, kint }, // int - { kuint, kint }, // float - { kulong, klong }, // ulong - { kulong, klong }, // long - { kulong, klong } // double + { kuchar, kchar }, // uchar + { kuchar, kchar }, // char + { kushort, kshort }, // ushort + { kushort, kshort }, // short + { kushort, kshort }, // half + { kuint, kint }, // uint + { kuint, kint }, // int + { kuint, kint }, // float + { kulong, klong }, // ulong + { kulong, klong }, // long + { kulong, klong } // double }; @@ -55,510 +54,594 @@ const Type ctype[kTypeCount][2] = { // Reference functions //----------------------------------------- -void refselect_1i8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_char *d, *x, *y, *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_char*) cmp; + cl_char *const d = (cl_char *)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) { d[i] = m[i] ? y[i] : x[i]; } } -void refselect_1u8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uchar *d, *x, *y; - cl_char *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_char*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) { d[i] = m[i] ? y[i] : x[i]; } } -void refselect_1i16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_short *d, *x, *y, *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_short*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ushort *d, *x, *y; - cl_short *m; - d = (cl_ushort*) dest; - x = (cl_ushort*) src1; - y = (cl_ushort*) src2; - m = (cl_short*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i32(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_int *d, *x, *y, *m; - d = (cl_int*)dest; - x = (cl_int*)src1; - y = (cl_int*)src2; - m = (cl_int*)cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u32(void *dest, void *src1, void *src2, void *cmp, size_t count){ +void refselect_1u32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_int *m; - d = (cl_uint*)dest; - x = (cl_uint*)src1; - y = (cl_uint*)src2; - m = (cl_int*)cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_long *d, *x, *y, *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_long*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_long *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_long*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_char *d, *x, *y; - cl_uchar *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_uchar*) cmp; + cl_char *const d = (cl_char *)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uchar *d, *x, *y, *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_uchar*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_short *d, *x, *y; - cl_ushort *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_ushort*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ushort *d, *x, *y, *m; - d = (cl_ushort*) dest; - x = (cl_ushort*) src1; - y = (cl_ushort*) src2; - m = (cl_ushort*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_int *d, *x, *y; - cl_uint *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_uint*) cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uint *d, *x, *y, *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1i64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1i64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_ulong *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_ulong*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_1u64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_1u64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ulong *d, *x, *y, *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_ulong*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_ffi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_hhi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_short *const m = (cl_short *)cmp; + for (i = 0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_hhu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_int *d, *x, *y; - cl_int *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_int*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; + for (i = 0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; +} + +void refselect_ffi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_ffu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_ffu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_uint *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_ddi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_ddi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_long *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_long*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void refselect_ddu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void refselect_ddu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_ulong *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_ulong*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = m[i] ? y[i] : x[i]; } -void vrefselect_1i8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_char *d, *x, *y, *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_char*) cmp; + cl_char *const d = (cl_char *)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80) ? y[i] : x[i]; } -void vrefselect_1u8(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u8(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uchar *d, *x, *y; - cl_char *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_char*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_char *const m = (cl_char *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80) ? y[i] : x[i]; } -void vrefselect_1i16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_short *d, *x, *y, *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_short*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000) ? y[i] : x[i]; } -void vrefselect_1u16(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u16(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ushort *d, *x, *y; - cl_short *m; - d = (cl_ushort*) dest; - x = (cl_ushort*)src1; - y = (cl_ushort*)src2; - m = (cl_short*)cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_short *const m = (cl_short *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000) ? y[i] : x[i]; } -void vrefselect_1i32(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_int *d, *x, *y, *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_int*) cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; } -void vrefselect_1u32(void *dest, void *src1, void *src2, void *cmp, size_t count){ +void vrefselect_1u32(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_int *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_int*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; } -void vrefselect_1i64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_long *d, *x, *y, *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_long*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i]; } -void vrefselect_1u64(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u64(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_long *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_long*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i]; } -void vrefselect_1i8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_char *d, *x, *y; - cl_uchar *m; - d = (cl_char*) dest; - x = (cl_char*) src1; - y = (cl_char*) src2; - m = (cl_uchar*) cmp; + cl_char *const d = (cl_char *)dest; + const cl_char *const x = (cl_char *)src1; + const cl_char *const y = (cl_char *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80U) ? y[i] : x[i]; } -void vrefselect_1u8u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u8u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uchar *d, *x, *y, *m; - d = (cl_uchar*) dest; - x = (cl_uchar*) src1; - y = (cl_uchar*) src2; - m = (cl_uchar*) cmp; + cl_uchar *const d = (cl_uchar *)dest; + const cl_uchar *const x = (cl_uchar *)src1; + const cl_uchar *const y = (cl_uchar *)src2; + const cl_uchar *const m = (cl_uchar *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80U) ? y[i] : x[i]; } -void vrefselect_1i16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_short *d, *x, *y; - cl_ushort *m; - d = (cl_short*) dest; - x = (cl_short*) src1; - y = (cl_short*) src2; - m = (cl_ushort*) cmp; + cl_short *const d = (cl_short *)dest; + const cl_short *const x = (cl_short *)src1; + const cl_short *const y = (cl_short *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000U) ? y[i] : x[i]; } -void vrefselect_1u16u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u16u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ushort *d, *x, *y, *m; - d = (cl_ushort*) dest; - x = (cl_ushort*) src1; - y = (cl_ushort*) src2; - m = (cl_ushort*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000U) ? y[i] : x[i]; } -void vrefselect_1i32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_int *d, *x, *y; - cl_uint *m; - d = (cl_int*) dest; - x = (cl_int*) src1; - y = (cl_int*) src2; - m = (cl_uint*) cmp; + cl_int *const d = (cl_int *)dest; + const cl_int *const x = (cl_int *)src1; + const cl_int *const y = (cl_int *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000U) ? y[i] : x[i]; } -void vrefselect_1u32u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u32u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_uint *d, *x, *y, *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000U) ? y[i] : x[i]; } -void vrefselect_1i64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1i64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_long *d, *x, *y; - cl_ulong *m; - d = (cl_long*) dest; - x = (cl_long*) src1; - y = (cl_long*) src2; - m = (cl_ulong*) cmp; + cl_long *const d = (cl_long *)dest; + const cl_long *const x = (cl_long *)src1; + const cl_long *const y = (cl_long *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000ULL) ? y[i] : x[i]; } -void vrefselect_1u64u(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_1u64u(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, + size_t count) +{ size_t i; - cl_ulong *d, *x, *y, *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_ulong*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000ULL) ? y[i] : x[i]; } -void vrefselect_ffi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_hhi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_int *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_int*) cmp; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_short *const m = (cl_short *)cmp; + for (i = 0; i < count; ++i) d[i] = (m[i] & 0x8000) ? y[i] : x[i]; +} + +void vrefselect_hhu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_ushort *const d = (cl_ushort *)dest; + const cl_ushort *const x = (cl_ushort *)src1; + const cl_ushort *const y = (cl_ushort *)src2; + const cl_ushort *const m = (cl_ushort *)cmp; + for (i = 0; i < count; ++i) d[i] = (m[i] & 0x8000U) ? y[i] : x[i]; +} + +void vrefselect_ffi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ + size_t i; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_int *const m = (cl_int *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000) ? y[i] : x[i]; } -void vrefselect_ffu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_ffu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_uint *d, *x, *y; - cl_uint *m; - d = (cl_uint*) dest; - x = (cl_uint*) src1; - y = (cl_uint*) src2; - m = (cl_uint*) cmp; + cl_uint *const d = (cl_uint *)dest; + const cl_uint *const x = (cl_uint *)src1; + const cl_uint *const y = (cl_uint *)src2; + const cl_uint *const m = (cl_uint *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x80000000U) ? y[i] : x[i]; } -void vrefselect_ddi(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_ddi(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_long *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_long*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_long *const m = (cl_long *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i]; } -void vrefselect_ddu(void *dest, void *src1, void *src2, void *cmp, size_t count) { +void vrefselect_ddu(void *const dest, const void *const src1, + const void *const src2, const void *const cmp, size_t count) +{ size_t i; - cl_ulong *d, *x, *y; - cl_ulong *m; - d = (cl_ulong*) dest; - x = (cl_ulong*) src1; - y = (cl_ulong*) src2; - m = (cl_ulong*) cmp; + cl_ulong *const d = (cl_ulong *)dest; + const cl_ulong *const x = (cl_ulong *)src1; + const cl_ulong *const y = (cl_ulong *)src2; + const cl_ulong *const m = (cl_ulong *)cmp; for (i=0; i < count; ++i) d[i] = (m[i] & 0x8000000000000000ULL) ? y[i] : x[i]; } // Define refSelects -Select refSelects[kTypeCount][2] = { - { refselect_1u8u, refselect_1u8 }, // cl_uchar - { refselect_1i8u, refselect_1i8 }, // char +Select refSelects[kTypeCount][2] = { + { refselect_1u8u, refselect_1u8 }, // cl_uchar + { refselect_1i8u, refselect_1i8 }, // char { refselect_1u16u, refselect_1u16 }, // ushort { refselect_1i16u, refselect_1i16 }, // short + { refselect_hhu, refselect_hhi }, // half { refselect_1u32u, refselect_1u32 }, // uint { refselect_1i32u, refselect_1i32 }, // int - { refselect_ffu, refselect_ffi }, // float + { refselect_ffu, refselect_ffi }, // float { refselect_1u64u, refselect_1u64 }, // ulong { refselect_1i64u, refselect_1i64 }, // long - { refselect_ddu, refselect_ddi } // double + { refselect_ddu, refselect_ddi } // double }; // Define vrefSelects (vector refSelects) -Select vrefSelects[kTypeCount][2] = { - { vrefselect_1u8u, vrefselect_1u8 }, // cl_uchar - { vrefselect_1i8u, vrefselect_1i8 }, // char +Select vrefSelects[kTypeCount][2] = { + { vrefselect_1u8u, vrefselect_1u8 }, // cl_uchar + { vrefselect_1i8u, vrefselect_1i8 }, // char { vrefselect_1u16u, vrefselect_1u16 }, // ushort { vrefselect_1i16u, vrefselect_1i16 }, // short + { vrefselect_hhu, vrefselect_hhi }, // half { vrefselect_1u32u, vrefselect_1u32 }, // uint { vrefselect_1i32u, vrefselect_1i32 }, // int - { vrefselect_ffu, vrefselect_ffi }, // float + { vrefselect_ffu, vrefselect_ffi }, // float { vrefselect_1u64u, vrefselect_1u64 }, // ulong { vrefselect_1i64u, vrefselect_1i64 }, // long - { vrefselect_ddu, vrefselect_ddi } // double + { vrefselect_ddu, vrefselect_ddi } // double }; //----------------------------------------- // Check functions //----------------------------------------- -size_t check_uchar(void *test, void *correct, size_t count, size_t vector_size) { - const cl_uchar *t = (const cl_uchar *) test; - const cl_uchar *c = (const cl_uchar *) correct; +size_t check_uchar(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_uchar *const t = (const cl_uchar *)test; + const cl_uchar *const c = (const cl_uchar *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -566,8 +649,8 @@ size_t check_uchar(void *test, void *correct, size_t count, size_t vector_size) for (i = 0; i < count; i++) if (t[i] != c[i]) { - log_error("\n(check_uchar) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " + log_error("\n(check_uchar) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " "*0x%2.2x vs 0x%2.2x\n", vector_size, i, count, c[i], t[i]); return i + 1; @@ -576,9 +659,11 @@ size_t check_uchar(void *test, void *correct, size_t count, size_t vector_size) return 0; } -size_t check_char(void *test, void *correct, size_t count, size_t vector_size) { - const cl_char *t = (const cl_char *) test; - const cl_char *c = (const cl_char *) correct; +size_t check_char(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_char *const t = (const cl_char *)test; + const cl_char *const c = (const cl_char *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -586,8 +671,8 @@ size_t check_char(void *test, void *correct, size_t count, size_t vector_size) { for (i = 0; i < count; i++) if (t[i] != c[i]) { - log_error("\n(check_char) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " + log_error("\n(check_char) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " "*0x%2.2x vs 0x%2.2x\n", vector_size, i, count, c[i], t[i]); return i + 1; @@ -597,9 +682,11 @@ size_t check_char(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_ushort(void *test, void *correct, size_t count, size_t vector_size) { - const cl_ushort *t = (const cl_ushort *) test; - const cl_ushort *c = (const cl_ushort *) correct; +size_t check_ushort(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ushort *const t = (const cl_ushort *)test; + const cl_ushort *const c = (const cl_ushort *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -607,8 +694,8 @@ size_t check_ushort(void *test, void *correct, size_t count, size_t vector_size) for (i = 0; i < count; i++) if (t[i] != c[i]) { - log_error("\n(check_ushort) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " + log_error("\n(check_ushort) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " "*0x%4.4x vs 0x%4.4x\n", vector_size, i, count, c[i], t[i]); return i + 1; @@ -618,9 +705,11 @@ size_t check_ushort(void *test, void *correct, size_t count, size_t vector_size) return 0; } -size_t check_short(void *test, void *correct, size_t count, size_t vector_size) { - const cl_short *t = (const cl_short *) test; - const cl_short *c = (const cl_short *) correct; +size_t check_short(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_short *const t = (const cl_short *)test; + const cl_short *const c = (const cl_short *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -628,8 +717,8 @@ size_t check_short(void *test, void *correct, size_t count, size_t vector_size) for (i = 0; i < count; i++) if (t[i] != c[i]) { - log_error("\n(check_short) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " + log_error("\n(check_short) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i]); return i + 1; @@ -639,9 +728,11 @@ size_t check_short(void *test, void *correct, size_t count, size_t vector_size) return 0; } -size_t check_uint(void *test, void *correct, size_t count, size_t vector_size) { - const cl_uint *t = (const cl_uint *) test; - const cl_uint *c = (const cl_uint *) correct; +size_t check_uint(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_uint *const t = (const cl_uint *)test; + const cl_uint *const c = (const cl_uint *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -649,8 +740,8 @@ size_t check_uint(void *test, void *correct, size_t count, size_t vector_size) { for (i = 0; i < count; i++) if (t[i] != c[i]) { - log_error("\n(check_uint) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " + log_error("\n(check_uint) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i]); return i + 1; @@ -660,9 +751,11 @@ size_t check_uint(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_int(void *test, void *correct, size_t count, size_t vector_size) { - const cl_int *t = (const cl_int *) test; - const cl_int *c = (const cl_int *) correct; +size_t check_int(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_int *const t = (const cl_int *)test; + const cl_int *const c = (const cl_int *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -671,8 +764,8 @@ size_t check_int(void *test, void *correct, size_t count, size_t vector_size) { if (t[i] != c[i]) { - log_error("\n(check_int) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " + log_error("\n(check_int) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i]); return i + 1; @@ -682,9 +775,11 @@ size_t check_int(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_ulong(void *test, void *correct, size_t count, size_t vector_size) { - const cl_ulong *t = (const cl_ulong *) test; - const cl_ulong *c = (const cl_ulong *) correct; +size_t check_ulong(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ulong *const t = (const cl_ulong *)test; + const cl_ulong *const c = (const cl_ulong *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -692,9 +787,9 @@ size_t check_ulong(void *test, void *correct, size_t count, size_t vector_size) for (i = 0; i < count; i++) if (t[i] != c[i]) { - log_error("\n(check_ulong) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " - "*0x%16.16llx vs 0x%16.16llx\n", + log_error("\n(check_ulong) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " + "*0x%16.16" PRIx64 " vs 0x%16.16" PRIx64 "\n", vector_size, i, count, c[i], t[i]); return i + 1; } @@ -703,9 +798,11 @@ size_t check_ulong(void *test, void *correct, size_t count, size_t vector_size) return 0; } -size_t check_long(void *test, void *correct, size_t count, size_t vector_size) { - const cl_long *t = (const cl_long *) test; - const cl_long *c = (const cl_long *) correct; +size_t check_long(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_long *const t = (const cl_long *)test; + const cl_long *const c = (const cl_long *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -713,9 +810,34 @@ size_t check_long(void *test, void *correct, size_t count, size_t vector_size) { for (i = 0; i < count; i++) if (t[i] != c[i]) { - log_error("\n(check_long) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " - "*0x%16.16llx vs 0x%16.16llx\n", + log_error("\n(check_long) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " + "*0x%16.16" PRIx64 " vs 0x%16.16" PRIx64 "\n", + vector_size, i, count, c[i], t[i]); + return i + 1; + } + } + + return 0; +} + +size_t check_half(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ushort *const t = (const cl_ushort *)test; + const cl_ushort *const c = (const cl_ushort *)correct; + size_t i; + + if (memcmp(t, c, count * sizeof(c[0])) != 0) + { + for (i = 0; i < count; i++) /* Allow nans to be binary different */ + if ((t[i] != c[i]) + && !(isnan(((cl_half *)correct)[i]) + && isnan(((cl_half *)test)[i]))) + { + log_error("\n(check_half) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " + "*0x%4.4x vs 0x%4.4x\n", vector_size, i, count, c[i], t[i]); return i + 1; } @@ -724,9 +846,11 @@ size_t check_long(void *test, void *correct, size_t count, size_t vector_size) { return 0; } -size_t check_float( void *test, void *correct, size_t count, size_t vector_size ) { - const cl_uint *t = (const cl_uint *) test; - const cl_uint *c = (const cl_uint *) correct; +size_t check_float(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_uint *const t = (const cl_uint *)test; + const cl_uint *const c = (const cl_uint *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -735,8 +859,8 @@ size_t check_float( void *test, void *correct, size_t count, size_t vector_size if ((t[i] != c[i]) && !(isnan(((float *)correct)[i]) && isnan(((float *)test)[i]))) { - log_error("\n(check_float) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " + log_error("\n(check_float) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " "*0x%8.8x vs 0x%8.8x\n", vector_size, i, count, c[i], t[i]); return i + 1; @@ -746,9 +870,11 @@ size_t check_float( void *test, void *correct, size_t count, size_t vector_size return 0; } -size_t check_double( void *test, void *correct, size_t count, size_t vector_size ) { - const cl_ulong *t = (const cl_ulong *) test; - const cl_ulong *c = (const cl_ulong *) correct; +size_t check_double(const void *const test, const void *const correct, + size_t count, size_t vector_size) +{ + const cl_ulong *const t = (const cl_ulong *)test; + const cl_ulong *const c = (const cl_ulong *)correct; size_t i; if (memcmp(t, c, count * sizeof(c[0])) != 0) @@ -758,9 +884,9 @@ size_t check_double( void *test, void *correct, size_t count, size_t vector_size && !(isnan(((double *)correct)[i]) && isnan(((double *)test)[i]))) { - log_error("\n(check_double) Error for vector size %ld found at " - "0x%8.8lx (of 0x%8.8lx): " - "*0x%16.16llx vs 0x%16.16llx\n", + log_error("\n(check_double) Error for vector size %zu found at " + "0x%8.8zx (of 0x%8.8zx): " + "*0x%16.16" PRIx64 " vs 0x%16.16" PRIx64 "\n", vector_size, i, count, c[i], t[i]); return i + 1; } @@ -770,5 +896,7 @@ size_t check_double( void *test, void *correct, size_t count, size_t vector_size } CheckResults checkResults[kTypeCount] = { - check_uchar, check_char, check_ushort, check_short, check_uint, - check_int, check_float, check_ulong, check_long, check_double }; + check_uchar, check_char, check_ushort, check_short, + check_half, check_uint, check_int, check_float, + check_ulong, check_long, check_double +}; diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32 new file mode 100644 index 00000000..49127187 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32 @@ -0,0 +1,35 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 17 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Float16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "op_neg_half" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %half = OpTypeFloat 16 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half + %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_half +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %1 = OpFunction %void None %10 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_half + %11 = OpLabel + %12 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0 + %13 = OpCompositeExtract %uint %12 0 + %14 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %13 + %15 = OpLoad %half %14 + %16 = OpFNegate %half %15 + OpStore %14 %16 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64 new file mode 100644 index 00000000..9c7e3d6d --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64 @@ -0,0 +1,39 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 20 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Int64 + OpCapability Float16 + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "op_neg_half" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %ulong = OpTypeInt 64 0 + %v3ulong = OpTypeVector %ulong 3 +%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong + %void = OpTypeVoid + %half = OpTypeFloat 16 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half + %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_half + %ulong_32 = OpConstant %ulong 32 +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input + %1 = OpFunction %void None %10 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_half + %12 = OpLabel + %13 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0 + %14 = OpCompositeExtract %ulong %13 0 + %15 = OpShiftLeftLogical %ulong %14 %ulong_32 + %16 = OpShiftRightArithmetic %ulong %15 %ulong_32 + %17 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %16 + %18 = OpLoad %half %17 + %19 = OpFNegate %half %18 + OpStore %17 %19 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm32 new file mode 100644 index 00000000..985b5262 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm32 @@ -0,0 +1,42 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 22 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Vector16 + OpCapability Float16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "vector_half8_extract" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %4 FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %half = OpTypeFloat 16 + %v8half = OpTypeVector %half 4 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half +%_ptr_CrossWorkgroup_v8half = OpTypePointer CrossWorkgroup %v8half + %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_v8half %_ptr_CrossWorkgroup_half %uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %1 = OpFunction %void None %13 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_v8half + %4 = OpFunctionParameter %_ptr_CrossWorkgroup_half + %14 = OpFunctionParameter %uint + %15 = OpLabel + %16 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0 + %17 = OpCompositeExtract %uint %16 0 + %18 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v8half %in %17 + %19 = OpLoad %v8half %18 + %20 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %4 %17 + %21 = OpVectorExtractDynamic %half %19 %14 + OpStore %20 %21 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm64 new file mode 100644 index 00000000..dd14f66c --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm64 @@ -0,0 +1,47 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 26 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Int64 + OpCapability Vector16 + OpCapability Float16 + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "vector_half8_extract" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %4 FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %ulong = OpTypeInt 64 0 + %v3ulong = OpTypeVector %ulong 3 +%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong + %ulong_32 = OpConstant %ulong 32 + %uint = OpTypeInt 32 0 + %void = OpTypeVoid + %half = OpTypeFloat 16 + %v8half = OpTypeVector %half 8 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half +%_ptr_CrossWorkgroup_v8half = OpTypePointer CrossWorkgroup %v8half + %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v8half %_ptr_CrossWorkgroup_half %uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input + %1 = OpFunction %void None %15 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_v8half + %4 = OpFunctionParameter %_ptr_CrossWorkgroup_half + %16 = OpFunctionParameter %uint + %17 = OpLabel + %18 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0 + %19 = OpCompositeExtract %ulong %18 0 + %20 = OpShiftLeftLogical %ulong %19 %ulong_32 + %21 = OpShiftRightArithmetic %ulong %20 %ulong_32 + %22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v8half %in %21 + %23 = OpLoad %v8half %22 + %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %4 %21 + %25 = OpVectorExtractDynamic %half %23 %16 + OpStore %24 %25 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm32 new file mode 100644 index 00000000..27812938 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm32 @@ -0,0 +1,43 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 23 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Vector16 + OpCapability Float16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "vector_half8_insert" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %4 FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %half = OpTypeFloat 16 + %v8half = OpTypeVector %half 8 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half +%_ptr_CrossWorkgroup_v8half = OpTypePointer CrossWorkgroup %v8half + %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_v8half %uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %1 = OpFunction %void None %13 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_half + %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v8half + %14 = OpFunctionParameter %uint + %15 = OpLabel + %16 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0 + %17 = OpCompositeExtract %uint %16 0 + %18 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %17 + %19 = OpLoad %half %18 + %20 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v8half %4 %17 + %21 = OpLoad %v8half %20 + %22 = OpVectorInsertDynamic %v8half %21 %19 %14 + OpStore %20 %22 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm64 new file mode 100644 index 00000000..f140fc25 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm64 @@ -0,0 +1,48 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 27 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Int64 + OpCapability Vector16 + OpCapability Float16 + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "vector_half8_insert" %gl_GlobalInvocationID + OpName %in "in" + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %in FuncParamAttr NoCapture + OpDecorate %4 FuncParamAttr NoCapture + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + %ulong = OpTypeInt 64 0 + %v3ulong = OpTypeVector %ulong 3 +%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong + %ulong_32 = OpConstant %ulong 32 + %uint = OpTypeInt 32 0 + %void = OpTypeVoid + %half = OpTypeFloat 16 + %v8half = OpTypeVector %half 8 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half +%_ptr_CrossWorkgroup_v8half = OpTypePointer CrossWorkgroup %v8half + %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_v8half %uint +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input + %1 = OpFunction %void None %15 + %in = OpFunctionParameter %_ptr_CrossWorkgroup_half + %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v8half + %16 = OpFunctionParameter %uint + %17 = OpLabel + %18 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0 + %19 = OpCompositeExtract %ulong %18 0 + %20 = OpShiftLeftLogical %ulong %19 %ulong_32 + %21 = OpShiftRightArithmetic %ulong %20 %ulong_32 + %22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %21 + %23 = OpLoad %half %22 + %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v8half %4 %21 + %25 = OpLoad %v8half %24 + %26 = OpVectorInsertDynamic %v8half %25 %23 %16 + OpStore %24 %26 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32 new file mode 100644 index 00000000..6fda7d8f --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32 @@ -0,0 +1,46 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 25 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Float16 + OpMemoryModel Physical32 OpenCL + OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID + OpName %res "res" + OpName %lhs "lhs" + OpName %rhs "rhs" + OpDecorate %5 FuncParamAttr NoCapture + %5 = OpDecorationGroup + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + OpGroupDecorate %5 %res %lhs %rhs + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = OpTypePointer Input %v3uint + %void = OpTypeVoid + %half = OpTypeFloat 16 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half + %v4half = OpTypeVector %half 4 +%_ptr_CrossWorkgroup_v4half = OpTypePointer CrossWorkgroup %v4half + %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_half +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %1 = OpFunction %void None %15 + %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4half + %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4half + %rhs = OpFunctionParameter %_ptr_CrossWorkgroup_half + %16 = OpLabel + %17 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0 + %18 = OpCompositeExtract %uint %17 0 + %19 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %lhs %18 + %20 = OpLoad %v4half %19 Aligned 8 + %21 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %rhs %18 + %22 = OpLoad %half %21 Aligned 2 + %23 = OpVectorTimesScalar %v4half %20 %22 + %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %res %18 + OpStore %24 %23 Aligned 8 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64 new file mode 100644 index 00000000..fa2d5221 --- /dev/null +++ b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64 @@ -0,0 +1,50 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 28 +; Schema: 0 + OpCapability Addresses + OpCapability Linkage + OpCapability Kernel + OpCapability Int64 + OpCapability Float16 + OpMemoryModel Physical64 OpenCL + OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID + OpName %res "res" + OpName %lhs "lhs" + OpName %rhs "rhs" + OpDecorate %5 FuncParamAttr NoCapture + %5 = OpDecorationGroup + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_GlobalInvocationID Constant + OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import + OpGroupDecorate %5 %res %lhs %rhs + %ulong = OpTypeInt 64 0 + %v3ulong = OpTypeVector %ulong 3 +%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong + %ulong_32 = OpConstant %ulong 32 + %void = OpTypeVoid + %half = OpTypeFloat 16 +%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half + %v4half = OpTypeVector %half 4 +%_ptr_CrossWorkgroup_v4half = OpTypePointer CrossWorkgroup %v4half + %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_half +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input + %1 = OpFunction %void None %16 + %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4half + %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4half + %rhs = OpFunctionParameter %_ptr_CrossWorkgroup_half + %17 = OpLabel + %18 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0 + %19 = OpCompositeExtract %ulong %18 0 + %20 = OpShiftLeftLogical %ulong %19 %ulong_32 + %21 = OpShiftRightArithmetic %ulong %20 %ulong_32 + %22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %lhs %21 + %23 = OpLoad %v4half %22 Aligned 8 + %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %rhs %21 + %25 = OpLoad %half %24 Aligned 2 + %26 = OpVectorTimesScalar %v4half %23 %25 + %27 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %res %21 + OpStore %27 %26 Aligned 8 + OpReturn + OpFunctionEnd diff --git a/test_conformance/spirv_new/test_op_negate.cpp b/test_conformance/spirv_new/test_op_negate.cpp index e3dc1f34..5009be93 100644 --- a/test_conformance/spirv_new/test_op_negate.cpp +++ b/test_conformance/spirv_new/test_op_negate.cpp @@ -32,6 +32,15 @@ int test_negation(cl_device_id deviceID, return 0; } } + if (std::string(Tname).find("half") != std::string::npos) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info( + "Extension cl_khr_fp16 not supported; skipping half tests.\n"); + return 0; + } + } cl_int err = CL_SUCCESS; int num = (int)h_in.size(); @@ -73,29 +82,28 @@ int test_negation(cl_device_id deviceID, return 0; } -#define TEST_NEGATION(TYPE, Tv, OP, FUNC) \ - TEST_SPIRV_FUNC(OP##_##TYPE) \ - { \ - int num = 1 << 20; \ - std::vector<Tv> in(num); \ - RandomSeed seed(gRandomSeed); \ - for (int i = 0; i < num; i++) { \ - in[i] = genrand<Tv>(seed); \ - } \ - return test_negation<Tv>(deviceID, \ - context, \ - queue, \ - #TYPE, \ - #OP, \ - in, FUNC); \ - } \ +#define TEST_NEGATION(TYPE, Tv, OP, FUNC) \ + TEST_SPIRV_FUNC(OP##_##TYPE) \ + { \ + int num = 1 << 20; \ + std::vector<Tv> in(num); \ + RandomSeed seed(gRandomSeed); \ + for (int i = 0; i < num; i++) \ + { \ + in[i] = genrand<Tv>(seed); \ + } \ + return test_negation<Tv>(deviceID, context, queue, #TYPE, #OP, in, \ + FUNC); \ + } +#define TEST_NEG_HALF TEST_NEGATION(half, cl_half, op_neg, negOpHalf) #define TEST_NEG(TYPE) TEST_NEGATION(TYPE, cl_##TYPE, op_neg, negOp<cl_##TYPE>) #define TEST_NOT(TYPE) TEST_NEGATION(TYPE, cl_##TYPE, op_not, notOp<cl_##TYPE>) #define TEST_NEG_VEC(TYPE, N) TEST_NEGATION(TYPE##N, cl_##TYPE##N, op_neg, (negOpVec<cl_##TYPE##N, N>)) #define TEST_NOT_VEC(TYPE, N) TEST_NEGATION(TYPE##N, cl_##TYPE##N, op_not, (notOpVec<cl_##TYPE##N, N>)) +TEST_NEG_HALF TEST_NEG(float) TEST_NEG(double) TEST_NEG(int) diff --git a/test_conformance/spirv_new/test_op_vector_extract.cpp b/test_conformance/spirv_new/test_op_vector_extract.cpp index fe1f8253..f77aa7a2 100644 --- a/test_conformance/spirv_new/test_op_vector_extract.cpp +++ b/test_conformance/spirv_new/test_op_vector_extract.cpp @@ -25,6 +25,17 @@ int test_extract(cl_device_id deviceID, cl_context context, return 0; } } + + if (std::string(name).find("half") != std::string::npos) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info( + "Extension cl_khr_fp16 not supported; skipping half tests.\n"); + return 0; + } + } + cl_int err = CL_SUCCESS; clProgramWrapper prog; @@ -76,27 +87,30 @@ int test_extract(cl_device_id deviceID, cl_context context, return 0; } -#define TEST_VECTOR_EXTRACT(TYPE, N) \ - TEST_SPIRV_FUNC(op_vector_##TYPE##N##_extract) \ - { \ - typedef cl_##TYPE##N Tv; \ - typedef cl_##TYPE Ts; \ - const int num = 1 << 20; \ - std::vector<Tv> in(num); \ - const char *name = "vector_" #TYPE #N "_extract"; \ - \ - RandomSeed seed(gRandomSeed); \ - \ - for (int i = 0; i < num; i++) { \ - in[i] = genrand<Tv>(seed); \ - } \ - \ - return test_extract<Tv, Ts>(deviceID, \ - context, queue, \ - name, \ - in, N); \ +#define TEST_VECTOR_EXTRACT(TYPE, N) \ + TEST_SPIRV_FUNC(op_vector_##TYPE##N##_extract) \ + { \ + if (sizeof(cl_##TYPE) == 2) \ + { \ + PASSIVE_REQUIRE_FP16_SUPPORT(deviceID); \ + } \ + typedef cl_##TYPE##N Tv; \ + typedef cl_##TYPE Ts; \ + const int num = 1 << 20; \ + std::vector<Tv> in(num); \ + const char *name = "vector_" #TYPE #N "_extract"; \ + \ + RandomSeed seed(gRandomSeed); \ + \ + for (int i = 0; i < num; i++) \ + { \ + in[i] = genrand<Tv>(seed); \ + } \ + \ + return test_extract<Tv, Ts>(deviceID, context, queue, name, in, N); \ } +TEST_VECTOR_EXTRACT(half, 8) TEST_VECTOR_EXTRACT(int, 4) TEST_VECTOR_EXTRACT(float, 4) TEST_VECTOR_EXTRACT(long, 2) diff --git a/test_conformance/spirv_new/test_op_vector_insert.cpp b/test_conformance/spirv_new/test_op_vector_insert.cpp index 0749c14a..62fc78cb 100644 --- a/test_conformance/spirv_new/test_op_vector_insert.cpp +++ b/test_conformance/spirv_new/test_op_vector_insert.cpp @@ -25,6 +25,17 @@ int test_insert(cl_device_id deviceID, cl_context context, return 0; } } + + if (std::string(name).find("half") != std::string::npos) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info( + "Extension cl_khr_fp16 not supported; skipping half tests.\n"); + return 0; + } + } + cl_int err = CL_SUCCESS; clProgramWrapper prog; err = get_program_with_il(prog, deviceID, context, name); @@ -94,27 +105,30 @@ int test_insert(cl_device_id deviceID, cl_context context, return 0; } -#define TEST_VECTOR_INSERT(TYPE, N) \ - TEST_SPIRV_FUNC(op_vector_##TYPE##N##_insert) \ - { \ - typedef cl_##TYPE##N Tv; \ - typedef cl_##TYPE Ts; \ - const int num = 1 << 20; \ - std::vector<Ts> in(num); \ - const char *name = "vector_" #TYPE #N "_insert"; \ - \ - RandomSeed seed(gRandomSeed); \ - \ - for (int i = 0; i < num; i++) { \ - in[i] = genrand<Ts>(seed); \ - } \ - \ - return test_insert<Ts, Tv>(deviceID, \ - context, queue, \ - name, \ - in, N); \ +#define TEST_VECTOR_INSERT(TYPE, N) \ + TEST_SPIRV_FUNC(op_vector_##TYPE##N##_insert) \ + { \ + if (sizeof(cl_##TYPE) == 2) \ + { \ + PASSIVE_REQUIRE_FP16_SUPPORT(deviceID); \ + } \ + typedef cl_##TYPE##N Tv; \ + typedef cl_##TYPE Ts; \ + const int num = 1 << 20; \ + std::vector<Ts> in(num); \ + const char *name = "vector_" #TYPE #N "_insert"; \ + \ + RandomSeed seed(gRandomSeed); \ + \ + for (int i = 0; i < num; i++) \ + { \ + in[i] = genrand<Ts>(seed); \ + } \ + \ + return test_insert<Ts, Tv>(deviceID, context, queue, name, in, N); \ } +TEST_VECTOR_INSERT(half, 8) TEST_VECTOR_INSERT(int, 4) TEST_VECTOR_INSERT(float, 4) TEST_VECTOR_INSERT(long, 2) diff --git a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp index 0859668c..0be4e8b7 100644 --- a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp +++ b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp @@ -17,6 +17,8 @@ or Khronos Conformance Test Source License Agreement as executed between Khronos #include <sstream> #include <string> +using half = cl_half; + template<typename Tv, typename Ts> int test_vector_times_scalar(cl_device_id deviceID, cl_context context, @@ -32,6 +34,16 @@ int test_vector_times_scalar(cl_device_id deviceID, } } + if (std::string(Tname).find("half") != std::string::npos) + { + if (!is_extension_available(deviceID, "cl_khr_fp16")) + { + log_info("Extension cl_khr_fp16 not supported; skipping half " + "tests.\n"); + return 0; + } + } + cl_int err = CL_SUCCESS; int num = (int)h_lhs.size(); size_t lhs_bytes = num * sizeof(Tv); @@ -171,5 +183,7 @@ int test_vector_times_scalar(cl_device_id deviceID, lhs, rhs); \ } + TEST_VECTOR_TIMES_SCALAR(float, 4) TEST_VECTOR_TIMES_SCALAR(double, 4) +TEST_VECTOR_TIMES_SCALAR(half, 4) diff --git a/test_conformance/spirv_new/types.hpp b/test_conformance/spirv_new/types.hpp index e7fceba0..728b2445 100644 --- a/test_conformance/spirv_new/types.hpp +++ b/test_conformance/spirv_new/types.hpp @@ -43,6 +43,8 @@ VEC_NOT_EQ_FUNC(cl_float, 2) VEC_NOT_EQ_FUNC(cl_float, 4) VEC_NOT_EQ_FUNC(cl_double, 2) VEC_NOT_EQ_FUNC(cl_double, 4) +VEC_NOT_EQ_FUNC(cl_half, 2) +VEC_NOT_EQ_FUNC(cl_half, 4) template<typename T> bool isNotEqual(const T &lhs, const T &rhs) @@ -109,6 +111,9 @@ GENRAND_REAL_FUNC(cl_float, 2) GENRAND_REAL_FUNC(cl_float, 4) GENRAND_REAL_FUNC(cl_double, 2) GENRAND_REAL_FUNC(cl_double, 4) +GENRAND_REAL_FUNC(cl_half, 2) +GENRAND_REAL_FUNC(cl_half, 4) +GENRAND_REAL_FUNC(cl_half, 8) template<> inline cl_half genrandReal<cl_half>(RandomSeed &seed) { @@ -157,6 +162,8 @@ Tv negOp(Tv in) return -in; } +inline cl_half negOpHalf(cl_half v) { return v ^ 0x8000; } + template<typename Tv> Tv notOp(Tv in) { diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index f779ef37..d9dfc3b8 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -483,29 +483,30 @@ template <typename Ty, ShuffleOp operation> struct SHF static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, const WorkGroupParams &test_params) { - int ii, i, j, k, n; + int ii, k; + size_t n; cl_uint l; - int nw = test_params.local_workgroup_size; - int ns = test_params.subgroup_size; + size_t nw = test_params.local_workgroup_size; + size_t ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - int nj = (nw + ns - 1) / ns; + size_t nj = (nw + ns - 1) / ns; Ty tr, rr; ng = ng / nw; for (k = 0; k < ng; ++k) { // for each work_group - for (j = 0; j < nw; ++j) + for (size_t j = 0; j < nw; ++j) { // inside the work_group mx[j] = x[j]; // read host inputs for work_group my[j] = y[j]; // read device outputs for work_group } - for (j = 0; j < nj; ++j) + for (size_t j = 0; j < nj; ++j) { // for each subgroup ii = j * ns; n = ii + ns > nw ? nw - ii : ns; - for (i = 0; i < n; ++i) + for (size_t i = 0; i < n; ++i) { // inside the subgroup // shuffle index storage int midx = 4 * ii + 4 * i + 2; diff --git a/test_conformance/subgroups/subhelpers.cpp b/test_conformance/subgroups/subhelpers.cpp index 11268f64..440cde20 100644 --- a/test_conformance/subgroups/subhelpers.cpp +++ b/test_conformance/subgroups/subhelpers.cpp @@ -206,7 +206,7 @@ void set_last_workgroup_params(int non_uniform_size, int &number_of_subgroups, } void fill_and_shuffle_safe_values(std::vector<cl_ulong> &safe_values, - int sb_size) + size_t sb_size) { // max product is 720, cl_half has enough precision for it const std::vector<cl_ulong> non_one_values{ 2, 3, 4, 5, 6 }; diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index bcb523cf..ed92e5d3 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -44,7 +44,7 @@ cl_uint4 generate_bit_mask(cl_uint subgroup_local_id, // for each subgroup values defined different values // for rest of workitems set 1 shuffle values void fill_and_shuffle_safe_values(std::vector<cl_ulong> &safe_values, - int sb_size); + size_t sb_size); struct WorkGroupParams { diff --git a/test_conformance/subgroups/test_workitem.cpp b/test_conformance/subgroups/test_workitem.cpp index b69f3138..5b2a5eb8 100644 --- a/test_conformance/subgroups/test_workitem.cpp +++ b/test_conformance/subgroups/test_workitem.cpp @@ -36,7 +36,7 @@ struct get_test_data }; static int check_group(const get_test_data *result, int nw, cl_uint ensg, - int maxwgs) + size_t maxwgs) { int first = -1; int last = -1; @@ -168,7 +168,7 @@ static int check_group(const get_test_data *result, int nw, cl_uint ensg, j = (result[first].subGroupSize + 31) / 32 * result[i].subGroupId + (result[i].subGroupLocalId >> 5); - if (j < sizeof(hit) / 4) + if (j < static_cast<int>(sizeof(hit) / 4)) { cl_uint b = 1U << (result[i].subGroupLocalId & 0x1fU); if ((hit[j] & b) != 0) @@ -191,7 +191,7 @@ int test_work_item_functions(cl_device_id device, cl_context context, static const size_t lsize = 200; int error; int i, j, k, q, r, nw; - int maxwgs; + size_t maxwgs; cl_uint ensg; size_t global; size_t local; @@ -235,7 +235,7 @@ int test_work_item_functions(cl_device_id device, cl_context context, error = get_max_allowed_work_group_size(context, kernel, &local, NULL); if (error != 0) return error; - maxwgs = (int)local; + maxwgs = local; // Limit it a bit so we have muliple work groups // Ideally this will still be large enough to give us multiple subgroups diff --git a/test_conformance/vulkan/main.cpp b/test_conformance/vulkan/main.cpp index 5901420a..eb1afeb0 100644 --- a/test_conformance/vulkan/main.cpp +++ b/test_conformance/vulkan/main.cpp @@ -52,7 +52,8 @@ static void params_reset() } extern int test_buffer_common(cl_device_id device_, cl_context context_, - cl_command_queue queue_, int numElements_); + cl_command_queue queue_, int numElements_, + float use_fence); extern int test_image_common(cl_device_id device_, cl_context context_, cl_command_queue queue_, int numElements_); @@ -61,7 +62,7 @@ int test_buffer_single_queue(cl_device_id device_, cl_context context_, { params_reset(); log_info("RUNNING TEST WITH ONE QUEUE...... \n\n"); - return test_buffer_common(device_, context_, queue_, numElements_); + return test_buffer_common(device_, context_, queue_, numElements_, false); } int test_buffer_multiple_queue(cl_device_id device_, cl_context context_, cl_command_queue queue_, int numElements_) @@ -69,7 +70,7 @@ int test_buffer_multiple_queue(cl_device_id device_, cl_context context_, params_reset(); numCQ = 2; log_info("RUNNING TEST WITH TWO QUEUE...... \n\n"); - return test_buffer_common(device_, context_, queue_, numElements_); + return test_buffer_common(device_, context_, queue_, numElements_, false); } int test_buffer_multiImport_sameCtx(cl_device_id device_, cl_context context_, cl_command_queue queue_, int numElements_) @@ -78,7 +79,7 @@ int test_buffer_multiImport_sameCtx(cl_device_id device_, cl_context context_, multiImport = true; log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT " "IN SAME CONTEXT...... \n\n"); - return test_buffer_common(device_, context_, queue_, numElements_); + return test_buffer_common(device_, context_, queue_, numElements_, false); } int test_buffer_multiImport_diffCtx(cl_device_id device_, cl_context context_, cl_command_queue queue_, int numElements_) @@ -88,7 +89,45 @@ int test_buffer_multiImport_diffCtx(cl_device_id device_, cl_context context_, multiCtx = true; log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT " "IN DIFFERENT CONTEXT...... \n\n"); - return test_buffer_common(device_, context_, queue_, numElements_); + return test_buffer_common(device_, context_, queue_, numElements_, false); +} +int test_buffer_single_queue_fence(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + log_info("RUNNING TEST WITH ONE QUEUE...... \n\n"); + return test_buffer_common(device_, context_, queue_, numElements_, true); +} +int test_buffer_multiple_queue_fence(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + numCQ = 2; + log_info("RUNNING TEST WITH TWO QUEUE...... \n\n"); + return test_buffer_common(device_, context_, queue_, numElements_, true); +} +int test_buffer_multiImport_sameCtx_fence(cl_device_id device_, + cl_context context_, + cl_command_queue queue_, + int numElements_) +{ + params_reset(); + multiImport = true; + log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT " + "IN SAME CONTEXT...... \n\n"); + return test_buffer_common(device_, context_, queue_, numElements_, true); +} +int test_buffer_multiImport_diffCtx_fence(cl_device_id device_, + cl_context context_, + cl_command_queue queue_, + int numElements_) +{ + params_reset(); + multiImport = true; + multiCtx = true; + log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT " + "IN DIFFERENT CONTEXT...... \n\n"); + return test_buffer_common(device_, context_, queue_, numElements_, true); } int test_image_single_queue(cl_device_id device_, cl_context context_, cl_command_queue queue_, int numElements_) @@ -110,6 +149,10 @@ test_definition test_list[] = { ADD_TEST(buffer_single_queue), ADD_TEST(buffer_multiple_queue), ADD_TEST(buffer_multiImport_sameCtx), ADD_TEST(buffer_multiImport_diffCtx), + ADD_TEST(buffer_single_queue_fence), + ADD_TEST(buffer_multiple_queue_fence), + ADD_TEST(buffer_multiImport_sameCtx_fence), + ADD_TEST(buffer_multiImport_diffCtx_fence), ADD_TEST(image_single_queue), ADD_TEST(image_multiple_queue), ADD_TEST(consistency_external_buffer), @@ -142,7 +185,6 @@ bool useSingleImageKernel = false; bool useDeviceLocal = false; bool disableNTHandleType = false; bool enableOffset = false; -bool non_dedicated = false; static void printUsage(const char *execName) { @@ -189,10 +231,6 @@ size_t parseParams(int argc, const char *argv[], const char **argList) { enableOffset = true; } - if (!strcmp(argv[i], "--non_dedicated")) - { - non_dedicated = true; - } if (strcmp(argv[i], "-h") == 0) { printUsage(argv[0]); diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp b/test_conformance/vulkan/test_vulkan_api_consistency.cpp index f22ac319..d12b3bfe 100644 --- a/test_conformance/vulkan/test_vulkan_api_consistency.cpp +++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp @@ -81,10 +81,11 @@ int test_consistency_external_buffer(cl_device_id deviceID, cl_context _context, const VulkanMemoryTypeList& memoryTypeList = vkDummyBuffer.getMemoryTypeList(); - VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory( - vkDevice, bufferSize, memoryTypeList[0], vkExternalMemoryHandleType); VulkanBufferList vkBufferList(1, vkDevice, bufferSize, vkExternalMemoryHandleType); + VulkanDeviceMemory* vkDeviceMem = + new VulkanDeviceMemory(vkDevice, vkBufferList[0], memoryTypeList[0], + vkExternalMemoryHandleType); vkDeviceMem->bindBuffer(vkBufferList[0], 0); @@ -231,22 +232,27 @@ int test_consistency_external_image(cl_device_id deviceID, cl_context _context, VulkanExternalMemoryHandleType vkExternalMemoryHandleType = getSupportedVulkanExternalMemoryHandleTypeList()[0]; - VulkanImage2D* vkImage2D = - new VulkanImage2D(vkDevice, VULKAN_FORMAT_R8G8B8A8_UNORM, width, height, - 1, vkExternalMemoryHandleType); - const VulkanMemoryTypeList& memoryTypeList = vkImage2D->getMemoryTypeList(); - uint64_t totalImageMemSize = vkImage2D->getSize(); + VulkanImageTiling vulkanImageTiling = + vkClExternalMemoryHandleTilingAssumption( + deviceID, vkExternalMemoryHandleType, &errNum); + ASSERT_SUCCESS(errNum, "Failed to query OpenCL tiling mode"); + + VulkanImage2D vkImage2D = + VulkanImage2D(vkDevice, VULKAN_FORMAT_R8G8B8A8_UNORM, width, height, + vulkanImageTiling, 1, vkExternalMemoryHandleType); + + const VulkanMemoryTypeList& memoryTypeList = vkImage2D.getMemoryTypeList(); + uint64_t totalImageMemSize = vkImage2D.getSize(); log_info("Memory type index: %lu\n", (uint32_t)memoryTypeList[0]); log_info("Memory type property: %d\n", memoryTypeList[0].getMemoryTypeProperty()); log_info("Image size : %d\n", totalImageMemSize); - VulkanDeviceMemory* vkDeviceMem = - new VulkanDeviceMemory(vkDevice, totalImageMemSize, memoryTypeList[0], - vkExternalMemoryHandleType); - vkDeviceMem->bindImage(*vkImage2D, 0); + VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory( + vkDevice, vkImage2D, memoryTypeList[0], vkExternalMemoryHandleType); + vkDeviceMem->bindImage(vkImage2D, 0); void* handle = NULL; int fd; @@ -299,7 +305,7 @@ int test_consistency_external_image(cl_device_id deviceID, cl_context _context, extMemProperties.push_back(0); const VkImageCreateInfo VulkanImageCreateInfo = - vkImage2D->getVkImageCreateInfo(); + vkImage2D.getVkImageCreateInfo(); errNum = getCLImageInfoFromVkImageInfo( &VulkanImageCreateInfo, totalImageMemSize, &img_format, &image_desc); diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp index 9b0bc9de..559625d7 100644 --- a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp @@ -21,6 +21,7 @@ #include <assert.h> #include <vector> #include <iostream> +#include <memory> #include <string.h> #include "harness/errorHelpers.h" @@ -82,7 +83,8 @@ __kernel void checkKernel(__global unsigned char *ptr, int size, int expVal, __g int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, cl_command_queue &cmd_queue2, cl_kernel *kernel, cl_kernel &verify_kernel, VulkanDevice &vkDevice, - uint32_t numBuffers, uint32_t bufferSize) + uint32_t numBuffers, uint32_t bufferSize, + bool use_fence) { int err = CL_SUCCESS; size_t global_work_size[1]; @@ -117,14 +119,18 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + std::shared_ptr<VulkanFence> fence = nullptr; VulkanQueue &vkQueue = vkDevice.getQueue(); std::vector<char> vkBufferShader = readFile("buffer.spv"); VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); - VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( - MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList; + vkDescriptorSetLayoutBindingList.addBinding( + 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); + vkDescriptorSetLayoutBindingList.addBinding( + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS); VulkanDescriptorSetLayout vkDescriptorSetLayout( vkDevice, vkDescriptorSetLayoutBindingList); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); @@ -136,10 +142,17 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, vkDescriptorSetLayout); - clVk2CLExternalSemaphore = new clExternalSemaphore( - vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - clCl2VkExternalSemaphore = new clExternalSemaphore( - vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + if (use_fence) + { + fence = std::make_shared<VulkanFence>(vkDevice); + } + else + { + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + } const uint32_t maxIter = innerIterations; VulkanCommandPool vkCommandPool(vkDevice); @@ -179,9 +192,9 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, for (size_t bIdx = 0; bIdx < numBuffers; bIdx++) { - vkBufferListDeviceMemory.push_back( - new VulkanDeviceMemory(vkDevice, bufferSize, memoryType, - vkExternalMemoryHandleType)); + vkBufferListDeviceMemory.push_back(new VulkanDeviceMemory( + vkDevice, vkBufferList[bIdx], memoryType, + vkExternalMemoryHandleType)); externalMemory.push_back(new clExternalMemory( vkBufferListDeviceMemory[bIdx], vkExternalMemoryHandleType, 0, bufferSize, context, deviceId)); @@ -200,8 +213,8 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], 0); buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer(); - vkDescriptorSet.update((uint32_t)bIdx + 1, vkBufferList[bIdx]); } + vkDescriptorSet.updateArray(1, numBuffers, vkBufferList); vkCommandBuffer.begin(); vkCommandBuffer.bindPipeline(vkComputePipeline); vkCommandBuffer.bindDescriptorSets( @@ -227,16 +240,27 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, for (uint32_t iter = 0; iter < maxIter; iter++) { - if (iter == 0) + if (use_fence) { - vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + fence->reset(); + vkQueue.submit(vkCommandBuffer, fence); + fence->wait(); } else { - vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, - vkVk2CLSemaphore); + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + + clVk2CLExternalSemaphore->wait(cmd_queue1); } - clVk2CLExternalSemaphore->wait(cmd_queue1); + err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t), (void *)&bufferSize); @@ -286,7 +310,14 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, goto CLEANUP; } - if (iter != (maxIter - 1)) + if (use_fence) + { + clFlush(cmd_queue1); + clFlush(cmd_queue2); + clFinish(cmd_queue1); + clFinish(cmd_queue2); + } + else if (!use_fence && iter != (maxIter - 1)) { clCl2VkExternalSemaphore->signal(cmd_queue2); } @@ -387,8 +418,11 @@ CLEANUP: } if (program) clReleaseProgram(program); if (kernel_cq) clReleaseKernel(kernel_cq); - if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; - if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + if (!use_fence) + { + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + } if (error_2) free(error_2); if (error_1) clReleaseMemObject(error_1); @@ -398,7 +432,7 @@ CLEANUP: int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel, cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers, - uint32_t bufferSize) + uint32_t bufferSize, bool use_fence) { log_info("RUNNING TEST WITH ONE QUEUE...... \n\n"); size_t global_work_size[1]; @@ -416,13 +450,17 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + std::shared_ptr<VulkanFence> fence = nullptr; VulkanQueue &vkQueue = vkDevice.getQueue(); std::vector<char> vkBufferShader = readFile("buffer.spv"); VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); - VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( - MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList; + vkDescriptorSetLayoutBindingList.addBinding( + 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); + vkDescriptorSetLayoutBindingList.addBinding( + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS); VulkanDescriptorSetLayout vkDescriptorSetLayout( vkDevice, vkDescriptorSetLayoutBindingList); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); @@ -434,10 +472,18 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, vkDescriptorSetLayout); - clVk2CLExternalSemaphore = new clExternalSemaphore( - vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - clCl2VkExternalSemaphore = new clExternalSemaphore( - vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + if (use_fence) + { + fence = std::make_shared<VulkanFence>(vkDevice); + } + else + { + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + } + const uint32_t maxIter = innerIterations; VulkanCommandPool vkCommandPool(vkDevice); VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool); @@ -477,9 +523,9 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, for (size_t bIdx = 0; bIdx < numBuffers; bIdx++) { - vkBufferListDeviceMemory.push_back( - new VulkanDeviceMemory(vkDevice, bufferSize, memoryType, - vkExternalMemoryHandleType)); + vkBufferListDeviceMemory.push_back(new VulkanDeviceMemory( + vkDevice, vkBufferList[bIdx], memoryType, + vkExternalMemoryHandleType)); externalMemory.push_back(new clExternalMemory( vkBufferListDeviceMemory[bIdx], vkExternalMemoryHandleType, 0, bufferSize, context, deviceId)); @@ -498,8 +544,9 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], 0); buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer(); - vkDescriptorSet.update((uint32_t)bIdx + 1, vkBufferList[bIdx]); } + vkDescriptorSet.updateArray(1, vkBufferList.size(), vkBufferList); + vkCommandBuffer.begin(); vkCommandBuffer.bindPipeline(vkComputePipeline); vkCommandBuffer.bindDescriptorSets( @@ -526,16 +573,26 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, for (uint32_t iter = 0; iter < maxIter; iter++) { - if (iter == 0) + if (use_fence) { - vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + fence->reset(); + vkQueue.submit(vkCommandBuffer, fence); + fence->wait(); } else { - vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, - vkVk2CLSemaphore); + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + + clVk2CLExternalSemaphore->wait(cmd_queue1); } - clVk2CLExternalSemaphore->wait(cmd_queue1); err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t), (void *)&bufferSize); @@ -562,7 +619,12 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, " error\n"); goto CLEANUP; } - if (iter != (maxIter - 1)) + if (use_fence) + { + clFlush(cmd_queue1); + clFinish(cmd_queue1); + } + else if (!use_fence && (iter != (maxIter - 1))) { clCl2VkExternalSemaphore->signal(cmd_queue1); } @@ -656,8 +718,13 @@ CLEANUP: delete externalMemory[i]; } } - if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; - if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + + if (!use_fence) + { + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + } + if (error_2) free(error_2); if (error_1) clReleaseMemObject(error_1); return err; @@ -666,7 +733,7 @@ CLEANUP: int run_test_with_multi_import_same_ctx( cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel, cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers, - uint32_t bufferSize, uint32_t bufferSizeForOffset) + uint32_t bufferSize, uint32_t bufferSizeForOffset, float use_fence) { size_t global_work_size[1]; uint8_t *error_2; @@ -687,14 +754,18 @@ int run_test_with_multi_import_same_ctx( getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + std::shared_ptr<VulkanFence> fence = nullptr; VulkanQueue &vkQueue = vkDevice.getQueue(); std::vector<char> vkBufferShader = readFile("buffer.spv"); VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); - VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( - MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList; + vkDescriptorSetLayoutBindingList.addBinding( + 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); + vkDescriptorSetLayoutBindingList.addBinding( + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS); VulkanDescriptorSetLayout vkDescriptorSetLayout( vkDevice, vkDescriptorSetLayoutBindingList); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); @@ -706,10 +777,18 @@ int run_test_with_multi_import_same_ctx( VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, vkDescriptorSetLayout); - clVk2CLExternalSemaphore = new clExternalSemaphore( - vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - clCl2VkExternalSemaphore = new clExternalSemaphore( - vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + if (use_fence) + { + fence = std::make_shared<VulkanFence>(vkDevice); + } + else + { + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + } + const uint32_t maxIter = innerIterations; VulkanCommandPool vkCommandPool(vkDevice); VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool); @@ -767,7 +846,7 @@ int run_test_with_multi_import_same_ctx( if (withOffset == 0) { vkBufferListDeviceMemory.push_back( - new VulkanDeviceMemory(vkDevice, pBufferSize, + new VulkanDeviceMemory(vkDevice, vkBufferList[bIdx], memoryType, vkExternalMemoryHandleType)); } @@ -811,9 +890,8 @@ int run_test_with_multi_import_same_ctx( externalMemory[bIdx][cl_bIdx] ->getExternalMemoryBuffer(); } - vkDescriptorSet.update((uint32_t)bIdx + 1, - vkBufferList[bIdx]); } + vkDescriptorSet.updateArray(1, numBuffers, vkBufferList); vkCommandBuffer.begin(); vkCommandBuffer.bindPipeline(vkComputePipeline); vkCommandBuffer.bindDescriptorSets( @@ -832,16 +910,34 @@ int run_test_with_multi_import_same_ctx( for (uint32_t iter = 0; iter < maxIter; iter++) { - if (iter == 0) + if (use_fence) { - vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + fence->reset(); + vkQueue.submit(vkCommandBuffer, fence); + fence->wait(); } else { - vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, - vkVk2CLSemaphore); + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } } - clVk2CLExternalSemaphore->wait(cmd_queue1); + + if (use_fence) + { + fence->wait(); + } + else + { + clVk2CLExternalSemaphore->wait(cmd_queue1); + } + for (uint8_t launchIter = 0; launchIter < numImports; launchIter++) { @@ -874,7 +970,11 @@ int run_test_with_multi_import_same_ctx( goto CLEANUP; } } - if (iter != (maxIter - 1)) + if (use_fence) + { + clFinish(cmd_queue1); + } + else if (!use_fence && iter != (maxIter - 1)) { clCl2VkExternalSemaphore->signal(cmd_queue1); } @@ -987,8 +1087,13 @@ CLEANUP: } } } - if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; - if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + + if (!use_fence) + { + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + } + if (error_2) free(error_2); if (error_1) clReleaseMemObject(error_1); return err; @@ -998,7 +1103,8 @@ int run_test_with_multi_import_diff_ctx( cl_context &context, cl_context &context2, cl_command_queue &cmd_queue1, cl_command_queue &cmd_queue2, cl_kernel *kernel1, cl_kernel *kernel2, cl_kernel &verify_kernel, cl_kernel verify_kernel2, VulkanDevice &vkDevice, - uint32_t numBuffers, uint32_t bufferSize, uint32_t bufferSizeForOffset) + uint32_t numBuffers, uint32_t bufferSize, uint32_t bufferSizeForOffset, + float use_fence) { size_t global_work_size[1]; uint8_t *error_3; @@ -1023,6 +1129,7 @@ int run_test_with_multi_import_diff_ctx( getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + std::shared_ptr<VulkanFence> fence = nullptr; VulkanQueue &vkQueue = vkDevice.getQueue(); @@ -1042,15 +1149,24 @@ int run_test_with_multi_import_diff_ctx( VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, vkDescriptorSetLayout); - clVk2CLExternalSemaphore = new clExternalSemaphore( - vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - clCl2VkExternalSemaphore = new clExternalSemaphore( - vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - - clVk2CLExternalSemaphore2 = new clExternalSemaphore( - vkVk2CLSemaphore, context2, vkExternalSemaphoreHandleType, deviceId); - clCl2VkExternalSemaphore2 = new clExternalSemaphore( - vkCl2VkSemaphore, context2, vkExternalSemaphoreHandleType, deviceId); + if (use_fence) + { + fence = std::make_shared<VulkanFence>(vkDevice); + } + else + { + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + + clVk2CLExternalSemaphore2 = + new clExternalSemaphore(vkVk2CLSemaphore, context2, + vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore2 = + new clExternalSemaphore(vkCl2VkSemaphore, context2, + vkExternalSemaphoreHandleType, deviceId); + } const uint32_t maxIter = innerIterations; VulkanCommandPool vkCommandPool(vkDevice); @@ -1192,16 +1308,33 @@ int run_test_with_multi_import_diff_ctx( for (uint32_t iter = 0; iter < maxIter; iter++) { - if (iter == 0) + if (use_fence) { - vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + fence->reset(); + vkQueue.submit(vkCommandBuffer, fence); + fence->wait(); } else { - vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, - vkVk2CLSemaphore); + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + } + + if (use_fence) + { + fence->wait(); + } + else + { + clVk2CLExternalSemaphore->wait(cmd_queue1); } - clVk2CLExternalSemaphore->wait(cmd_queue1); for (uint8_t launchIter = 0; launchIter < numImports; launchIter++) @@ -1235,7 +1368,11 @@ int run_test_with_multi_import_diff_ctx( goto CLEANUP; } } - if (iter != (maxIter - 1)) + if (use_fence) + { + clFinish(cmd_queue1); + } + else if (!use_fence && iter != (maxIter - 1)) { clCl2VkExternalSemaphore->signal(cmd_queue1); } @@ -1243,16 +1380,33 @@ int run_test_with_multi_import_diff_ctx( clFinish(cmd_queue1); for (uint32_t iter = 0; iter < maxIter; iter++) { - if (iter == 0) + if (use_fence) { - vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + fence->reset(); + vkQueue.submit(vkCommandBuffer, fence); + fence->wait(); } else { - vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, - vkVk2CLSemaphore); + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + } + + if (use_fence) + { + fence->wait(); + } + else + { + clVk2CLExternalSemaphore2->wait(cmd_queue2); } - clVk2CLExternalSemaphore2->wait(cmd_queue2); for (uint8_t launchIter = 0; launchIter < numImports; launchIter++) @@ -1286,7 +1440,11 @@ int run_test_with_multi_import_diff_ctx( goto CLEANUP; } } - if (iter != (maxIter - 1)) + if (use_fence) + { + clFinish(cmd_queue2); + } + else if (!use_fence && iter != (maxIter - 1)) { clCl2VkExternalSemaphore2->signal(cmd_queue2); } @@ -1474,10 +1632,15 @@ CLEANUP: } } } - if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; - if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; - if (clVk2CLExternalSemaphore2) delete clVk2CLExternalSemaphore2; - if (clCl2VkExternalSemaphore2) delete clCl2VkExternalSemaphore2; + + if (!use_fence) + { + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + if (clVk2CLExternalSemaphore2) delete clVk2CLExternalSemaphore2; + if (clCl2VkExternalSemaphore2) delete clCl2VkExternalSemaphore2; + } + if (error_3) free(error_3); if (error_1) clReleaseMemObject(error_1); if (error_2) clReleaseMemObject(error_2); @@ -1485,7 +1648,8 @@ CLEANUP: } int test_buffer_common(cl_device_id device_, cl_context context_, - cl_command_queue queue_, int numElements_) + cl_command_queue queue_, int numElements_, + float use_fence) { int current_device = 0; @@ -1738,26 +1902,26 @@ int test_buffer_common(cl_device_id device_, cl_context context_, { errNum = run_test_with_multi_import_same_ctx( context, cmd_queue1, kernel, verify_kernel, vkDevice, - numBuffers, bufferSize, bufferSizeForOffset); + numBuffers, bufferSize, bufferSizeForOffset, use_fence); } else if (multiImport && multiCtx) { errNum = run_test_with_multi_import_diff_ctx( context, context2, cmd_queue1, cmd_queue3, kernel, kernel2, verify_kernel, verify_kernel2, vkDevice, numBuffers, - bufferSize, bufferSizeForOffset); + bufferSize, bufferSizeForOffset, use_fence); } else if (numCQ == 2) { errNum = run_test_with_two_queue( context, cmd_queue1, cmd_queue2, kernel, verify_kernel, - vkDevice, numBuffers + 1, bufferSize); + vkDevice, numBuffers + 1, bufferSize, use_fence); } else { - errNum = run_test_with_one_queue(context, cmd_queue1, kernel, - verify_kernel, vkDevice, - numBuffers, bufferSize); + errNum = run_test_with_one_queue( + context, cmd_queue1, kernel, verify_kernel, vkDevice, + numBuffers, bufferSize, use_fence); } if (errNum != CL_SUCCESS) { diff --git a/test_conformance/vulkan/test_vulkan_interop_image.cpp b/test_conformance/vulkan/test_vulkan_interop_image.cpp index 47a31665..5f1f6e4b 100644 --- a/test_conformance/vulkan/test_vulkan_interop_image.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_image.cpp @@ -226,9 +226,11 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, srcBufferPtr = (char *)malloc(maxImage2DSize); dstBufferPtr = (char *)malloc(maxImage2DSize); - VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( - VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, - VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList; + vkDescriptorSetLayoutBindingList.addBinding( + 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); + vkDescriptorSetLayoutBindingList.addBinding( + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS); VulkanDescriptorSetLayout vkDescriptorSetLayout( vkDevice, vkDescriptorSetLayoutBindingList); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); @@ -255,10 +257,10 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, clCl2VkExternalSemaphore = new clExternalSemaphore( vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory1; - std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory2; - std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory1; - std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory2; + std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory1; + std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory2; + std::vector<clExternalMemoryImage *> externalMemory1; + std::vector<clExternalMemoryImage *> externalMemory2; std::vector<char> vkImage2DShader; for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++) @@ -352,8 +354,6 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, VulkanExternalMemoryHandleType vkExternalMemoryHandleType = vkExternalMemoryHandleTypeList[emhtIdx]; - log_info("External memory handle type: %d \n", - vkExternalMemoryHandleType); if ((true == disableNTHandleType) && (VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT == vkExternalMemoryHandleType)) @@ -361,9 +361,19 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, // Skip running for WIN32 NT handle. continue; } + log_info("External memory handle type: %d \n", + vkExternalMemoryHandleType); + VulkanImageTiling vulkanImageTiling = + vkClExternalMemoryHandleTilingAssumption( + deviceId, + vkExternalMemoryHandleTypeList[emhtIdx], &err); + ASSERT_SUCCESS(err, + "Failed to query OpenCL tiling mode"); + VulkanImage2D vkDummyImage2D( vkDevice, vkFormatList[0], widthList[0], - heightList[0], 1, vkExternalMemoryHandleType); + heightList[0], vulkanImageTiling, 1, + vkExternalMemoryHandleType); const VulkanMemoryTypeList &memoryTypeList = vkDummyImage2D.getMemoryTypeList(); @@ -390,118 +400,73 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, { VulkanImage2D vkImage2D( vkDevice, vkFormat, width, height, - numMipLevels, vkExternalMemoryHandleType); + vulkanImageTiling, numMipLevels, + vkExternalMemoryHandleType); ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize); totalImageMemSize = ROUND_UP(vkImage2D.getSize(), vkImage2D.getAlignment()); } - VulkanImage2DList vkNonDedicatedImage2DList( + VulkanImage2DList vkImage2DList( num2DImages, vkDevice, vkFormat, width, height, - numMipLevels, vkExternalMemoryHandleType); + vulkanImageTiling, numMipLevels, + vkExternalMemoryHandleType); for (size_t bIdx = 0; bIdx < num2DImages; bIdx++) { - if (non_dedicated) - { - vkNonDedicatedImage2DListDeviceMemory1 - .push_back(new VulkanDeviceMemory( - vkDevice, totalImageMemSize, - memoryType, - vkExternalMemoryHandleType)); - } - else - { - vkNonDedicatedImage2DListDeviceMemory1 - .push_back(new VulkanDeviceMemory( - vkDevice, - vkNonDedicatedImage2DList[bIdx], - memoryType, - vkExternalMemoryHandleType)); - } - vkNonDedicatedImage2DListDeviceMemory1[bIdx] - ->bindImage(vkNonDedicatedImage2DList[bIdx], - 0); - nonDedicatedExternalMemory1.push_back( + vkImage2DListDeviceMemory1.push_back( + new VulkanDeviceMemory( + vkDevice, vkImage2DList[bIdx], + memoryType, + vkExternalMemoryHandleType)); + vkImage2DListDeviceMemory1[bIdx]->bindImage( + vkImage2DList[bIdx], 0); + externalMemory1.push_back( new clExternalMemoryImage( - *vkNonDedicatedImage2DListDeviceMemory1 - [bIdx], + *vkImage2DListDeviceMemory1[bIdx], vkExternalMemoryHandleType, context, totalImageMemSize, width, height, 0, - vkNonDedicatedImage2DList[bIdx], - deviceId)); + vkImage2DList[bIdx], deviceId)); } - VulkanImageViewList vkNonDedicatedImage2DViewList( - vkDevice, vkNonDedicatedImage2DList); - VulkanImage2DList vkNonDedicatedImage2DList2( + VulkanImageViewList vkImage2DViewList( + vkDevice, vkImage2DList); + VulkanImage2DList vkImage2DList2( num2DImages, vkDevice, vkFormat, width, height, - numMipLevels, vkExternalMemoryHandleType); + vulkanImageTiling, numMipLevels, + vkExternalMemoryHandleType); for (size_t bIdx = 0; bIdx < num2DImages; bIdx++) { - if (non_dedicated) - { - vkNonDedicatedImage2DListDeviceMemory2 - .push_back(new VulkanDeviceMemory( - vkDevice, totalImageMemSize, - memoryType, - vkExternalMemoryHandleType)); - } - else - { - vkNonDedicatedImage2DListDeviceMemory2 - .push_back(new VulkanDeviceMemory( - vkDevice, - vkNonDedicatedImage2DList2[bIdx], - memoryType, - vkExternalMemoryHandleType)); - } - vkNonDedicatedImage2DListDeviceMemory2[bIdx] - ->bindImage( - vkNonDedicatedImage2DList2[bIdx], 0); - nonDedicatedExternalMemory2.push_back( + vkImage2DListDeviceMemory2.push_back( + new VulkanDeviceMemory( + vkDevice, vkImage2DList2[bIdx], + memoryType, + vkExternalMemoryHandleType)); + vkImage2DListDeviceMemory2[bIdx]->bindImage( + vkImage2DList2[bIdx], 0); + externalMemory2.push_back( new clExternalMemoryImage( - *vkNonDedicatedImage2DListDeviceMemory2 - [bIdx], + *vkImage2DListDeviceMemory2[bIdx], vkExternalMemoryHandleType, context, totalImageMemSize, width, height, 0, - vkNonDedicatedImage2DList2[bIdx], - deviceId)); + vkImage2DList2[bIdx], deviceId)); } - VulkanImageViewList vkDedicatedImage2DViewList( - vkDevice, vkNonDedicatedImage2DList2); cl_mem external_mem_image1[5]; cl_mem external_mem_image2[5]; for (int i = 0; i < num2DImages; i++) { external_mem_image1[i] = - nonDedicatedExternalMemory1[i] + externalMemory1[i] ->getExternalMemoryImage(); external_mem_image2[i] = - nonDedicatedExternalMemory2[i] + externalMemory2[i] ->getExternalMemoryImage(); } - VulkanImage2DList &vkImage2DList = - vkNonDedicatedImage2DList; - VulkanImageViewList &vkImage2DViewList = - vkNonDedicatedImage2DViewList; clCl2VkExternalSemaphore->signal(cmd_queue1); if (!useSingleImageKernel) { - for (size_t i2DIdx = 0; - i2DIdx < vkImage2DList.size(); i2DIdx++) - { - for (uint32_t mipLevel = 0; - mipLevel < numMipLevels; mipLevel++) - { - uint32_t i2DvIdx = - (uint32_t)(i2DIdx * numMipLevels) - + mipLevel; - vkDescriptorSet.update( - 1 + i2DvIdx, - vkImage2DViewList[i2DvIdx]); - } - } + vkDescriptorSet.updateArray(1, + vkImage2DViewList); vkCopyCommandBuffer.begin(); vkCopyCommandBuffer.pipelineBarrier( vkImage2DList, @@ -743,29 +708,25 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, } for (int i = 0; i < num2DImages; i++) { - delete vkNonDedicatedImage2DListDeviceMemory1 - [i]; - delete vkNonDedicatedImage2DListDeviceMemory2 - [i]; - delete nonDedicatedExternalMemory1[i]; - delete nonDedicatedExternalMemory2[i]; + delete vkImage2DListDeviceMemory1[i]; + delete vkImage2DListDeviceMemory2[i]; + delete externalMemory1[i]; + delete externalMemory2[i]; } - vkNonDedicatedImage2DListDeviceMemory1.erase( - vkNonDedicatedImage2DListDeviceMemory1.begin(), - vkNonDedicatedImage2DListDeviceMemory1.begin() - + num2DImages); - vkNonDedicatedImage2DListDeviceMemory2.erase( - vkNonDedicatedImage2DListDeviceMemory2.begin(), - vkNonDedicatedImage2DListDeviceMemory2.begin() + vkImage2DListDeviceMemory1.erase( + vkImage2DListDeviceMemory1.begin(), + vkImage2DListDeviceMemory1.begin() + num2DImages); - nonDedicatedExternalMemory1.erase( - nonDedicatedExternalMemory1.begin(), - nonDedicatedExternalMemory1.begin() - + num2DImages); - nonDedicatedExternalMemory2.erase( - nonDedicatedExternalMemory2.begin(), - nonDedicatedExternalMemory2.begin() + vkImage2DListDeviceMemory2.erase( + vkImage2DListDeviceMemory2.begin(), + vkImage2DListDeviceMemory2.begin() + num2DImages); + externalMemory1.erase(externalMemory1.begin(), + externalMemory1.begin() + + num2DImages); + externalMemory2.erase(externalMemory2.begin(), + externalMemory2.begin() + + num2DImages); if (CL_SUCCESS != err) { goto CLEANUP; @@ -822,9 +783,11 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, srcBufferPtr = (char *)malloc(maxImage2DSize); dstBufferPtr = (char *)malloc(maxImage2DSize); - VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( - VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, - VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList; + vkDescriptorSetLayoutBindingList.addBinding( + 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1); + vkDescriptorSetLayoutBindingList.addBinding( + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS); VulkanDescriptorSetLayout vkDescriptorSetLayout( vkDevice, vkDescriptorSetLayoutBindingList); VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); @@ -851,10 +814,10 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, clCl2VkExternalSemaphore = new clExternalSemaphore( vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); - std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory1; - std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory2; - std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory1; - std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory2; + std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory1; + std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory2; + std::vector<clExternalMemoryImage *> externalMemory1; + std::vector<clExternalMemoryImage *> externalMemory2; std::vector<char> vkImage2DShader; for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++) @@ -957,9 +920,18 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, // Skip running for WIN32 NT handle. continue; } + + VulkanImageTiling vulkanImageTiling = + vkClExternalMemoryHandleTilingAssumption( + deviceId, + vkExternalMemoryHandleTypeList[emhtIdx], &err); + ASSERT_SUCCESS(err, + "Failed to query OpenCL tiling mode"); + VulkanImage2D vkDummyImage2D( vkDevice, vkFormatList[0], widthList[0], - heightList[0], 1, vkExternalMemoryHandleType); + heightList[0], vulkanImageTiling, 1, + vkExternalMemoryHandleType); const VulkanMemoryTypeList &memoryTypeList = vkDummyImage2D.getMemoryTypeList(); @@ -985,98 +957,78 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, { VulkanImage2D vkImage2D( vkDevice, vkFormat, width, height, - numMipLevels, vkExternalMemoryHandleType); + vulkanImageTiling, numMipLevels, + vkExternalMemoryHandleType); ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize); totalImageMemSize = ROUND_UP(vkImage2D.getSize(), vkImage2D.getAlignment()); } - VulkanImage2DList vkNonDedicatedImage2DList( + VulkanImage2DList vkImage2DList( num2DImages, vkDevice, vkFormat, width, height, - numMipLevels, vkExternalMemoryHandleType); - for (size_t bIdx = 0; - bIdx < vkNonDedicatedImage2DList.size(); + vulkanImageTiling, numMipLevels, + vkExternalMemoryHandleType); + for (size_t bIdx = 0; bIdx < vkImage2DList.size(); bIdx++) { // Create list of Vulkan device memories and // bind the list of Vulkan images. - vkNonDedicatedImage2DListDeviceMemory1 - .push_back(new VulkanDeviceMemory( - vkDevice, totalImageMemSize, memoryType, + vkImage2DListDeviceMemory1.push_back( + new VulkanDeviceMemory( + vkDevice, vkImage2DList[bIdx], + memoryType, vkExternalMemoryHandleType)); - vkNonDedicatedImage2DListDeviceMemory1[bIdx] - ->bindImage(vkNonDedicatedImage2DList[bIdx], - 0); - nonDedicatedExternalMemory1.push_back( + vkImage2DListDeviceMemory1[bIdx]->bindImage( + vkImage2DList[bIdx], 0); + externalMemory1.push_back( new clExternalMemoryImage( - *vkNonDedicatedImage2DListDeviceMemory1 - [bIdx], + *vkImage2DListDeviceMemory1[bIdx], vkExternalMemoryHandleType, context, totalImageMemSize, width, height, 0, - vkNonDedicatedImage2DList[bIdx], - deviceId)); + vkImage2DList[bIdx], deviceId)); } - VulkanImageViewList vkNonDedicatedImage2DViewList( - vkDevice, vkNonDedicatedImage2DList); + VulkanImageViewList vkImage2DViewList( + vkDevice, vkImage2DList); - VulkanImage2DList vkNonDedicatedImage2DList2( + VulkanImage2DList vkImage2DList2( num2DImages, vkDevice, vkFormat, width, height, - numMipLevels, vkExternalMemoryHandleType); - for (size_t bIdx = 0; - bIdx < vkNonDedicatedImage2DList2.size(); + vulkanImageTiling, numMipLevels, + vkExternalMemoryHandleType); + for (size_t bIdx = 0; bIdx < vkImage2DList2.size(); bIdx++) { - vkNonDedicatedImage2DListDeviceMemory2 - .push_back(new VulkanDeviceMemory( - vkDevice, totalImageMemSize, memoryType, + vkImage2DListDeviceMemory2.push_back( + new VulkanDeviceMemory( + vkDevice, vkImage2DList2[bIdx], + memoryType, vkExternalMemoryHandleType)); - vkNonDedicatedImage2DListDeviceMemory2[bIdx] - ->bindImage( - vkNonDedicatedImage2DList2[bIdx], 0); - nonDedicatedExternalMemory2.push_back( + vkImage2DListDeviceMemory2[bIdx]->bindImage( + vkImage2DList2[bIdx], 0); + externalMemory2.push_back( new clExternalMemoryImage( - *vkNonDedicatedImage2DListDeviceMemory2 - [bIdx], + *vkImage2DListDeviceMemory2[bIdx], vkExternalMemoryHandleType, context, totalImageMemSize, width, height, 0, - vkNonDedicatedImage2DList2[bIdx], - deviceId)); + vkImage2DList2[bIdx], deviceId)); } - VulkanImageViewList vkDedicatedImage2DViewList( - vkDevice, vkNonDedicatedImage2DList2); + cl_mem external_mem_image1[4]; cl_mem external_mem_image2[4]; for (int i = 0; i < num2DImages; i++) { external_mem_image1[i] = - nonDedicatedExternalMemory1[i] + externalMemory1[i] ->getExternalMemoryImage(); external_mem_image2[i] = - nonDedicatedExternalMemory2[i] + externalMemory2[i] ->getExternalMemoryImage(); } - VulkanImage2DList &vkImage2DList = - vkNonDedicatedImage2DList; - VulkanImageViewList &vkImage2DViewList = - vkNonDedicatedImage2DViewList; clCl2VkExternalSemaphore->signal(cmd_queue1); if (!useSingleImageKernel) { - for (size_t i2DIdx = 0; - i2DIdx < vkImage2DList.size(); i2DIdx++) - { - for (uint32_t mipLevel = 0; - mipLevel < numMipLevels; mipLevel++) - { - uint32_t i2DvIdx = - (uint32_t)(i2DIdx * numMipLevels) - + mipLevel; - vkDescriptorSet.update( - 1 + i2DvIdx, - vkImage2DViewList[i2DvIdx]); - } - } + vkDescriptorSet.updateArray(1, + vkImage2DViewList); vkCopyCommandBuffer.begin(); vkCopyCommandBuffer.pipelineBarrier( vkImage2DList, @@ -1275,29 +1227,25 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, } for (int i = 0; i < num2DImages; i++) { - delete vkNonDedicatedImage2DListDeviceMemory1 - [i]; - delete vkNonDedicatedImage2DListDeviceMemory2 - [i]; - delete nonDedicatedExternalMemory1[i]; - delete nonDedicatedExternalMemory2[i]; + delete vkImage2DListDeviceMemory1[i]; + delete vkImage2DListDeviceMemory2[i]; + delete externalMemory1[i]; + delete externalMemory2[i]; } - vkNonDedicatedImage2DListDeviceMemory1.erase( - vkNonDedicatedImage2DListDeviceMemory1.begin(), - vkNonDedicatedImage2DListDeviceMemory1.begin() - + num2DImages); - vkNonDedicatedImage2DListDeviceMemory2.erase( - vkNonDedicatedImage2DListDeviceMemory2.begin(), - vkNonDedicatedImage2DListDeviceMemory2.begin() - + num2DImages); - nonDedicatedExternalMemory1.erase( - nonDedicatedExternalMemory1.begin(), - nonDedicatedExternalMemory1.begin() + vkImage2DListDeviceMemory1.erase( + vkImage2DListDeviceMemory1.begin(), + vkImage2DListDeviceMemory1.begin() + num2DImages); - nonDedicatedExternalMemory2.erase( - nonDedicatedExternalMemory2.begin(), - nonDedicatedExternalMemory2.begin() + vkImage2DListDeviceMemory2.erase( + vkImage2DListDeviceMemory2.begin(), + vkImage2DListDeviceMemory2.begin() + num2DImages); + externalMemory1.erase(externalMemory1.begin(), + externalMemory1.begin() + + num2DImages); + externalMemory2.erase(externalMemory2.begin(), + externalMemory2.begin() + + num2DImages); if (CL_SUCCESS != err) { goto CLEANUP; diff --git a/test_conformance/vulkan/vulkan_interop_common.hpp b/test_conformance/vulkan/vulkan_interop_common.hpp index 18d84f09..a1162407 100644 --- a/test_conformance/vulkan/vulkan_interop_common.hpp +++ b/test_conformance/vulkan/vulkan_interop_common.hpp @@ -45,6 +45,5 @@ extern bool useDeviceLocal; extern bool disableNTHandleType; // Enable offset for multiImport of vulkan device memory extern bool enableOffset; -extern bool non_dedicated; #endif // _vulkan_interop_common_hpp_ diff --git a/test_conformance/workgroups/test_wg_all.cpp b/test_conformance/workgroups/test_wg_all.cpp index 41abd124..f9b574e4 100644 --- a/test_conformance/workgroups/test_wg_all.cpp +++ b/test_conformance/workgroups/test_wg_all.cpp @@ -75,7 +75,6 @@ test_work_group_all(cl_device_id device, cl_context context, cl_command_queue qu size_t wg_size[1]; size_t num_elements; int err; - int i; MTdata d; err = create_single_kernel_helper(context, &program, &kernel, 1, @@ -110,7 +109,7 @@ test_work_group_all(cl_device_id device, cl_context context, cl_command_queue qu p = input_ptr[0]; d = init_genrand( gRandomSeed ); - for (i=0; i<(num_elements+1); i++) + for (size_t i = 0; i < (num_elements + 1); i++) { p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d); } diff --git a/test_conformance/workgroups/test_wg_any.cpp b/test_conformance/workgroups/test_wg_any.cpp index e0242cfb..f7ff899a 100644 --- a/test_conformance/workgroups/test_wg_any.cpp +++ b/test_conformance/workgroups/test_wg_any.cpp @@ -75,7 +75,6 @@ test_work_group_any(cl_device_id device, cl_context context, cl_command_queue qu size_t wg_size[1]; size_t num_elements; int err; - int i; MTdata d; err = create_single_kernel_helper(context, &program, &kernel, 1, @@ -110,7 +109,7 @@ test_work_group_any(cl_device_id device, cl_context context, cl_command_queue qu p = input_ptr[0]; d = init_genrand( gRandomSeed ); - for (i=0; i<(num_elements+1); i++) + for (size_t i = 0; i < (num_elements + 1); i++) { p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d); } diff --git a/test_conformance/workgroups/test_wg_broadcast.cpp b/test_conformance/workgroups/test_wg_broadcast.cpp index e24ac7b9..a4cb0c6f 100644 --- a/test_conformance/workgroups/test_wg_broadcast.cpp +++ b/test_conformance/workgroups/test_wg_broadcast.cpp @@ -70,7 +70,7 @@ verify_wg_broadcast_1D(float *inptr, float *outptr, size_t n, size_t wg_size) for (i=0,group_id=0; i<n; i+=wg_size,group_id++) { - int local_size = (n-i) > wg_size ? wg_size : (n-i); + size_t local_size = (n - i) > wg_size ? wg_size : (n - i); float broadcast_result = inptr[i + (group_id % local_size)]; for (j=0; j<local_size; j++) { @@ -172,7 +172,6 @@ test_work_group_broadcast_1D(cl_device_id device, cl_context context, cl_command size_t wg_size[1]; size_t num_elements; int err; - int i; MTdata d; err = create_single_kernel_helper(context, &program, &kernel, 1, @@ -207,7 +206,7 @@ test_work_group_broadcast_1D(cl_device_id device, cl_context context, cl_command p = input_ptr[0]; d = init_genrand( gRandomSeed ); - for (i=0; i<num_elements; i++) + for (size_t i = 0; i < num_elements; i++) { p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d); } @@ -278,7 +277,6 @@ test_work_group_broadcast_2D(cl_device_id device, cl_context context, cl_command size_t num_workgroups; size_t num_elements; int err; - int i; MTdata d; err = create_single_kernel_helper(context, &program, &kernel, 1, @@ -333,7 +331,7 @@ test_work_group_broadcast_2D(cl_device_id device, cl_context context, cl_command p = input_ptr[0]; d = init_genrand( gRandomSeed ); - for (i=0; i<num_elements; i++) + for (size_t i = 0; i < num_elements; i++) { p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d); } @@ -402,7 +400,6 @@ test_work_group_broadcast_3D(cl_device_id device, cl_context context, cl_command size_t num_workgroups; size_t num_elements; int err; - int i; MTdata d; err = create_single_kernel_helper(context, &program, &kernel, 1, @@ -458,7 +455,7 @@ test_work_group_broadcast_3D(cl_device_id device, cl_context context, cl_command p = input_ptr[0]; d = init_genrand( gRandomSeed ); - for (i=0; i<num_elements; i++) + for (size_t i = 0; i < num_elements; i++) { p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d); } diff --git a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp index 648e68ce..a31fca63 100644 --- a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp +++ b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp @@ -219,10 +219,8 @@ int do_test(cl_device_id device, cl_context context, cl_command_queue queue, int do_test_work_group_suggested_local_size( cl_device_id device, cl_context context, cl_command_queue queue, bool (*skip_cond)(size_t), size_t start, size_t end, size_t incr, - cl_long max_local_mem_size, size_t global_work_offset[], num_dims dim) + cl_ulong max_local_mem_size, size_t global_work_offset[], num_dims dim) { - clProgramWrapper scan_program; - clKernelWrapper scan_kernel; int err; size_t test_values[] = { 1, 1, 1 }; std::string kernel_names[6] = { @@ -244,6 +242,8 @@ int do_test_work_group_suggested_local_size( for (int kernel_num = 0; kernel_num < 6; kernel_num++) { if (max_local_mem_size < local_mem_size[kernel_num]) continue; + clProgramWrapper scan_program; + clKernelWrapper scan_kernel; // Create the kernel err = create_single_kernel_helper( context, &scan_program, &scan_kernel, 1, @@ -300,7 +300,7 @@ int test_work_group_suggested_local_size_1D(cl_device_id device, "Skipping the test.\n"); return TEST_SKIPPED_ITSELF; } - cl_long max_local_mem_size; + cl_ulong max_local_mem_size; cl_int err = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(max_local_mem_size), &max_local_mem_size, NULL); |