about summary refs log tree commit diff
diff options
context:
space:
mode:
author Sadaf Ebrahimi <sadafebrahimi@google.com> 2023-10-17 19:27:15 +0000
committer Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> 2023-10-17 19:27:15 +0000
commit fa4ba9dfa1f52ebf659adcb7d23e8294eb866d64 (patch)
tree 4aace48f642ba4929dd631f3bc46285d3c853c3e
parent 150005d3d187a681ba110128ab0272b18cbcb468 (diff)
parent 3cc456717b7499d7e06ac59aa5c419c6e5e5b3b8 (diff)
download OpenCL-CTS-fa4ba9dfa1f52ebf659adcb7d23e8294eb866d64.tar.gz
Upgrade OpenCL-CTS to v2023-10-10-00 am: 3cc456717b
Original change: https://android-review.googlesource.com/c/platform/external/OpenCL-CTS/+/2793473
Change-Id: I79bd9760da26a23f2e2b494cff88a3e3215e9b63
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
-rw-r--r-- .github/workflows/presubmit.yml 12
-rw-r--r-- CMakeLists.txt 4
-rw-r--r-- METADATA 6
-rwxr-xr-x check-format.sh 2
-rwxr-xr-x presubmit.sh 1
-rw-r--r-- test_common/gl/helpers.cpp 99
-rw-r--r-- test_common/harness/compat.h 6
-rw-r--r-- test_common/harness/crc32.h 4
-rw-r--r-- test_common/harness/mt19937.cpp 10
-rw-r--r-- test_common/harness/stringHelpers.h 42
-rw-r--r-- test_common/harness/testHarness.cpp 4
-rw-r--r-- test_common/harness/typeWrappers.h 42
-rw-r--r-- test_conformance/SVM/CMakeLists.txt 2
-rw-r--r-- test_conformance/allocations/CMakeLists.txt 2
-rw-r--r-- test_conformance/allocations/allocation_execute.cpp 36
-rw-r--r-- test_conformance/allocations/main.cpp 1
-rw-r--r-- test_conformance/api/test_mem_object_info.cpp 3
-rw-r--r-- test_conformance/api/test_null_buffer_arg.cpp 25
-rw-r--r-- test_conformance/api/test_queries.cpp 9
-rw-r--r-- test_conformance/api/test_queue_properties.cpp 5
-rw-r--r-- test_conformance/atomics/test_indexed_cases.cpp 48
-rw-r--r-- test_conformance/basic/CMakeLists.txt 8
-rw-r--r-- test_conformance/basic/main.cpp 43
-rw-r--r-- test_conformance/basic/procs.h 16
-rw-r--r-- test_conformance/basic/test_astype.cpp 214
-rw-r--r-- test_conformance/basic/test_async_copy.cpp 69
-rw-r--r-- test_conformance/basic/test_async_copy2D.cpp 182
-rw-r--r-- test_conformance/basic/test_async_copy3D.cpp 204
-rw-r--r-- test_conformance/basic/test_async_strided_copy.cpp 87
-rw-r--r-- test_conformance/basic/test_barrier.cpp 189
-rw-r--r-- test_conformance/basic/test_constant.cpp 351
-rw-r--r-- test_conformance/basic/test_enqueue_map.cpp 310
-rw-r--r-- test_conformance/basic/test_fpmath.cpp 386
-rw-r--r-- test_conformance/basic/test_fpmath_float.cpp 196
-rw-r--r-- test_conformance/basic/test_get_linear_ids.cpp 14
-rw-r--r-- test_conformance/basic/test_hiloeo.cpp 363
-rw-r--r-- test_conformance/basic/test_image_r8.cpp 196
-rw-r--r-- test_conformance/basic/test_int2float.cpp 143
-rw-r--r-- test_conformance/basic/test_int2fp.cpp 325
-rw-r--r-- test_conformance/basic/test_intmath.cpp 4
-rw-r--r-- test_conformance/basic/test_loop.cpp 210
-rw-r--r-- test_conformance/basic/test_progvar.cpp 18
-rw-r--r-- test_conformance/basic/test_vec_type_hint.cpp 152
-rw-r--r-- test_conformance/basic/test_vector_creation.cpp 489
-rw-r--r-- test_conformance/basic/test_vector_swizzle.cpp 21
-rw-r--r-- test_conformance/basic/test_vloadstore.cpp 800
-rw-r--r-- test_conformance/basic/test_wg_barrier.cpp 159
-rw-r--r-- test_conformance/basic/test_work_item_functions.cpp 3
-rw-r--r-- test_conformance/c11_atomics/CMakeLists.txt 2
-rw-r--r-- test_conformance/c11_atomics/common.h 6
-rw-r--r-- test_conformance/c11_atomics/host_atomics.h 6
-rw-r--r-- test_conformance/c11_atomics/test_atomics.cpp 2
-rw-r--r-- test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp 123
-rw-r--r-- test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp 4
-rw-r--r-- test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp 8
-rw-r--r-- test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp 23
-rw-r--r-- test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp 8
-rw-r--r-- test_conformance/common/vulkan_wrapper/vulkan_utility.cpp 31
-rw-r--r-- test_conformance/common/vulkan_wrapper/vulkan_utility.hpp 5
-rw-r--r-- test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp 290
-rw-r--r-- test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp 38
-rw-r--r-- test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp 4
-rw-r--r-- test_conformance/commonfns/main.cpp 43
-rw-r--r-- test_conformance/commonfns/test_base.h 86
-rw-r--r-- test_conformance/commonfns/test_binary_fn.cpp 74
-rw-r--r-- test_conformance/commonfns/test_clamp.cpp 83
-rw-r--r-- test_conformance/commonfns/test_mix.cpp 120
-rw-r--r-- test_conformance/commonfns/test_smoothstep.cpp 123
-rw-r--r-- test_conformance/commonfns/test_step.cpp 61
-rw-r--r-- test_conformance/commonfns/test_unary_fn.cpp 155
-rw-r--r-- test_conformance/compiler/test_compile.cpp 3756
-rw-r--r-- test_conformance/compiler/test_compiler_defines_for_extensions.cpp 1
-rw-r--r-- test_conformance/computeinfo/main.cpp 4
-rw-r--r-- test_conformance/conversions/CMakeLists.txt 2
-rw-r--r-- test_conformance/conversions/basic_test_conversions.cpp 3307
-rw-r--r-- test_conformance/conversions/basic_test_conversions.h 348
-rw-r--r-- test_conformance/conversions/conversions_data_info.h 792
-rw-r--r-- test_conformance/conversions/fplib.h 5
-rw-r--r-- test_conformance/conversions/test_conversions.cpp 1339
-rw-r--r-- test_conformance/d3d10/harness.h 4
-rw-r--r-- test_conformance/device_execution/CMakeLists.txt 4
-rw-r--r-- test_conformance/device_execution/enqueue_block.cpp 1061
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt 4
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp 25
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h 21
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt 8
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp 12
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_arguments.cpp 847
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h 56
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp 170
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp 167
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_image_arguments.cpp 427
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp 75
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_local_size.cpp 174
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp 454
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h 51
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/command_buffer_event_sync.cpp 7
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/command_buffer_finalize.cpp 85
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp 69
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp 2
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp 36
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h 6
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp 273
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp 135
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/main.cpp 7
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/procs.h 16
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.cpp 94
-rw-r--r-- test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.h 42
-rw-r--r-- test_conformance/extensions/cl_khr_external_semaphore/procs.h 6
-rw-r--r-- test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp 427
-rw-r--r-- test_conformance/extensions/cl_khr_semaphore/main.cpp 6
-rw-r--r-- test_conformance/extensions/cl_khr_semaphore/procs.h 12
-rw-r--r-- test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp 403
-rw-r--r-- test_conformance/geometrics/CMakeLists.txt 2
-rw-r--r-- test_conformance/gl/test_images_write_common.cpp 1
-rw-r--r-- test_conformance/images/clCopyImage/test_copy_generic.cpp 59
-rw-r--r-- test_conformance/images/clCopyImage/test_loops.cpp 98
-rw-r--r-- test_conformance/images/clFillImage/test_loops.cpp 55
-rw-r--r-- test_conformance/images/kernel_read_write/CMakeLists.txt 2
-rw-r--r-- test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp 12
-rw-r--r-- test_conformance/math_brute_force/CMakeLists.txt 2
-rw-r--r-- test_conformance/math_brute_force/i_unary_double.cpp 6
-rw-r--r-- test_conformance/math_brute_force/i_unary_float.cpp 6
-rw-r--r-- test_conformance/math_brute_force/macro_unary_float.cpp 11
-rw-r--r-- test_conformance/math_brute_force/unary_two_results_float.cpp 3
-rw-r--r-- test_conformance/mem_host_flags/C_host_memory_block.h 34
-rw-r--r-- test_conformance/mem_host_flags/checker.h 2
-rw-r--r-- test_conformance/non_uniform_work_group/CMakeLists.txt 2
-rw-r--r-- test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp 10
-rw-r--r-- test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h 7
-rw-r--r-- test_conformance/non_uniform_work_group/tools.h 6
-rw-r--r-- test_conformance/pipes/kernels.h 6
-rw-r--r-- test_conformance/pipes/test_pipe_limits.cpp 13
-rw-r--r-- test_conformance/relationals/test_comparisons_fp.cpp 35
-rw-r--r-- test_conformance/relationals/test_comparisons_fp.h 7
-rw-r--r-- test_conformance/select/test_select.cpp 320
-rw-r--r-- test_conformance/select/test_select.h 24
-rw-r--r-- test_conformance/select/util_select.cpp 826
-rw-r--r-- test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32 35
-rw-r--r-- test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64 39
-rw-r--r-- test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm32 42
-rw-r--r-- test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm64 47
-rw-r--r-- test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm32 43
-rw-r--r-- test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm64 48
-rw-r--r-- test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32 46
-rw-r--r-- test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64 50
-rw-r--r-- test_conformance/spirv_new/test_op_negate.cpp 40
-rw-r--r-- test_conformance/spirv_new/test_op_vector_extract.cpp 52
-rw-r--r-- test_conformance/spirv_new/test_op_vector_insert.cpp 52
-rw-r--r-- test_conformance/spirv_new/test_op_vector_times_scalar.cpp 14
-rw-r--r-- test_conformance/spirv_new/types.hpp 7
-rw-r--r-- test_conformance/subgroups/subgroup_common_templates.h 15
-rw-r--r-- test_conformance/subgroups/subhelpers.cpp 2
-rw-r--r-- test_conformance/subgroups/subhelpers.h 2
-rw-r--r-- test_conformance/subgroups/test_workitem.cpp 8
-rw-r--r-- test_conformance/vulkan/main.cpp 58
-rw-r--r-- test_conformance/vulkan/test_vulkan_api_consistency.cpp 30
-rw-r--r-- test_conformance/vulkan/test_vulkan_interop_buffer.cpp 342
-rw-r--r-- test_conformance/vulkan/test_vulkan_interop_image.cpp 332
-rw-r--r-- test_conformance/vulkan/vulkan_interop_common.hpp 1
-rw-r--r-- test_conformance/workgroups/test_wg_all.cpp 3
-rw-r--r-- test_conformance/workgroups/test_wg_any.cpp 3
-rw-r--r-- test_conformance/workgroups/test_wg_broadcast.cpp 11
-rw-r--r-- test_conformance/workgroups/test_wg_suggested_local_work_size.cpp 8
164 files changed, 14017 insertions, 10412 deletions
diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml
index 1ba63abd..635e4a7e 100644
--- a/.github/workflows/presubmit.yml
+++ b/.github/workflows/presubmit.yml
@@ -13,16 +13,16 @@ jobs:
fail-fast: false
matrix:
mainmatrix: [true]
- os: [ubuntu-20.04, macos-latest, windows-latest]
+ os: [ubuntu-22.04, macos-latest, windows-latest]
include:
- - os: ubuntu-20.04
+ - os: ubuntu-22.04
mainmatrix: true
gl: 1
extra: " gl"
- - os: ubuntu-20.04
+ - os: ubuntu-22.04
mainmatrix: false
arch: arm
- - os: ubuntu-20.04
+ - os: ubuntu-22.04
mainmatrix: false
arch: aarch64
debug: 1
@@ -55,10 +55,10 @@ jobs:
run: ./presubmit.sh
formatcheck:
name: Check code format
- runs-on: ubuntu-20.04
+ runs-on: ubuntu-22.04
steps:
- name: Install packages
- run: sudo apt install -y clang-format clang-format-9
+ run: sudo apt install -y clang-format clang-format-11
- uses: actions/checkout@v3
with:
fetch-depth: 0
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4fce58d8..6c9bbf6f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -103,10 +103,6 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang"
if(NOT CMAKE_BUILD_TYPE MATCHES "Release|RelWithDebInfo|MinSizeRel")
# Enable more warnings if not doing a release build.
add_cxx_flag_if_supported(-Wall)
- # Suppress warnings that currently trigger on the code base.
- # This list should shrink over time when warnings are fixed.
- add_cxx_flag_if_supported(-Wno-sometimes-uninitialized)
- add_cxx_flag_if_supported(-Wno-sign-compare)
endif()
add_cxx_flag_if_supported(-Wno-narrowing)
add_cxx_flag_if_supported(-Wno-format)
diff --git a/METADATA b/METADATA
index 41886b71..b9540060 100644
--- a/METADATA
+++ b/METADATA
@@ -9,11 +9,11 @@ third_party {
type: GIT
value: "https://github.com/KhronosGroup/OpenCL-CTS.git"
}
- version: "v2023-05-16-00"
+ version: "v2023-10-10-00"
license_type: NOTICE
last_upgrade_date {
year: 2023
- month: 7
- day: 25
+ month: 10
+ day: 17
}
}
diff --git a/check-format.sh b/check-format.sh
index be8f9d78..b5dc0a72 100755
--- a/check-format.sh
+++ b/check-format.sh
@@ -2,7 +2,7 @@
# Arg used to specify non-'origin/main' comparison branch
ORIGIN_BRANCH=${1:-"origin/main"}
-CLANG_BINARY=${2:-"`which clang-format-9`"}
+CLANG_BINARY=${2:-"`which clang-format-11`"}
# Run git-clang-format to check for violations
CLANG_FORMAT_OUTPUT=$(git-clang-format --diff $ORIGIN_BRANCH --extensions c,cpp,h,hpp --binary $CLANG_BINARY)
diff --git a/presubmit.sh b/presubmit.sh
index 605c10b0..10354abf 100755
--- a/presubmit.sh
+++ b/presubmit.sh
@@ -77,7 +77,6 @@ cmake .. -G Ninja \
-DBUILD_WSI_XLIB_SUPPORT=OFF \
-DBUILD_WSI_XCB_SUPPORT=OFF \
-DBUILD_WSI_WAYLAND_SUPPORT=OFF \
- -DUSE_GAS=OFF \
-C helper.cmake ..
cmake --build . -j2
diff --git a/test_common/gl/helpers.cpp b/test_common/gl/helpers.cpp
index b9f95a94..62f63253 100644
--- a/test_common/gl/helpers.cpp
+++ b/test_common/gl/helpers.cpp
@@ -966,12 +966,13 @@ void reorder_verification_buffer(GLenum glFormat, GLenum glType, char* buffer, s
#ifdef GL_VERSION_3_2
-#define check_gl_error() \
-{ \
- GLenum errnom = GL_NO_ERROR;\
- if ((errnom = glGetError()) != GL_NO_ERROR)\
- log_error("GL Error: 0x%04X at %s:%d\n", errnom, __FILE__, __LINE__);\
-}
+#define CHECK_GL_ERROR() \
+ { \
+ GLenum errnom = GL_NO_ERROR; \
+ if ((errnom = glGetError()) != GL_NO_ERROR) \
+ log_error("GL Error: 0x%04X at %s:%d\n", errnom, __FILE__, \
+ __LINE__); \
+ }
const char *get_gl_vector_type( GLenum internalformat )
{
@@ -1045,10 +1046,12 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples
// Check if the renderer supports enough samples
GLint max_samples = get_gl_max_samples(target, internalFormat);
- check_gl_error()
+ CHECK_GL_ERROR()
if (max_samples < (GLint)samples)
- log_error("GL error: requested samples (%d) exceeds renderer max samples (%d)\n", samples, max_samples);
+ log_error("GL error: requested samples (%zu) exceeds renderer max "
+ "samples (%d)\n",
+ samples, max_samples);
// Setup the GLSL program
const GLchar *vertex_source =
@@ -1075,36 +1078,36 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples
glShaderWrapper vertex_shader = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vertex_shader, 1, &vertex_source, NULL);
glCompileShader(vertex_shader);
- check_gl_error()
+ CHECK_GL_ERROR()
glShaderWrapper fragment_shader = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(fragment_shader, 1, &fragment_source, NULL);
glCompileShader(fragment_shader);
- check_gl_error()
+ CHECK_GL_ERROR()
GLuint prog = glCreateProgram();
glAttachShader(prog, vertex_shader);
glAttachShader(prog, fragment_shader);
- check_gl_error()
+ CHECK_GL_ERROR()
glBindAttribLocation(prog, 0, "att0");
glLinkProgram(prog);
- check_gl_error()
+ CHECK_GL_ERROR()
// Setup the FBO and texture
glFramebufferWrapper fbo;
glGenFramebuffers(1, &fbo);
glBindFramebuffer(GL_FRAMEBUFFER, fbo);
- check_gl_error()
+ CHECK_GL_ERROR()
glViewport(0, 0, width, height);
- check_gl_error()
+ CHECK_GL_ERROR()
GLuint tex = 0;
glGenTextures(1, &tex);
glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, tex);
glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, samples, internalFormat, width, height, fixedSampleLocations);
- check_gl_error()
+ CHECK_GL_ERROR()
GLint attachment;
switch (internalFormat) {
@@ -1122,7 +1125,7 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples
}
glFramebufferTexture(GL_FRAMEBUFFER, attachment, tex, 0);
- check_gl_error()
+ CHECK_GL_ERROR()
GLint status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
if (status == GL_FRAMEBUFFER_UNSUPPORTED) {
@@ -1142,22 +1145,24 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples
// Check if the framebuffer supports enough samples
GLint fbo_samples = 0;
glGetIntegerv(GL_SAMPLES, &fbo_samples);
- check_gl_error();
+ CHECK_GL_ERROR();
if (fbo_samples < (GLint)samples)
- log_error("GL Error: requested samples (%d) exceeds FBO capability (%d)\n", samples, fbo_samples);
+ log_error(
+ "GL Error: requested samples (%zu) exceeds FBO capability (%d)\n",
+ samples, fbo_samples);
glUseProgram(prog);
- check_gl_error()
+ CHECK_GL_ERROR()
if (attachment != GL_DEPTH_ATTACHMENT && attachment != GL_DEPTH_STENCIL_ATTACHMENT) {
glDisable(GL_DEPTH_TEST);
- check_gl_error()
+ CHECK_GL_ERROR()
}
else {
glEnable(GL_DEPTH_TEST);
glDepthFunc(GL_ALWAYS);
- check_gl_error()
+ CHECK_GL_ERROR()
}
// Setup the VBO for rendering a quad
@@ -1172,14 +1177,14 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples
glGenBuffers(1, &vbo);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, sizeof(quad), quad, GL_STREAM_DRAW);
- check_gl_error()
+ CHECK_GL_ERROR()
glVertexArraysWrapper vao;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(GLfloat)*2, 0);
- check_gl_error()
+ CHECK_GL_ERROR()
//clearing color and depth buffer
glClearColor(0, 0, 0, 0);
@@ -1223,13 +1228,13 @@ void * CreateGLTexture2DMultisample( size_t width, size_t height, size_t samples
color += color_delta;
glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
- check_gl_error();
+ CHECK_GL_ERROR();
glFlush();
}
glDisable(GL_SAMPLE_MASK);
- check_gl_error();
+ CHECK_GL_ERROR();
*outTextureID = tex;
@@ -1306,7 +1311,9 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height,
GLint max_samples = get_gl_max_samples(target, internalFormat);
if (max_samples < (GLint)samples)
- log_error("GL error: requested samples (%d) exceeds renderer max samples (%d)\n", samples, max_samples);
+ log_error("GL error: requested samples (%zu) exceeds renderer max "
+ "samples (%d)\n",
+ samples, max_samples);
// Setup the GLSL program
const GLchar *vertex_source =
@@ -1333,36 +1340,36 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height,
glShaderWrapper vertex_shader = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vertex_shader, 1, &vertex_source, NULL);
glCompileShader(vertex_shader);
- check_gl_error()
+ CHECK_GL_ERROR()
glShaderWrapper fragment_shader = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(fragment_shader, 1, &fragment_source, NULL);
glCompileShader(fragment_shader);
- check_gl_error()
+ CHECK_GL_ERROR()
glProgramWrapper prog = glCreateProgram();
glAttachShader(prog, vertex_shader);
glAttachShader(prog, fragment_shader);
- check_gl_error()
+ CHECK_GL_ERROR()
glBindAttribLocation(prog, 0, "att0");
glLinkProgram(prog);
- check_gl_error()
+ CHECK_GL_ERROR()
// Setup the FBO and texture
glFramebufferWrapper fbo;
glGenFramebuffers(1, &fbo);
glBindFramebuffer(GL_FRAMEBUFFER, fbo);
- check_gl_error()
+ CHECK_GL_ERROR()
glViewport(0, 0, width, height);
- check_gl_error()
+ CHECK_GL_ERROR()
GLuint tex = 0;
glGenTextures(1, &tex);
glBindTexture(GL_TEXTURE_2D_MULTISAMPLE_ARRAY, tex);
glTexImage3DMultisample(GL_TEXTURE_2D_MULTISAMPLE_ARRAY, samples, internalFormat, width, height, total_layers, fixedSampleLocations);
- check_gl_error()
+ CHECK_GL_ERROR()
GLint attachment;
switch (internalFormat) {
@@ -1384,12 +1391,12 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height,
if (attachment != GL_DEPTH_ATTACHMENT && attachment != GL_DEPTH_STENCIL_ATTACHMENT) {
glDisable(GL_DEPTH_TEST);
- check_gl_error()
+ CHECK_GL_ERROR()
}
else {
glEnable(GL_DEPTH_TEST);
glDepthFunc(GL_ALWAYS);
- check_gl_error()
+ CHECK_GL_ERROR()
}
// Setup the VBO for rendering a quad
@@ -1404,18 +1411,18 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height,
glGenBuffers(1, &vbo);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, sizeof(quad), quad, GL_STREAM_DRAW);
- check_gl_error()
+ CHECK_GL_ERROR()
glVertexArraysWrapper vao;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, sizeof(GLfloat)*2, 0);
- check_gl_error()
+ CHECK_GL_ERROR()
for (size_t l=0; l!=total_layers; ++l) {
glFramebufferTextureLayer(GL_FRAMEBUFFER, attachment, tex, 0, l);
- check_gl_error()
+ CHECK_GL_ERROR()
GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
if (status == GL_FRAMEBUFFER_UNSUPPORTED) {
@@ -1435,13 +1442,15 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height,
// Check if the framebuffer supports enough samples
GLint fbo_samples = 0;
glGetIntegerv(GL_SAMPLES, &fbo_samples);
- check_gl_error();
+ CHECK_GL_ERROR();
if (fbo_samples < (GLint)samples)
- log_error("GL Error: requested samples (%d) exceeds FBO capability (%d)\n", samples, fbo_samples);
+ log_error(
+ "GL Error: requested samples (%zu) exceeds FBO capability (%d)\n",
+ samples, fbo_samples);
glUseProgram(prog);
- check_gl_error()
+ CHECK_GL_ERROR()
//clearing color and depth buffer
glClearColor(0, 0, 0, 0);
@@ -1482,13 +1491,13 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height,
glUniform1f(glGetUniformLocation(prog, "depthVal"), val);
glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
- check_gl_error();
+ CHECK_GL_ERROR();
glFlush();
}
glDisable(GL_SAMPLE_MASK);
- check_gl_error();
+ CHECK_GL_ERROR();
}
*outTextureID = tex;
@@ -1715,7 +1724,7 @@ void * CreateGLRenderbuffer( GLsizei width, GLsizei height,
// Reverse and reorder to validate since in the
// kernel the read_imagef() call always returns RGBA
cl_uchar *p = (cl_uchar *)buffer;
- for( size_t i = 0; i < (size_t)width * height; i++ )
+ for (GLsizei i = 0; i < width * height; i++)
{
cl_uchar uc0 = p[i * 4 + 0];
cl_uchar uc1 = p[i * 4 + 1];
@@ -1733,7 +1742,7 @@ void * CreateGLRenderbuffer( GLsizei width, GLsizei height,
// Reverse and reorder to validate since in the
// kernel the read_imagef() call always returns RGBA
cl_uchar *p = (cl_uchar *)buffer;
- for( size_t i = 0; i < width * height; i++ )
+ for (GLsizei i = 0; i < width * height; i++)
{
cl_uchar uc0 = p[i * 4 + 0];
cl_uchar uc1 = p[i * 4 + 1];
diff --git a/test_common/harness/compat.h b/test_common/harness/compat.h
index 4053b7ee..a42f2917 100644
--- a/test_common/harness/compat.h
+++ b/test_common/harness/compat.h
@@ -13,8 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-#ifndef _COMPAT_H_
-#define _COMPAT_H_
+#ifndef COMPAT_H_
+#define COMPAT_H_
#if defined(_WIN32) && defined(_MSC_VER)
#include <Windows.h>
@@ -398,4 +398,4 @@ EXTERN_C int __builtin_clz(unsigned int pattern);
#define sleep(sec) Sleep((sec)*1000)
#endif
-#endif // _COMPAT_H_
+#endif // COMPAT_H_
diff --git a/test_common/harness/crc32.h b/test_common/harness/crc32.h
index 65ca15ee..69587011 100644
--- a/test_common/harness/crc32.h
+++ b/test_common/harness/crc32.h
@@ -15,8 +15,8 @@ Agreement or Khronos Conformance Test Source License Agreement as
executed between Khronos and the recipient.
******************************************************************/
-#ifndef _CRC32_H_
-#define _CRC32_H_
+#ifndef CRC32_H_
+#define CRC32_H_
#include <stdint.h>
#include <stddef.h>
diff --git a/test_common/harness/mt19937.cpp b/test_common/harness/mt19937.cpp
index f5665deb..2d503eb5 100644
--- a/test_common/harness/mt19937.cpp
+++ b/test_common/harness/mt19937.cpp
@@ -51,6 +51,7 @@
#include "harness/alloc.h"
#ifdef __SSE2__
+#include <mutex>
#include <emmintrin.h>
#endif
@@ -107,7 +108,7 @@ cl_uint genrand_int32(MTdata d)
/* mag01[x] = x * MATRIX_A for x=0,1 */
static const cl_uint mag01[2] = { 0x0UL, MATRIX_A };
#ifdef __SSE2__
- static volatile int init = 0;
+ static std::once_flag init_flag;
static union {
__m128i v;
cl_uint s[4];
@@ -123,8 +124,7 @@ cl_uint genrand_int32(MTdata d)
int kk;
#ifdef __SSE2__
- if (0 == init)
- {
+ auto init_fn = []() {
upper_mask.s[0] = upper_mask.s[1] = upper_mask.s[2] =
upper_mask.s[3] = UPPER_MASK;
lower_mask.s[0] = lower_mask.s[1] = lower_mask.s[2] =
@@ -134,8 +134,8 @@ cl_uint genrand_int32(MTdata d)
MATRIX_A;
c0.s[0] = c0.s[1] = c0.s[2] = c0.s[3] = (cl_uint)0x9d2c5680UL;
c1.s[0] = c1.s[1] = c1.s[2] = c1.s[3] = (cl_uint)0xefc60000UL;
- init = 1;
- }
+ };
+ std::call_once(init_flag, init_fn);
#endif
kk = 0;
diff --git a/test_common/harness/stringHelpers.h b/test_common/harness/stringHelpers.h
new file mode 100644
index 00000000..e1275f10
--- /dev/null
+++ b/test_common/harness/stringHelpers.h
@@ -0,0 +1,42 @@
+//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef STRING_HELPERS_H
+#define STRING_HELPERS_H
+
+#include <memory>
+#include <stdexcept>
+#include <string>
+
+inline std::string concat_kernel(const char *sstr[], int num)
+{
+ std::string res;
+ for (int i = 0; i < num; i++) res += std::string(sstr[i]);
+ return res;
+}
+
+template <typename... Args>
+inline std::string str_sprintf(const std::string &str, Args... args)
+{
+ int str_size = std::snprintf(nullptr, 0, str.c_str(), args...) + 1;
+ if (str_size <= 0) throw std::runtime_error("Formatting error.");
+ size_t s = static_cast<size_t>(str_size);
+ std::unique_ptr<char[]> buffer(new char[s]);
+ std::snprintf(buffer.get(), s, str.c_str(), args...);
+ return std::string(buffer.get(), buffer.get() + s - 1);
+}
+
+#endif // STRING_HELPERS_H
diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp
index 95ea8163..3d743e71 100644
--- a/test_common/harness/testHarness.cpp
+++ b/test_common/harness/testHarness.cpp
@@ -835,9 +835,9 @@ void callTestFunctions(test_definition testList[],
std::vector<std::thread *> threads;
test_harness_state state = { testList, resultTestList, deviceToUse,
config };
- for (int i = 0; i < config.numWorkerThreads; i++)
+ for (unsigned i = 0; i < config.numWorkerThreads; i++)
{
- log_info("Spawning worker thread %i\n", i);
+ log_info("Spawning worker thread %u\n", i);
threads.push_back(new std::thread(test_function_runner, &state));
}
diff --git a/test_common/harness/typeWrappers.h b/test_common/harness/typeWrappers.h
index 50c7c938..ad11b480 100644
--- a/test_common/harness/typeWrappers.h
+++ b/test_common/harness/typeWrappers.h
@@ -145,6 +145,48 @@ using clSamplerWrapper =
using clEventWrapper =
wrapper_details::Wrapper<cl_event, clRetainEvent, clReleaseEvent>;
+class clSVMWrapper {
+ void *Ptr = nullptr;
+ cl_context Ctx = nullptr;
+
+public:
+ clSVMWrapper() = default;
+
+ clSVMWrapper(cl_context C, size_t Size,
+ cl_svm_mem_flags F = CL_MEM_READ_WRITE)
+ : Ctx(C)
+ {
+ Ptr = clSVMAlloc(C, F, Size, 0);
+ }
+
+ clSVMWrapper &operator=(void *other) = delete;
+ clSVMWrapper(clSVMWrapper const &other) = delete;
+ clSVMWrapper &operator=(clSVMWrapper const &other) = delete;
+ clSVMWrapper(clSVMWrapper &&other)
+ {
+ Ptr = other.Ptr;
+ Ctx = other.Ctx;
+ other.Ptr = nullptr;
+ other.Ctx = nullptr;
+ }
+ clSVMWrapper &operator=(clSVMWrapper &&other)
+ {
+ Ptr = other.Ptr;
+ Ctx = other.Ctx;
+ other.Ptr = nullptr;
+ other.Ctx = nullptr;
+ return *this;
+ }
+
+ ~clSVMWrapper()
+ {
+ if (Ptr) clSVMFree(Ctx, Ptr);
+ }
+
+ void *operator()() const { return Ptr; }
+};
+
+
class clProtectedImage {
public:
clProtectedImage()
diff --git a/test_conformance/SVM/CMakeLists.txt b/test_conformance/SVM/CMakeLists.txt
index 2d01a825..2ad2f821 100644
--- a/test_conformance/SVM/CMakeLists.txt
+++ b/test_conformance/SVM/CMakeLists.txt
@@ -17,4 +17,6 @@ set(${MODULE_NAME}_SOURCES
test_migrate.cpp
)
+set_gnulike_module_compile_flags("-Wno-sometimes-uninitialized -Wno-sign-compare")
+
include(../CMakeCommon.txt)
diff --git a/test_conformance/allocations/CMakeLists.txt b/test_conformance/allocations/CMakeLists.txt
index a4043806..b6031225 100644
--- a/test_conformance/allocations/CMakeLists.txt
+++ b/test_conformance/allocations/CMakeLists.txt
@@ -8,4 +8,6 @@ set(${MODULE_NAME}_SOURCES
allocation_utils.cpp
)
+set_gnulike_module_compile_flags("-Wno-sign-compare")
+
include(../CMakeCommon.txt)
diff --git a/test_conformance/allocations/allocation_execute.cpp b/test_conformance/allocations/allocation_execute.cpp
index 9d0e8777..5a77c3a7 100644
--- a/test_conformance/allocations/allocation_execute.cpp
+++ b/test_conformance/allocations/allocation_execute.cpp
@@ -79,20 +79,30 @@ int check_image(cl_command_queue queue, cl_mem mem) {
return -1;
}
- if (type == CL_MEM_OBJECT_BUFFER) {
- log_error("Expected image object, not buffer.\n");
- return -1;
- } else if (type == CL_MEM_OBJECT_IMAGE2D) {
- error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width, NULL);
- if (error) {
- print_error(error, "clGetMemObjectInfo failed for CL_IMAGE_WIDTH.");
- return -1;
- }
- error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height), &height, NULL);
- if (error) {
- print_error(error, "clGetMemObjectInfo failed for CL_IMAGE_HEIGHT.");
+ switch (type)
+ {
+ case CL_MEM_OBJECT_BUFFER:
+ log_error("Expected image object, not buffer.\n");
return -1;
- }
+ case CL_MEM_OBJECT_IMAGE2D:
+ error = clGetImageInfo(mem, CL_IMAGE_WIDTH, sizeof(width), &width,
+ NULL);
+ if (error)
+ {
+ print_error(error,
+ "clGetMemObjectInfo failed for CL_IMAGE_WIDTH.");
+ return -1;
+ }
+ error = clGetImageInfo(mem, CL_IMAGE_HEIGHT, sizeof(height),
+ &height, NULL);
+ if (error)
+ {
+ print_error(error,
+ "clGetMemObjectInfo failed for CL_IMAGE_HEIGHT.");
+ return -1;
+ }
+ break;
+ default: log_error("unexpected object type"); return -1;
}
diff --git a/test_conformance/allocations/main.cpp b/test_conformance/allocations/main.cpp
index 43e81277..827072fc 100644
--- a/test_conformance/allocations/main.cpp
+++ b/test_conformance/allocations/main.cpp
@@ -326,6 +326,7 @@ int main(int argc, const char *argv[])
else if ( strcmp( argv[i], "--help" ) == 0 || strcmp( argv[i], "-h" ) == 0 )
{
printUsage( argv[0] );
+ free(argList);
return -1;
}
diff --git a/test_conformance/api/test_mem_object_info.cpp b/test_conformance/api/test_mem_object_info.cpp
index 8dc8f6cf..7eedec85 100644
--- a/test_conformance/api/test_mem_object_info.cpp
+++ b/test_conformance/api/test_mem_object_info.cpp
@@ -217,6 +217,9 @@ int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_
// Create a buffer object to test against.
bufferObject = clCreateBuffer( context, bufferFlags[ i ], addressAlign * 4, NULL, &error );
test_error( error, "Unable to create buffer to test with" );
+ void *ptr;
+ TEST_MEM_OBJECT_PARAM(bufferObject, CL_MEM_HOST_PTR, ptr, NULL,
+ "host pointer", "%p", void *)
}
// Perform buffer object queries.
diff --git a/test_conformance/api/test_null_buffer_arg.cpp b/test_conformance/api/test_null_buffer_arg.cpp
index 75bdd479..83fcb636 100644
--- a/test_conformance/api/test_null_buffer_arg.cpp
+++ b/test_conformance/api/test_null_buffer_arg.cpp
@@ -64,16 +64,21 @@ static int test_setargs_and_execution(cl_command_queue queue, cl_kernel kernel,
cl_int status;
const char *typestr;
- if (type == NON_NULL_PATH) {
- status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
- typestr = "non-NULL";
- } else if (type == ADDROF_NULL_PATH) {
- test_buf = NULL;
- status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
- typestr = "&NULL";
- } else if (type == NULL_PATH) {
- status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL);
- typestr = "NULL";
+ switch (type)
+ {
+ case NON_NULL_PATH:
+ status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
+ typestr = "non-NULL";
+ break;
+ case ADDROF_NULL_PATH:
+ test_buf = NULL;
+ status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &test_buf);
+ typestr = "&NULL";
+ break;
+ case NULL_PATH:
+ status = clSetKernelArg(kernel, 0, sizeof(cl_mem), NULL);
+ typestr = "NULL";
+ break;
}
log_info("Testing setKernelArgs with %s buffer.\n", typestr);
diff --git a/test_conformance/api/test_queries.cpp b/test_conformance/api/test_queries.cpp
index fa5c227f..f0740107 100644
--- a/test_conformance/api/test_queries.cpp
+++ b/test_conformance/api/test_queries.cpp
@@ -644,6 +644,13 @@ int test_get_device_info(cl_device_id deviceID, cl_context context, cl_command_q
}
log_info( "\tReported device profile: %s \n", profile );
+ if (strcmp(profile, "FULL_PROFILE") == 0 && compilerAvail != CL_TRUE)
+ {
+ log_error("ERROR: Returned profile of device is FULL , but "
+ "CL_DEVICE_COMPILER_AVAILABLE is not CL_TRUE as required by "
+ "OpenCL 1.2!");
+ return -1;
+ }
return 0;
}
@@ -799,8 +806,8 @@ int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, c
test_error(error, "clFinish failed");
if (max_dimensions == 2) {
- return 0;
free(source);
+ return 0;
}
local[1]--; local[2]++;
diff --git a/test_conformance/api/test_queue_properties.cpp b/test_conformance/api/test_queue_properties.cpp
index 62d0a734..768bd5de 100644
--- a/test_conformance/api/test_queue_properties.cpp
+++ b/test_conformance/api/test_queue_properties.cpp
@@ -107,8 +107,9 @@ int test_queue_properties(cl_device_id deviceID, cl_context context, cl_command_
clProgramWrapper program;
clKernelWrapper kernel;
- cl_queue_properties_khr device_props = 0;
- cl_queue_properties_khr queue_prop_def[] = { CL_QUEUE_PROPERTIES, 0, 0 };
+ cl_command_queue_properties device_props = 0;
+ cl_command_queue_properties queue_prop_def[] = { CL_QUEUE_PROPERTIES, 0,
+ 0 };
// Query extension
if (!is_extension_available(deviceID, "cl_khr_create_command_queue"))
diff --git a/test_conformance/atomics/test_indexed_cases.cpp b/test_conformance/atomics/test_indexed_cases.cpp
index 7da2dfa7..ce0410bc 100644
--- a/test_conformance/atomics/test_indexed_cases.cpp
+++ b/test_conformance/atomics/test_indexed_cases.cpp
@@ -13,6 +13,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
+
+#include <memory>
+
#include "testBase.h"
#include "harness/conversions.h"
@@ -226,13 +229,13 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
(int)global_threads[0], (int)local_threads[0]);
// Allocate our storage
- cl_mem bin_counters =
+ clMemWrapper bin_counters =
clCreateBuffer(context, CL_MEM_READ_WRITE,
sizeof(cl_int) * number_of_bins, NULL, NULL);
- cl_mem bins = clCreateBuffer(
+ clMemWrapper bins = clCreateBuffer(
context, CL_MEM_READ_WRITE,
sizeof(cl_int) * number_of_bins * max_counts_per_bin, NULL, NULL);
- cl_mem bin_assignments =
+ clMemWrapper bin_assignments =
clCreateBuffer(context, CL_MEM_READ_ONLY,
sizeof(cl_int) * number_of_items, NULL, NULL);
@@ -253,7 +256,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
}
// Initialize our storage
- cl_int *l_bin_counts = (cl_int *)malloc(sizeof(cl_int) * number_of_bins);
+ std::unique_ptr<cl_int[]> l_bin_counts(new cl_int[number_of_bins]);
if (!l_bin_counts)
{
log_error("add_index_bin_test FAILED to allocate initial values for "
@@ -263,8 +266,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
int i;
for (i = 0; i < number_of_bins; i++) l_bin_counts[i] = 0;
err = clEnqueueWriteBuffer(queue, bin_counters, true, 0,
- sizeof(cl_int) * number_of_bins, l_bin_counts, 0,
- NULL, NULL);
+ sizeof(cl_int) * number_of_bins,
+ l_bin_counts.get(), 0, NULL, NULL);
if (err)
{
log_error("add_index_bin_test FAILED to set initial values for "
@@ -273,8 +276,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
return -1;
}
- cl_int *values =
- (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin);
+ std::unique_ptr<cl_int[]> values(
+ new cl_int[number_of_bins * max_counts_per_bin]);
if (!values)
{
log_error(
@@ -285,7 +288,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
err = clEnqueueWriteBuffer(queue, bins, true, 0,
sizeof(cl_int) * number_of_bins
* max_counts_per_bin,
- values, 0, NULL, NULL);
+ values.get(), 0, NULL, NULL);
if (err)
{
log_error(
@@ -293,10 +296,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
err);
return -1;
}
- free(values);
- cl_int *l_bin_assignments =
- (cl_int *)malloc(sizeof(cl_int) * number_of_items);
+ std::unique_ptr<cl_int[]> l_bin_assignments(new cl_int[number_of_items]);
if (!l_bin_assignments)
{
log_error("add_index_bin_test FAILED to allocate initial values for "
@@ -326,7 +327,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
}
err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0,
sizeof(cl_int) * number_of_items,
- l_bin_assignments, 0, NULL, NULL);
+ l_bin_assignments.get(), 0, NULL, NULL);
if (err)
{
log_error("add_index_bin_test FAILED to set initial values for "
@@ -355,8 +356,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
return -1;
}
- cl_int *final_bin_assignments =
- (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin);
+ std::unique_ptr<cl_int[]> final_bin_assignments(
+ new cl_int[number_of_bins * max_counts_per_bin]);
if (!final_bin_assignments)
{
log_error("add_index_bin_test FAILED to allocate initial values for "
@@ -366,15 +367,14 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
err = clEnqueueReadBuffer(queue, bins, true, 0,
sizeof(cl_int) * number_of_bins
* max_counts_per_bin,
- final_bin_assignments, 0, NULL, NULL);
+ final_bin_assignments.get(), 0, NULL, NULL);
if (err)
{
log_error("add_index_bin_test FAILED to read back bins: %d\n", err);
return -1;
}
- cl_int *final_bin_counts =
- (cl_int *)malloc(sizeof(cl_int) * number_of_bins);
+ std::unique_ptr<cl_int[]> final_bin_counts(new cl_int[number_of_bins]);
if (!final_bin_counts)
{
log_error("add_index_bin_test FAILED to allocate initial values for "
@@ -382,8 +382,8 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
return -1;
}
err = clEnqueueReadBuffer(queue, bin_counters, true, 0,
- sizeof(cl_int) * number_of_bins, final_bin_counts,
- 0, NULL, NULL);
+ sizeof(cl_int) * number_of_bins,
+ final_bin_counts.get(), 0, NULL, NULL);
if (err)
{
log_error("add_index_bin_test FAILED to read back bin_counters: %d\n",
@@ -460,13 +460,7 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue,
errors++;
}
}
- free(l_bin_counts);
- free(l_bin_assignments);
- free(final_bin_assignments);
- free(final_bin_counts);
- clReleaseMemObject(bin_counters);
- clReleaseMemObject(bins);
- clReleaseMemObject(bin_assignments);
+
if (errors == 0)
{
log_info("add_index_bin_test passed. Each item was put in the correct "
diff --git a/test_conformance/basic/CMakeLists.txt b/test_conformance/basic/CMakeLists.txt
index dde3311d..684a7d1d 100644
--- a/test_conformance/basic/CMakeLists.txt
+++ b/test_conformance/basic/CMakeLists.txt
@@ -2,7 +2,7 @@ set(MODULE_NAME BASIC)
set(${MODULE_NAME}_SOURCES
main.cpp
- test_fpmath_float.cpp
+ test_fpmath.cpp
test_intmath.cpp
test_hiloeo.cpp test_local.cpp test_pointercast.cpp
test_if.cpp test_loop.cpp
@@ -11,7 +11,7 @@ set(${MODULE_NAME}_SOURCES
test_multireadimageonefmt.cpp test_multireadimagemultifmt.cpp
test_imagedim.cpp
test_vloadstore.cpp
- test_int2float.cpp test_float2int.cpp
+ test_int2fp.cpp
test_createkernelsinprogram.cpp
test_hostptr.cpp
test_explicit_s2v.cpp
@@ -52,14 +52,12 @@ set(${MODULE_NAME}_SOURCES
test_kernel_call_kernel_function.cpp
test_local_kernel_scope.cpp
test_progvar.cpp
- test_wg_barrier.cpp
test_global_linear_id.cpp
test_local_linear_id.cpp
test_enqueued_local_size.cpp
test_simple_image_pitch.cpp
test_get_linear_ids.cpp
test_rw_image_access_qualifier.cpp
- test_wg_barrier.cpp
test_enqueued_local_size.cpp
test_global_linear_id.cpp
test_local_linear_id.cpp
@@ -70,6 +68,6 @@ if(APPLE)
list(APPEND ${MODULE_NAME}_SOURCES test_queue_priority.cpp)
endif(APPLE)
-set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
+set_gnulike_module_compile_flags("-Wno-sign-compare")
include(../CMakeCommon.txt)
diff --git a/test_conformance/basic/main.cpp b/test_conformance/basic/main.cpp
index 86c3cec3..d1901f95 100644
--- a/test_conformance/basic/main.cpp
+++ b/test_conformance/basic/main.cpp
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2017 The Khronos Group Inc.
+// Copyright (c) 2023 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -22,14 +22,15 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+
+#include <CL/cl_half.h>
+
#include "harness/testHarness.h"
#include "procs.h"
test_definition test_list[] = {
ADD_TEST(hostptr),
- ADD_TEST(fpmath_float),
- ADD_TEST(fpmath_float2),
- ADD_TEST(fpmath_float4),
+ ADD_TEST(fpmath),
ADD_TEST(intmath_int),
ADD_TEST(intmath_int2),
ADD_TEST(intmath_int4),
@@ -58,8 +59,8 @@ test_definition test_list[] = {
ADD_TEST(image_r8),
ADD_TEST(barrier),
ADD_TEST_VERSION(wg_barrier, Version(2, 0)),
- ADD_TEST(int2float),
- ADD_TEST(float2int),
+ ADD_TEST(int2fp),
+ ADD_TEST(fp2int),
ADD_TEST(imagereadwrite),
ADD_TEST(imagereadwrite3d),
ADD_TEST(readimage3d),
@@ -155,7 +156,7 @@ test_definition test_list[] = {
ADD_TEST(simple_read_image_pitch),
ADD_TEST(simple_write_image_pitch),
-#if defined( __APPLE__ )
+#if defined(__APPLE__)
ADD_TEST(queue_priority),
#endif
@@ -164,9 +165,35 @@ test_definition test_list[] = {
};
const int test_num = ARRAY_SIZE( test_list );
+cl_half_rounding_mode halfRoundingMode = CL_HALF_RTE;
+
+test_status InitCL(cl_device_id device)
+{
+ if (is_extension_available(device, "cl_khr_fp16"))
+ {
+ const cl_device_fp_config fpConfigHalf =
+ get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG);
+ if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0)
+ {
+ halfRoundingMode = CL_HALF_RTE;
+ }
+ else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0)
+ {
+ halfRoundingMode = CL_HALF_RTZ;
+ }
+ else
+ {
+ log_error("Error while acquiring half rounding mode");
+ return TEST_FAIL;
+ }
+ }
+
+ return TEST_PASS;
+}
int main(int argc, const char *argv[])
{
- return runTestHarness(argc, argv, test_num, test_list, false, 0);
+ return runTestHarnessWithCheck(argc, argv, test_num, test_list, false, 0,
+ InitCL);
}
diff --git a/test_conformance/basic/procs.h b/test_conformance/basic/procs.h
index c14340de..b685ecd5 100644
--- a/test_conformance/basic/procs.h
+++ b/test_conformance/basic/procs.h
@@ -1,6 +1,6 @@
//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -13,6 +13,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
+
#include "harness/kernelHelpers.h"
#include "harness/testHarness.h"
#include "harness/errorHelpers.h"
@@ -21,9 +22,8 @@
#include "harness/rounding_mode.h"
extern int test_hostptr(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_fpmath_float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_fpmath_float2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_fpmath_float4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_fpmath(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
extern int test_intmath_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_int2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_intmath_int4(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
@@ -52,8 +52,10 @@ extern int test_image_r8(cl_device_id deviceID, cl_context context, cl_comm
extern int test_simplebarrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_wg_barrier(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_int2float(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
-extern int test_float2int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
+extern int test_int2fp(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_fp2int(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements);
extern int test_imagearraycopy(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagearraycopy3d(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
extern int test_imagereadwrite(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements);
diff --git a/test_conformance/basic/test_astype.cpp b/test_conformance/basic/test_astype.cpp
index 7281f904..45669a7c 100644
--- a/test_conformance/basic/test_astype.cpp
+++ b/test_conformance/basic/test_astype.cpp
@@ -14,62 +14,39 @@
// limitations under the License.
//
#include "harness/compat.h"
+#include "harness/conversions.h"
+#include "harness/stringHelpers.h"
+#include "harness/typeWrappers.h"
+#include <limits.h>
#include <stdio.h>
#include <string.h>
-#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
-
+#include <vector>
#include "procs.h"
-#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
+// clang-format off
-static const char *astype_kernel_pattern =
-"%s\n"
-"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n"
-"{\n"
-" int tid = get_global_id( 0 );\n"
-" %s%s tmp = as_%s%s( src[ tid ] );\n"
-" dst[ tid ] = tmp;\n"
-"}\n";
-
-static const char *astype_kernel_pattern_V3srcV3dst =
-"%s\n"
-"__kernel void test_fn( __global %s *src, __global %s *dst )\n"
-"{\n"
-" int tid = get_global_id( 0 );\n"
-" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
-" vstore3(tmp,tid,dst);\n"
-"}\n";
-// in the printf, remove the third and fifth argument, each of which
-// should be a "3", when copying from the printf for astype_kernel_pattern
-
-static const char *astype_kernel_pattern_V3dst =
-"%s\n"
-"__kernel void test_fn( __global %s%s *src, __global %s *dst )\n"
-"{\n"
-" int tid = get_global_id( 0 );\n"
-" %s3 tmp = as_%s3( src[ tid ] );\n"
-" vstore3(tmp,tid,dst);\n"
-"}\n";
-// in the printf, remove the fifth argument, which
-// should be a "3", when copying from the printf for astype_kernel_pattern
+static char extension[128] = { 0 };
+static char strLoad[128] = { 0 };
+static char strStore[128] = { 0 };
+static const char *regLoad = "as_%s%s(src[tid]);\n";
+static const char *v3Load = "as_%s%s(vload3(tid,(__global %s*)src));\n";
+static const char *regStore = "dst[tid] = tmp;\n";
+static const char *v3Store = "vstore3(tmp, tid, (__global %s*)dst);\n";
-
-static const char *astype_kernel_pattern_V3src =
-"%s\n"
-"__kernel void test_fn( __global %s *src, __global %s%s *dst )\n"
+static const char* astype_kernel_pattern[] = {
+extension,
+"__kernel void test_fn( __global %s%s *src, __global %s%s *dst )\n"
"{\n"
-" int tid = get_global_id( 0 );\n"
-" %s%s tmp = as_%s%s( vload3(tid,src) );\n"
-" dst[ tid ] = tmp;\n"
-"}\n";
-// in the printf, remove the third argument, which
-// should be a "3", when copying from the printf for astype_kernel_pattern
+" int tid = get_global_id( 0 );\n",
+" %s%s tmp = ", strLoad,
+" ", strStore,
+"}\n"};
+// clang-format on
int test_astype_set( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType inVecType, ExplicitType outVecType,
unsigned int vecSize, unsigned int outVecSize,
@@ -81,68 +58,60 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q
clKernelWrapper kernel;
clMemWrapper streams[ 2 ];
- char programSrc[ 10240 ];
size_t threads[ 1 ], localThreads[ 1 ];
size_t typeSize = get_explicit_type_size( inVecType );
size_t outTypeSize = get_explicit_type_size(outVecType);
char sizeNames[][ 3 ] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
- MTdata d;
-
-
-
- // Create program
- if(outVecSize == 3 && vecSize == 3) {
- // astype_kernel_pattern_V3srcV3dst
- sprintf( programSrc, astype_kernel_pattern_V3srcV3dst,
- (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
- get_explicit_type_name( inVecType ), // sizeNames[ vecSize ],
- get_explicit_type_name( outVecType ), // sizeNames[ outVecSize ],
- get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
- get_explicit_type_name( outVecType ), sizeNames[ outVecSize ] );
- } else if(outVecSize == 3) {
- // astype_kernel_pattern_V3dst
- sprintf( programSrc, astype_kernel_pattern_V3dst,
- (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
- get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
- get_explicit_type_name( outVecType ),
- get_explicit_type_name( outVecType ),
- get_explicit_type_name( outVecType ));
-
- } else if(vecSize == 3) {
- // astype_kernel_pattern_V3src
- sprintf( programSrc, astype_kernel_pattern_V3src,
- (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
- get_explicit_type_name( inVecType ),// sizeNames[ vecSize ],
- get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
- get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
- get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
- } else {
- sprintf( programSrc, astype_kernel_pattern,
- (outVecType == kDouble || inVecType == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
- get_explicit_type_name( inVecType ), sizeNames[ vecSize ],
- get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
- get_explicit_type_name( outVecType ), sizeNames[ outVecSize ],
- get_explicit_type_name( outVecType ), sizeNames[ outVecSize ]);
- }
-
- const char *ptr = programSrc;
+ MTdataHolder d(gRandomSeed);
+
+ std::ostringstream sstr;
+ if (outVecType == kDouble || inVecType == kDouble)
+ sstr << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+
+ if (outVecType == kHalf || inVecType == kHalf)
+ sstr << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
+
+ strcpy(extension, sstr.str().c_str());
+
+ if (vecSize == 3)
+ std::snprintf(strLoad, sizeof(strLoad), v3Load,
+ get_explicit_type_name(outVecType), sizeNames[outVecSize],
+ get_explicit_type_name(inVecType));
+ else
+ std::snprintf(strLoad, sizeof(strLoad), regLoad,
+ get_explicit_type_name(outVecType),
+ sizeNames[outVecSize]);
+
+ if (outVecSize == 3)
+ std::snprintf(strStore, sizeof(strStore), v3Store,
+ get_explicit_type_name(outVecType));
+ else
+ std::snprintf(strStore, sizeof(strStore), "%s", regStore);
+
+ auto str =
+ concat_kernel(astype_kernel_pattern,
+ sizeof(astype_kernel_pattern) / sizeof(const char *));
+ std::string kernelSource =
+ str_sprintf(str, get_explicit_type_name(inVecType), sizeNames[vecSize],
+ get_explicit_type_name(outVecType), sizeNames[outVecSize],
+ get_explicit_type_name(outVecType), sizeNames[outVecSize]);
+
+ const char *ptr = kernelSource.c_str();
error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
test_error( error, "Unable to create testing kernel" );
-
// Create some input values
size_t inBufferSize = sizeof(char)* numElements * get_explicit_type_size( inVecType ) * vecSize;
- char *inBuffer = (char*)malloc( inBufferSize );
+ std::vector<char> inBuffer(inBufferSize);
size_t outBufferSize = sizeof(char)* numElements * get_explicit_type_size( outVecType ) *outVecSize;
- char *outBuffer = (char*)malloc( outBufferSize );
+ std::vector<char> outBuffer(outBufferSize);
- d = init_genrand( gRandomSeed );
- generate_random_data( inVecType, numElements * vecSize,
- d, inBuffer );
- free_mtdata(d); d = NULL;
+ generate_random_data(inVecType, numElements * vecSize, d,
+ &inBuffer.front());
// Create I/O streams and set arguments
- streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, inBufferSize, inBuffer, &error );
+ streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize,
+ &inBuffer.front(), &error);
test_error( error, "Unable to create I/O stream" );
streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, outBufferSize, NULL, &error );
test_error( error, "Unable to create I/O stream" );
@@ -161,15 +130,15 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q
error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
test_error( error, "Unable to run kernel" );
-
// Get the results and compare
// The beauty is that astype is supposed to return the bit pattern as a different type, which means
// the output should have the exact same bit pattern as the input. No interpretation necessary!
- error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, outBufferSize, outBuffer, 0, NULL, NULL );
+ error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, outBufferSize,
+ &outBuffer.front(), 0, NULL, NULL);
test_error( error, "Unable to read results" );
- char *expected = inBuffer;
- char *actual = outBuffer;
+ char *expected = &inBuffer.front();
+ char *actual = &outBuffer.front();
size_t compSize = typeSize*vecSize;
if(outTypeSize*outVecSize < compSize) {
compSize = outTypeSize*outVecSize;
@@ -178,8 +147,6 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q
if(outVecSize == 4 && vecSize == 3)
{
// as_type4(vec3) should compile but produce undefined results??
- free(inBuffer);
- free(outBuffer);
return 0;
}
@@ -188,8 +155,6 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q
// as_typen(vecm) should compile and run but produce
// implementation-defined results for m != n
// and n*sizeof(type) = sizeof(vecm)
- free(inBuffer);
- free(outBuffer);
return 0;
}
@@ -203,17 +168,14 @@ int test_astype_set( cl_device_id device, cl_context context, cl_command_queue q
GetDataVectorString( expected, typeSize, vecSize, expectedString ),
GetDataVectorString( actual, typeSize, vecSize, actualString ) );
log_error("Src is :\n%s\n----\n%d threads %d localthreads\n",
- programSrc, (int)threads[0],(int) localThreads[0]);
- free(inBuffer);
- free(outBuffer);
+ kernelSource.c_str(), (int)threads[0],
+ (int)localThreads[0]);
return 1;
}
expected += typeSize * vecSize;
actual += outTypeSize * outVecSize;
}
- free(inBuffer);
- free(outBuffer);
return 0;
}
@@ -223,31 +185,39 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue,
// legal in OpenCL 1.0, the result is dependent on the device it runs on, which means there's no actual way
// for us to verify what is "valid". So the only thing we can test are types that match in size independent
// of the element count (char -> uchar, etc)
- ExplicitType vecTypes[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
- unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
+ const std::vector<ExplicitType> vecTypes = { kChar, kUChar, kShort,
+ kUShort, kInt, kUInt,
+ kLong, kULong, kFloat,
+ kHalf, kDouble };
+ const unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
unsigned int inTypeIdx, outTypeIdx, sizeIdx, outSizeIdx;
size_t inTypeSize, outTypeSize;
int error = 0;
- for( inTypeIdx = 0; vecTypes[ inTypeIdx ] != kNumExplicitTypes; inTypeIdx++ )
+ bool fp16Support = is_extension_available(device, "cl_khr_fp16");
+ bool fp64Support = is_extension_available(device, "cl_khr_fp64");
+
+ auto skip_type = [&](ExplicitType et) {
+ if ((et == kLong || et == kULong) && !gHasLong)
+ return true;
+ else if (et == kDouble && !fp64Support)
+ return true;
+ else if (et == kHalf && !fp16Support)
+ return true;
+ return false;
+ };
+
+ for (inTypeIdx = 0; inTypeIdx < vecTypes.size(); inTypeIdx++)
{
inTypeSize = get_explicit_type_size(vecTypes[inTypeIdx]);
- if( vecTypes[ inTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
- continue;
-
- if (( vecTypes[ inTypeIdx ] == kLong || vecTypes[ inTypeIdx ] == kULong ) && !gHasLong )
- continue;
+ if (skip_type(vecTypes[inTypeIdx])) continue;
- for( outTypeIdx = 0; vecTypes[ outTypeIdx ] != kNumExplicitTypes; outTypeIdx++ )
+ for (outTypeIdx = 0; outTypeIdx < vecTypes.size(); outTypeIdx++)
{
outTypeSize = get_explicit_type_size(vecTypes[outTypeIdx]);
- if( vecTypes[ outTypeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) ) {
- continue;
- }
- if (( vecTypes[ outTypeIdx ] == kLong || vecTypes[ outTypeIdx ] == kULong ) && !gHasLong )
- continue;
+ if (skip_type(vecTypes[outTypeIdx])) continue;
// change this check
if( inTypeIdx == outTypeIdx ) {
@@ -259,7 +229,6 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue,
for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ )
{
-
for(outSizeIdx = 0; vecSizes[outSizeIdx] != 0; outSizeIdx++)
{
if(vecSizes[sizeIdx]*inTypeSize !=
@@ -268,10 +237,7 @@ int test_astype(cl_device_id device, cl_context context, cl_command_queue queue,
continue;
}
error += test_astype_set( device, context, queue, vecTypes[ inTypeIdx ], vecTypes[ outTypeIdx ], vecSizes[ sizeIdx ], vecSizes[outSizeIdx], n_elems );
-
-
}
-
}
if(get_explicit_type_size(vecTypes[inTypeIdx]) ==
get_explicit_type_size(vecTypes[outTypeIdx])) {
diff --git a/test_conformance/basic/test_async_copy.cpp b/test_conformance/basic/test_async_copy.cpp
index a537c8fe..bb529bce 100644
--- a/test_conformance/basic/test_async_copy.cpp
+++ b/test_conformance/basic/test_async_copy.cpp
@@ -20,8 +20,7 @@
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
-
-
+#include <vector>
#include "procs.h"
#include "harness/conversions.h"
@@ -86,8 +85,7 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
clKernelWrapper kernel;
clMemWrapper streams[ 2 ];
size_t threads[ 1 ], localThreads[ 1 ];
- void *inBuffer, *outBuffer;
- MTdata d;
+ MTdataHolder d(gRandomSeed);
char vecNameString[64]; vecNameString[0] = 0;
if (vecSize == 1)
sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
@@ -109,9 +107,15 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
char programSource[4096]; programSource[0]=0;
char *programPtr;
- sprintf(programSource, kernelCode,
- vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
- vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
+ std::string extStr = "";
+ if (vecType == kDouble)
+ extStr = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
+ else if (vecType == kHalf)
+ extStr = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable";
+
+ sprintf(programSource, kernelCode, extStr.c_str(), vecNameString,
+ vecNameString, vecNameString, vecNameString,
+ get_explicit_type_name(vecType), vecNameString, vecNameString);
//log_info("program: %s\n", programSource);
programPtr = programSource;
@@ -150,9 +154,10 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize;
size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;
- inBuffer = (void*)malloc(globalBufferSize);
- outBuffer = (void*)malloc(globalBufferSize);
- memset(outBuffer, 0, globalBufferSize);
+ std::vector<unsigned char> inBuffer(globalBufferSize);
+ std::vector<unsigned char> outBuffer(globalBufferSize);
+
+ outBuffer.assign(globalBufferSize, 0);
cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
@@ -164,13 +169,15 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
threads[0] = globalWorkgroupSize;
localThreads[0] = localWorkgroupSize;
- d = init_genrand( gRandomSeed );
- generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer );
- free_mtdata(d); d = NULL;
+ generate_random_data(vecType,
+ globalBufferSize / get_explicit_type_size(vecType), d,
+ &inBuffer.front());
- streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error );
+ streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize,
+ &inBuffer.front(), &error);
test_error( error, "Unable to create input buffer" );
- streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error );
+ streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize,
+ &outBuffer.front(), &error);
test_error( error, "Unable to create output buffer" );
error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
@@ -189,16 +196,18 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
test_error( error, "Unable to queue kernel" );
// Read
- error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL );
+ error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, globalBufferSize,
+ &outBuffer.front(), 0, NULL, NULL);
test_error( error, "Unable to read results" );
// Verify
int failuresPrinted = 0;
- if( memcmp( inBuffer, outBuffer, globalBufferSize ) != 0 )
+ if (memcmp(&inBuffer.front(), &outBuffer.front(), globalBufferSize) != 0)
{
size_t typeSize = get_explicit_type_size(vecType)* vecSize;
- unsigned char * inchar = (unsigned char*)inBuffer;
- unsigned char * outchar = (unsigned char*)outBuffer;
+ unsigned char *inchar = static_cast<unsigned char *>(&inBuffer.front());
+ unsigned char *outchar =
+ static_cast<unsigned char *>(&outBuffer.front());
for (int i=0; i< (int)globalBufferSize; i+=(int)elementSize) {
if (memcmp( ((char *)inchar)+i, ((char *)outchar)+i, typeSize) != 0 )
{
@@ -226,26 +235,29 @@ int test_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue,
}
}
- free(inBuffer);
- free(outBuffer);
-
return failuresPrinted ? -1 : 0;
}
int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode) {
- ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
+ const std::vector<ExplicitType> vecType = { kChar, kUChar, kShort, kUShort,
+ kInt, kUInt, kLong, kULong,
+ kFloat, kHalf, kDouble };
unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
unsigned int size, typeIndex;
int errors = 0;
- for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ )
- {
- if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
- continue;
+ bool fp16Support = is_extension_available(deviceID, "cl_khr_fp16");
+ bool fp64Support = is_extension_available(deviceID, "cl_khr_fp64");
+ for (typeIndex = 0; typeIndex < vecType.size(); typeIndex++)
+ {
if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong )
continue;
+ else if (vecType[typeIndex] == kDouble && !fp64Support)
+ continue;
+ else if (vecType[typeIndex] == kHalf && !fp16Support)
+ continue;
for( size = 0; vecSizes[ size ] != 0; size++ )
{
@@ -259,9 +271,6 @@ int test_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_qu
return 0;
}
-
-
-
int test_async_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_copy_all_types( deviceID, context, queue, async_global_to_local_kernel );
diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp
index bf3f1552..11ef84bd 100644
--- a/test_conformance/basic/test_async_copy2D.cpp
+++ b/test_conformance/basic/test_async_copy2D.cpp
@@ -27,17 +27,25 @@
static const char *async_global_to_local_kernel2D = R"OpenCLC(
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
-%s // optional pragma string
-__kernel void test_fn(const __global %s *src, __global %s *dst,
- __local %s *localBuffer, int numElementsPerLine,
+#define STRUCT_SIZE %d
+typedef struct __attribute__((packed))
+{
+ uchar byte[STRUCT_SIZE];
+} VarSizeStruct __attribute__((aligned(1)));
+
+
+__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst,
+ __local VarSizeStruct *localBuffer, int numElementsPerLine,
int lineCopiesPerWorkgroup, int lineCopiesPerWorkItem,
int srcStride, int dstStride) {
// Zero the local storage first
for (int i = 0; i < lineCopiesPerWorkItem; i++) {
for (int j = 0; j < numElementsPerLine; j++) {
const int index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j;
- localBuffer[index] = (%s)(%s)0;
+ for (int k = 0; k < STRUCT_SIZE; k++) {
+ localBuffer[index].byte[k] = 0;
+ }
}
}
@@ -45,7 +53,7 @@ __kernel void test_fn(const __global %s *src, __global %s *dst,
// try the copy
barrier( CLK_LOCAL_MEM_FENCE );
event_t event = async_work_group_copy_2D2D(localBuffer, 0, src,
- lineCopiesPerWorkgroup * get_group_id(0) * srcStride, sizeof(%s),
+ lineCopiesPerWorkgroup * get_group_id(0) * srcStride, sizeof(VarSizeStruct),
(size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride, dstStride, 0);
// Wait for the copy to complete, then verify by manually copying to the dest
@@ -63,16 +71,24 @@ __kernel void test_fn(const __global %s *src, __global %s *dst,
static const char *async_local_to_global_kernel2D = R"OpenCLC(
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
-%s // optional pragma string
-__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer,
+#define STRUCT_SIZE %d
+typedef struct __attribute__((packed))
+{
+ uchar byte[STRUCT_SIZE];
+} VarSizeStruct __attribute__((aligned(1)));
+
+
+__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst, __local VarSizeStruct *localBuffer,
int numElementsPerLine, int lineCopiesPerWorkgroup,
int lineCopiesPerWorkItem, int srcStride, int dstStride) {
// Zero the local storage first
for (int i = 0; i < lineCopiesPerWorkItem; i++) {
for (int j = 0; j < numElementsPerLine; j++) {
const int index = (get_local_id(0) * lineCopiesPerWorkItem + i) * srcStride + j;
- localBuffer[index] = (%s)(%s)0;
+ for (int k = 0; k < STRUCT_SIZE; k++) {
+ localBuffer[index].byte[k] = 0;
+ }
}
}
@@ -90,36 +106,22 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca
// Do this to verify all kernels are done copying to the local buffer before we try the copy
barrier(CLK_LOCAL_MEM_FENCE);
event_t event = async_work_group_copy_2D2D(dst, lineCopiesPerWorkgroup * get_group_id(0) * dstStride,
- localBuffer, 0, sizeof(%s), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride,
+ localBuffer, 0, sizeof(VarSizeStruct), (size_t)numElementsPerLine, (size_t)lineCopiesPerWorkgroup, srcStride,
dstStride, 0 );
wait_group_events(1, &event);
};
)OpenCLC";
-int test_copy2D(cl_device_id deviceID, cl_context context,
- cl_command_queue queue, const char *kernelCode,
- ExplicitType vecType, int vecSize, int srcMargin, int dstMargin,
- bool localIsDst)
+int test_copy2D(const cl_device_id deviceID, const cl_context context,
+ const cl_command_queue queue, const char *const kernelCode,
+ const size_t elementSize, const int srcMargin,
+ const int dstMargin, const bool localIsDst)
{
int error;
- clProgramWrapper program;
- clKernelWrapper kernel;
- clMemWrapper streams[2];
- size_t threads[1], localThreads[1];
- void *inBuffer, *outBuffer, *outBufferCopy;
- MTdata d;
- char vecNameString[64];
- vecNameString[0] = 0;
- if (vecSize == 1)
- sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
- else
- sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType),
- vecSize);
- size_t elementSize = get_explicit_type_size(vecType) * vecSize;
- log_info("Testing %s with srcMargin = %d, dstMargin = %d\n", vecNameString,
- srcMargin, dstMargin);
+ log_info("Testing %d byte element with srcMargin = %d, dstMargin = %d\n",
+ elementSize, srcMargin, dstMargin);
cl_long max_local_mem_size;
error =
@@ -139,6 +141,13 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
test_error(error,
"clGetDeviceInfo for CL_DEVICE_MAX_MEM_ALLOC_SIZE failed.");
+ cl_long max_work_group_size;
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE,
+ sizeof(max_work_group_size), &max_work_group_size,
+ NULL);
+ test_error(error,
+ "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed.");
+
if (max_alloc_size > max_global_mem_size / 2)
max_alloc_size = max_global_mem_size / 2;
@@ -149,20 +158,17 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
test_error(error,
"clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");
- char programSource[4096];
- programSource[0] = 0;
- char *programPtr;
+ char programSource[4096] = { 0 };
+ const char *programPtr = programSource;
- sprintf(programSource, kernelCode,
- vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
- : "",
- vecNameString, vecNameString, vecNameString, vecNameString,
- get_explicit_type_name(vecType), vecNameString);
+ sprintf(programSource, kernelCode, elementSize);
// log_info("program: %s\n", programSource);
- programPtr = programSource;
+
+ clProgramWrapper program;
+ clKernelWrapper kernel;
error = create_single_kernel_helper(context, &program, &kernel, 1,
- (const char **)&programPtr, "test_fn");
+ &programPtr, "test_fn");
test_error(error, "Unable to create testing kernel");
size_t max_workgroup_size;
@@ -188,9 +194,6 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
const cl_int dstStride = numElementsPerLine + dstMargin;
const cl_int srcStride = numElementsPerLine + srcMargin;
- elementSize =
- get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
-
const size_t lineCopiesPerWorkItem = 13;
const size_t localStorageSpacePerWorkitem = lineCopiesPerWorkItem
* elementSize * (localIsDst ? dstStride : srcStride);
@@ -208,7 +211,6 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
if (maxLocalWorkgroupSize > max_workgroup_size)
localWorkgroupSize = max_workgroup_size;
-
const size_t maxTotalLinesIn =
(max_alloc_size / elementSize + srcMargin) / srcStride;
const size_t maxTotalLinesOut =
@@ -231,9 +233,17 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
const size_t globalWorkgroupSize =
numberOfLocalWorkgroups * localWorkgroupSize;
- inBuffer = (void *)malloc(inBufferSize);
- outBuffer = (void *)malloc(outBufferSize);
- outBufferCopy = (void *)malloc(outBufferSize);
+ if ((localBufferSize / 4) > max_work_group_size)
+ {
+ log_info("Skipping due to resource requirements local:%db "
+ "max_work_group_size:%d\n",
+ localBufferSize, max_work_group_size);
+ return 0;
+ }
+
+ void *const inBuffer = (void *)malloc(inBufferSize);
+ void *const outBuffer = (void *)malloc(outBufferSize);
+ void *const outBufferCopy = (void *)malloc(outBufferSize);
const cl_int lineCopiesPerWorkItemInt =
static_cast<cl_int>(lineCopiesPerWorkItem);
@@ -250,18 +260,20 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
(int)inBufferSize, (int)outBufferSize, lineCopiesPerWorkgroup,
lineCopiesPerWorkItemInt);
+ size_t threads[1], localThreads[1];
+
threads[0] = globalWorkgroupSize;
localThreads[0] = localWorkgroupSize;
- d = init_genrand(gRandomSeed);
- generate_random_data(
- vecType, inBufferSize / get_explicit_type_size(vecType), d, inBuffer);
- generate_random_data(
- vecType, outBufferSize / get_explicit_type_size(vecType), d, outBuffer);
+ MTdata d = init_genrand(gRandomSeed);
+ generate_random_data(kChar, inBufferSize, d, inBuffer);
+ generate_random_data(kChar, outBufferSize, d, outBuffer);
free_mtdata(d);
d = NULL;
memcpy(outBufferCopy, outBuffer, outBufferSize);
+ clMemWrapper streams[2];
+
streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize,
inBuffer, &error);
test_error(error, "Unable to create input buffer");
@@ -301,8 +313,7 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
// Verify
int failuresPrinted = 0;
- // Verify
- size_t typeSize = get_explicit_type_size(vecType) * vecSize;
+
for (int i = 0;
i < (int)globalWorkgroupSize * lineCopiesPerWorkItem * elementSize;
i += elementSize)
@@ -313,13 +324,12 @@ int test_copy2D(cl_device_id deviceID, cl_context context,
int inIdx = i * srcStride + j;
int outIdx = i * dstStride + j;
if (memcmp(((char *)inBuffer) + inIdx, ((char *)outBuffer) + outIdx,
- typeSize)
+ elementSize)
!= 0)
{
unsigned char *inchar = (unsigned char *)inBuffer + inIdx;
unsigned char *outchar = (unsigned char *)outBuffer + outIdx;
- char values[4096];
- values[0] = 0;
+ char values[4096] = { 0 };
if (failuresPrinted == 0)
{
@@ -382,16 +392,14 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context,
cl_command_queue queue, const char *kernelCode,
bool localIsDst)
{
- ExplicitType vecType[] = {
- kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong,
- kULong, kFloat, kDouble, kNumExplicitTypes
- };
+ const unsigned int elemSizes[] = { 1, 2, 3, 4, 5, 6, 7,
+ 8, 13, 16, 32, 47, 64 };
// The margins below represent the number of elements between the end of
// one line and the start of the next. The strides are equivalent to the
// length of the line plus the chosen margin.
- unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
- unsigned int smallTypesMarginSizes[] = { 0, 10, 100 };
- unsigned int size, typeIndex, srcMargin, dstMargin;
+ // These have to be multipliers, because the margin must be a multiple of
+ // element size.
+ const unsigned int marginMultipliers[] = { 0, 10, 100 };
int errors = 0;
@@ -399,55 +407,27 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context,
{
log_info(
"Device does not support extended async copies. Skipping test.\n");
- return 0;
}
-
- for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++)
+ else
{
- if (vecType[typeIndex] == kDouble
- && !is_extension_available(deviceID, "cl_khr_fp64"))
- continue;
-
- if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong)
- && !gHasLong)
- continue;
-
- for (size = 0; vecSizes[size] != 0; size++)
+ for (const unsigned int elemSize : elemSizes)
{
- if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size]
- <= 2) // small type
+ for (const unsigned int srcMarginMultiplier : marginMultipliers)
{
- for (srcMargin = 0; srcMargin < sizeof(smallTypesMarginSizes)
- / sizeof(smallTypesMarginSizes[0]);
- srcMargin++)
+ for (const unsigned int dstMarginMultiplier : marginMultipliers)
{
- for (dstMargin = 0;
- dstMargin < sizeof(smallTypesMarginSizes)
- / sizeof(smallTypesMarginSizes[0]);
- dstMargin++)
+ if (test_copy2D(deviceID, context, queue, kernelCode,
+ elemSize, srcMarginMultiplier * elemSize,
+ dstMarginMultiplier * elemSize, localIsDst))
{
- if (test_copy2D(deviceID, context, queue, kernelCode,
- vecType[typeIndex], vecSizes[size],
- smallTypesMarginSizes[srcMargin],
- smallTypesMarginSizes[dstMargin],
- localIsDst))
- {
- errors++;
- }
+ errors++;
}
}
}
- // not a small type, check only zero stride
- else if (test_copy2D(deviceID, context, queue, kernelCode,
- vecType[typeIndex], vecSizes[size], 0, 0,
- localIsDst))
- {
- errors++;
- }
}
}
- if (errors) return -1;
- return 0;
+
+ return errors ? -1 : 0;
}
int test_async_copy_global_to_local2D(cl_device_id deviceID, cl_context context,
diff --git a/test_conformance/basic/test_async_copy3D.cpp b/test_conformance/basic/test_async_copy3D.cpp
index 5eb41ebc..aa22f3a2 100644
--- a/test_conformance/basic/test_async_copy3D.cpp
+++ b/test_conformance/basic/test_async_copy3D.cpp
@@ -27,9 +27,14 @@
static const char *async_global_to_local_kernel3D = R"OpenCLC(
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
-%s // optional pragma string
-__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer,
+#define STRUCT_SIZE %d
+typedef struct __attribute__((packed))
+{
+ uchar byte[STRUCT_SIZE];
+} VarSizeStruct __attribute__((aligned(1)));
+
+__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst, __local VarSizeStruct *localBuffer,
int numElementsPerLine, int numLines, int planesCopiesPerWorkgroup,
int planesCopiesPerWorkItem, int srcLineStride,
int dstLineStride, int srcPlaneStride, int dstPlaneStride ) {
@@ -38,7 +43,9 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca
for (int j = 0; j < numLines; j++) {
for (int k = 0; k < numElementsPerLine; k++) {
const int index = (get_local_id(0) * planesCopiesPerWorkItem + i) * dstPlaneStride + j * dstLineStride + k;
- localBuffer[index] = (%s)(%s)0;
+ for (int k = 0; k < STRUCT_SIZE; k++) {
+ localBuffer[index].byte[k] = 0;
+ }
}
}
}
@@ -48,7 +55,7 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca
event_t event = async_work_group_copy_3D3D(localBuffer, 0, src,
planesCopiesPerWorkgroup * get_group_id(0) * srcPlaneStride,
- sizeof(%s), (size_t)numElementsPerLine, (size_t)numLines,
+ sizeof(VarSizeStruct), (size_t)numElementsPerLine, (size_t)numLines,
planesCopiesPerWorkgroup, srcLineStride, srcPlaneStride, dstLineStride,
dstPlaneStride, 0);
@@ -69,9 +76,14 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca
static const char *async_local_to_global_kernel3D = R"OpenCLC(
#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable
-%s // optional pragma string
-__kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *localBuffer,
+#define STRUCT_SIZE %d
+typedef struct __attribute__((packed))
+{
+ uchar byte[STRUCT_SIZE];
+} VarSizeStruct __attribute__((aligned(1)));
+
+__kernel void test_fn(const __global VarSizeStruct *src, __global VarSizeStruct *dst, __local VarSizeStruct *localBuffer,
int numElementsPerLine, int numLines, int planesCopiesPerWorkgroup,
int planesCopiesPerWorkItem, int srcLineStride,
int dstLineStride, int srcPlaneStride, int dstPlaneStride) {
@@ -80,7 +92,9 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca
for (int j = 0; j < numLines; j++) {
for (int k = 0; k < numElementsPerLine; k++) {
const int index = (get_local_id(0) * planesCopiesPerWorkItem + i) * srcPlaneStride + j * srcLineStride + k;
- localBuffer[index] = (%s)(%s)0;
+ for (int k = 0; k < STRUCT_SIZE; k++) {
+ localBuffer[index].byte[k] = 0;
+ }
}
}
}
@@ -103,39 +117,26 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, __local %s *loca
event_t event = async_work_group_copy_3D3D(dst,
planesCopiesPerWorkgroup * get_group_id(0) * dstPlaneStride, localBuffer, 0,
- sizeof(%s), (size_t)numElementsPerLine, (size_t)numLines, planesCopiesPerWorkgroup,
+ sizeof(VarSizeStruct), (size_t)numElementsPerLine, (size_t)numLines, planesCopiesPerWorkgroup,
srcLineStride, srcPlaneStride, dstLineStride, dstPlaneStride, 0);
wait_group_events(1, &event);
}
)OpenCLC";
-int test_copy3D(cl_device_id deviceID, cl_context context,
- cl_command_queue queue, const char *kernelCode,
- ExplicitType vecType, int vecSize, int srcLineMargin,
- int dstLineMargin, int srcPlaneMargin, int dstPlaneMargin,
- bool localIsDst)
+int test_copy3D(const cl_device_id deviceID, const cl_context context,
+ const cl_command_queue queue, const char *const kernelCode,
+ const size_t elementSize, const int srcLineMargin,
+ const int dstLineMargin, const int srcPlaneMargin,
+ const int dstPlaneMargin, const bool localIsDst)
{
int error;
- clProgramWrapper program;
- clKernelWrapper kernel;
- clMemWrapper streams[2];
- size_t threads[1], localThreads[1];
- void *inBuffer, *outBuffer, *outBufferCopy;
- MTdata d;
- char vecNameString[64];
- vecNameString[0] = 0;
- if (vecSize == 1)
- sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
- else
- sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType),
- vecSize);
- size_t elementSize = get_explicit_type_size(vecType) * vecSize;
- log_info("Testing %s with srcLineMargin = %d, dstLineMargin = %d, "
- "srcPlaneMargin = %d, dstPlaneMargin = %d\n",
- vecNameString, srcLineMargin, dstLineMargin, srcPlaneMargin,
- dstPlaneMargin);
+ log_info(
+ "Testing %d byte element with srcLineMargin = %d, dstLineMargin = %d, "
+ "srcPlaneMargin = %d, dstPlaneMargin = %d\n",
+ elementSize, srcLineMargin, dstLineMargin, srcPlaneMargin,
+ dstPlaneMargin);
cl_long max_local_mem_size;
error =
@@ -165,20 +166,16 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
test_error(error,
"clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");
- char programSource[4096];
- programSource[0] = 0;
- char *programPtr;
+ char programSource[4096] = { 0 };
+ const char *programPtr = programSource;
- sprintf(programSource, kernelCode,
- vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
- : "",
- vecNameString, vecNameString, vecNameString, vecNameString,
- get_explicit_type_name(vecType), vecNameString, vecNameString);
+ sprintf(programSource, kernelCode, elementSize);
// log_info("program: %s\n", programSource);
- programPtr = programSource;
+ clProgramWrapper program;
+ clKernelWrapper kernel;
error = create_single_kernel_helper(context, &program, &kernel, 1,
- (const char **)&programPtr, "test_fn");
+ &programPtr, "test_fn");
test_error(error, "Unable to create testing kernel");
size_t max_workgroup_size;
@@ -196,6 +193,13 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
test_error(error,
"clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
+ cl_long max_work_group_size;
+ error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE,
+ sizeof(max_work_group_size), &max_work_group_size,
+ NULL);
+ test_error(error,
+ "clGetDeviceInfo for CL_DEVICE_MAX_WORK_GROUP_SIZE failed.");
+
// Pick the minimum of the device and the kernel
if (max_workgroup_size > max_local_workgroup_size[0])
max_workgroup_size = max_local_workgroup_size[0];
@@ -208,8 +212,6 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
const cl_int dstPlaneStride = (numLines * dstLineStride) + dstPlaneMargin;
const cl_int srcPlaneStride = (numLines * srcLineStride) + srcPlaneMargin;
- elementSize =
- get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
const size_t planesCopiesPerWorkItem = 2;
const size_t localStorageSpacePerWorkitem = elementSize
* planesCopiesPerWorkItem
@@ -251,9 +253,17 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
const size_t globalWorkgroupSize =
numberOfLocalWorkgroups * localWorkgroupSize;
- inBuffer = (void *)malloc(inBufferSize);
- outBuffer = (void *)malloc(outBufferSize);
- outBufferCopy = (void *)malloc(outBufferSize);
+ if ((localBufferSize / 4) > max_work_group_size)
+ {
+ log_info("Skipping due to resource requirements local:%db "
+ "max_work_group_size:%d\n",
+ localBufferSize, max_work_group_size);
+ return 0;
+ }
+
+ void *const inBuffer = (void *)malloc(inBufferSize);
+ void *const outBuffer = (void *)malloc(outBufferSize);
+ void *const outBufferCopy = (void *)malloc(outBufferSize);
const cl_int planesCopiesPerWorkItemInt =
static_cast<cl_int>(planesCopiesPerWorkItem);
@@ -270,18 +280,20 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
(int)localBufferSize, (int)inBufferSize, (int)outBufferSize,
planesCopiesPerWorkgroup, planesCopiesPerWorkItemInt);
+ size_t threads[1], localThreads[1];
+
threads[0] = globalWorkgroupSize;
localThreads[0] = localWorkgroupSize;
- d = init_genrand(gRandomSeed);
- generate_random_data(
- vecType, inBufferSize / get_explicit_type_size(vecType), d, inBuffer);
- generate_random_data(
- vecType, outBufferSize / get_explicit_type_size(vecType), d, outBuffer);
+ MTdata d = init_genrand(gRandomSeed);
+ generate_random_data(kChar, inBufferSize, d, inBuffer);
+ generate_random_data(kChar, outBufferSize, d, outBuffer);
free_mtdata(d);
d = NULL;
memcpy(outBufferCopy, outBuffer, outBufferSize);
+ clMemWrapper streams[2];
+
streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize,
inBuffer, &error);
test_error(error, "Unable to create input buffer");
@@ -327,8 +339,7 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
// Verify
int failuresPrinted = 0;
- // Verify
- size_t typeSize = get_explicit_type_size(vecType) * vecSize;
+
for (int i = 0;
i < (int)globalWorkgroupSize * planesCopiesPerWorkItem * elementSize;
i += elementSize)
@@ -341,14 +352,13 @@ int test_copy3D(cl_device_id deviceID, cl_context context,
int inIdx = i * srcPlaneStride + j * srcLineStride + k;
int outIdx = i * dstPlaneStride + j * dstLineStride + k;
if (memcmp(((char *)inBuffer) + inIdx,
- ((char *)outBuffer) + outIdx, typeSize)
+ ((char *)outBuffer) + outIdx, elementSize)
!= 0)
{
unsigned char *inchar = (unsigned char *)inBuffer + inIdx;
unsigned char *outchar =
(unsigned char *)outBuffer + outIdx;
- char values[4096];
- values[0] = 0;
+ char values[4096] = { 0 };
if (failuresPrinted == 0)
{
@@ -439,17 +449,14 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context,
cl_command_queue queue, const char *kernelCode,
bool localIsDst)
{
- ExplicitType vecType[] = {
- kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong,
- kULong, kFloat, kDouble, kNumExplicitTypes
- };
+ const unsigned int elemSizes[] = { 1, 2, 3, 4, 5, 6, 7,
+ 8, 13, 16, 32, 47, 64 };
// The margins below represent the number of elements between the end of
- // one line or plane and the start of the next. The strides are equivalent
- // to the size of the line or plane plus the chosen margin.
- unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
- unsigned int smallTypesMarginSizes[] = { 0, 10, 100 };
- unsigned int size, typeIndex, srcLineMargin, dstLineMargin, srcPlaneMargin,
- dstPlaneMargin;
+ // one line and the start of the next. The strides are equivalent to the
+ // size of the line or plane plus the chosen margin.
+ // These have to be multipliers, because the margin must be a multiple of
+ // element size.
+ const unsigned int marginMultipliers[] = { 0, 10, 100 };
int errors = 0;
@@ -457,67 +464,36 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context,
{
log_info(
"Device does not support extended async copies. Skipping test.\n");
- return 0;
}
-
- for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++)
+ else
{
- if (vecType[typeIndex] == kDouble
- && !is_extension_available(deviceID, "cl_khr_fp64"))
- continue;
-
- if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong)
- && !gHasLong)
- continue;
-
- for (size = 0; vecSizes[size] != 0; size++)
+ for (const unsigned int elemSize : elemSizes)
{
- if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size]
- <= 2) // small type
+ for (const unsigned int srcLineMarginMultiplier : marginMultipliers)
{
- for (srcLineMargin = 0;
- srcLineMargin < sizeof(smallTypesMarginSizes)
- / sizeof(smallTypesMarginSizes[0]);
- srcLineMargin++)
+ for (const unsigned int dstLineMarginMultiplier :
+ marginMultipliers)
{
- for (dstLineMargin = 0;
- dstLineMargin < sizeof(smallTypesMarginSizes)
- / sizeof(smallTypesMarginSizes[0]);
- dstLineMargin++)
+ for (const unsigned int srcPlaneMarginMultiplier :
+ marginMultipliers)
{
- for (srcPlaneMargin = 0;
- srcPlaneMargin < sizeof(smallTypesMarginSizes)
- / sizeof(smallTypesMarginSizes[0]);
- srcPlaneMargin++)
+ for (const unsigned int dstPlaneMarginMultiplier :
+ marginMultipliers)
{
- for (dstPlaneMargin = 0;
- dstPlaneMargin < sizeof(smallTypesMarginSizes)
- / sizeof(smallTypesMarginSizes[0]);
- dstPlaneMargin++)
+ if (test_copy3D(deviceID, context, queue,
+ kernelCode, elemSize,
+ srcLineMarginMultiplier * elemSize,
+ dstLineMarginMultiplier * elemSize,
+ srcPlaneMarginMultiplier * elemSize,
+ dstPlaneMarginMultiplier * elemSize,
+ localIsDst))
{
- if (test_copy3D(
- deviceID, context, queue, kernelCode,
- vecType[typeIndex], vecSizes[size],
- smallTypesMarginSizes[srcLineMargin],
- smallTypesMarginSizes[dstLineMargin],
- smallTypesMarginSizes[srcPlaneMargin],
- smallTypesMarginSizes[dstPlaneMargin],
- localIsDst))
- {
- errors++;
- }
+ errors++;
}
}
}
}
}
- // not a small type, check only zero stride
- else if (test_copy3D(deviceID, context, queue, kernelCode,
- vecType[typeIndex], vecSizes[size], 0, 0, 0, 0,
- localIsDst))
- {
- errors++;
- }
}
}
if (errors) return -1;
diff --git a/test_conformance/basic/test_async_strided_copy.cpp b/test_conformance/basic/test_async_strided_copy.cpp
index c456f38d..932e9b8c 100644
--- a/test_conformance/basic/test_async_strided_copy.cpp
+++ b/test_conformance/basic/test_async_strided_copy.cpp
@@ -1,6 +1,6 @@
//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -20,15 +20,16 @@
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
-
-
+#include <vector>
#include "procs.h"
#include "harness/conversions.h"
+// clang-format off
+
static const char *async_strided_global_to_local_kernel =
"%s\n" // optional pragma string
-"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
+"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
"{\n"
" int i;\n"
// Zero the local storage first
@@ -46,7 +47,7 @@ static const char *async_strided_global_to_local_kernel =
static const char *async_strided_local_to_global_kernel =
"%s\n" // optional pragma string
-"%s__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
+"__kernel void test_fn( const __global %s *src, __global %s *dst, __local %s *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem, int stride )\n"
"{\n"
" int i;\n"
// Zero the local storage first
@@ -63,6 +64,7 @@ static const char *async_strided_local_to_global_kernel =
" wait_group_events( 1, &event );\n"
"}\n" ;
+// clang-format on
int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode, ExplicitType vecType, int vecSize, int stride)
{
@@ -71,8 +73,7 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
clKernelWrapper kernel;
clMemWrapper streams[ 2 ];
size_t threads[ 1 ], localThreads[ 1 ];
- void *inBuffer, *outBuffer;
- MTdata d;
+ MTdataHolder d(gRandomSeed);
char vecNameString[64]; vecNameString[0] = 0;
if (vecSize == 1)
@@ -94,10 +95,15 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
char programSource[4096]; programSource[0]=0;
char *programPtr;
- sprintf(programSource, kernelCode,
- vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
- "",
- vecNameString, vecNameString, vecNameString, vecNameString, get_explicit_type_name(vecType), vecNameString, vecNameString);
+ std::string extStr = "";
+ if (vecType == kDouble)
+ extStr = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable";
+ else if (vecType == kHalf)
+ extStr = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable";
+
+ sprintf(programSource, kernelCode, extStr.c_str(), vecNameString,
+ vecNameString, vecNameString, vecNameString,
+ get_explicit_type_name(vecType), vecNameString, vecNameString);
//log_info("program: %s\n", programSource);
programPtr = programSource;
@@ -151,9 +157,9 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
size_t globalBufferSize = numberOfLocalWorkgroups*localBufferSize*stride;
size_t globalWorkgroupSize = numberOfLocalWorkgroups*localWorkgroupSize;
- inBuffer = (void*)malloc(globalBufferSize);
- outBuffer = (void*)malloc(globalBufferSize);
- memset(outBuffer, 0, globalBufferSize);
+ std::vector<unsigned char> inBuffer(globalBufferSize);
+ std::vector<unsigned char> outBuffer(globalBufferSize);
+ memset(outBuffer.data(), 0, globalBufferSize);
cl_int copiesPerWorkItemInt, copiesPerWorkgroup;
copiesPerWorkItemInt = (int)numberOfCopiesPerWorkitem;
@@ -165,13 +171,15 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
threads[0] = globalWorkgroupSize;
localThreads[0] = localWorkgroupSize;
- d = init_genrand( gRandomSeed );
- generate_random_data( vecType, globalBufferSize/get_explicit_type_size(vecType), d, inBuffer );
- free_mtdata(d); d = NULL;
+ generate_random_data(vecType,
+ globalBufferSize / get_explicit_type_size(vecType), d,
+ inBuffer.data());
- streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, inBuffer, &error );
+ streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize,
+ inBuffer.data(), &error);
test_error( error, "Unable to create input buffer" );
- streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, globalBufferSize, outBuffer, &error );
+ streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, globalBufferSize,
+ outBuffer.data(), &error);
test_error( error, "Unable to create output buffer" );
error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] );
@@ -192,17 +200,20 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
test_error( error, "Unable to queue kernel" );
// Read
- error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, globalBufferSize, outBuffer, 0, NULL, NULL );
+ error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, globalBufferSize,
+ outBuffer.data(), 0, NULL, NULL);
test_error( error, "Unable to read results" );
// Verify
size_t typeSize = get_explicit_type_size(vecType)* vecSize;
for (int i=0; i<(int)globalBufferSize; i+=(int)elementSize*(int)stride)
{
- if (memcmp( ((char *)inBuffer)+i, ((char *)outBuffer)+i, typeSize) != 0 )
+ if (memcmp(&inBuffer.at(i), &outBuffer.at(i), typeSize) != 0)
{
- unsigned char * inchar = (unsigned char*)inBuffer + i;
- unsigned char * outchar = (unsigned char*)outBuffer + i;
+ unsigned char *inchar =
+ static_cast<unsigned char *>(&inBuffer.at(i));
+ unsigned char *outchar =
+ static_cast<unsigned char *>(&outBuffer.at(i));
char values[4096];
values[0] = 0;
@@ -215,34 +226,35 @@ int test_strided_copy(cl_device_id deviceID, cl_context context, cl_command_queu
sprintf(values + strlen( values), "%2x ", outchar[j]);
sprintf(values + strlen(values), "]");
log_error("%s\n", values);
- free(inBuffer);
- free(outBuffer);
return -1;
}
}
- free(inBuffer);
- free(outBuffer);
-
return 0;
}
int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_command_queue queue, const char *kernelCode)
{
- ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
- unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
- unsigned int strideSizes[] = { 1, 3, 4, 5, 0 };
+ const std::vector<ExplicitType> vecType = { kChar, kUChar, kShort, kUShort,
+ kInt, kUInt, kLong, kULong,
+ kFloat, kHalf, kDouble };
+ const unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
+ const unsigned int strideSizes[] = { 1, 3, 4, 5, 0 };
unsigned int size, typeIndex, stride;
int errors = 0;
- for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ )
- {
- if( vecType[ typeIndex ] == kDouble && !is_extension_available( deviceID, "cl_khr_fp64" ) )
- continue;
+ bool fp16Support = is_extension_available(deviceID, "cl_khr_fp16");
+ bool fp64Support = is_extension_available(deviceID, "cl_khr_fp64");
+ for (typeIndex = 0; typeIndex < vecType.size(); typeIndex++)
+ {
if (( vecType[ typeIndex ] == kLong || vecType[ typeIndex ] == kULong ) && !gHasLong )
continue;
+ else if (vecType[typeIndex] == kDouble && !fp64Support)
+ continue;
+ else if (vecType[typeIndex] == kHalf && !fp16Support)
+ continue;
for( size = 0; vecSizes[ size ] != 0; size++ )
{
@@ -260,9 +272,6 @@ int test_strided_copy_all_types(cl_device_id deviceID, cl_context context, cl_co
return 0;
}
-
-
-
int test_async_strided_copy_global_to_local(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return test_strided_copy_all_types( deviceID, context, queue, async_strided_global_to_local_kernel );
diff --git a/test_conformance/basic/test_barrier.cpp b/test_conformance/basic/test_barrier.cpp
index d20af14a..6352b42f 100644
--- a/test_conformance/basic/test_barrier.cpp
+++ b/test_conformance/basic/test_barrier.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -21,143 +21,136 @@
#include <sys/types.h>
#include <sys/stat.h>
+#include <algorithm>
+#include <numeric>
+#include <vector>
#include "procs.h"
-const char *barrier_kernel_code =
-"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n"
-"{\n"
-" int tid = get_local_id(0);\n"
-" int lsize = get_local_size(0);\n"
-" int i;\n"
-"\n"
-" tmp_sum[tid] = 0;\n"
-" for (i=tid; i<n; i+=lsize)\n"
-" tmp_sum[tid] += a[i];\n"
-" \n"
-" // updated to work for any workgroup size \n"
-" for (i=hadd(lsize,1); lsize>1; i = hadd(i,1))\n"
-" {\n"
-" barrier(CLK_GLOBAL_MEM_FENCE);\n"
-" if (tid + i < lsize)\n"
-" tmp_sum[tid] += tmp_sum[tid + i];\n"
-" lsize = i; \n"
-" }\n"
-"\n"
-" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n"
-" if (tid == 0)\n"
-" *sum = tmp_sum[0];\n"
-"}\n";
-
-
-static int
-verify_sum(int *inptr, int *outptr, int n)
+namespace {
+const char *barrier_kernel_code = R"(
+__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum,
+ __global int *sum)
{
- int r = 0;
- int i;
+ int tid = get_local_id(0);
+ int lsize = get_local_size(0);
+ int i;
- for (i=0; i<n; i++)
- {
- r += inptr[i];
- }
+ tmp_sum[tid] = 0;
+ for (i = tid; i < n; i += lsize) tmp_sum[tid] += a[i];
- if (r != outptr[0])
+ // updated to work for any workgroup size
+ for (i = hadd(lsize, 1); lsize > 1; i = hadd(i, 1))
{
- log_error("BARRIER test failed\n");
- return -1;
+ BARRIER(CLK_GLOBAL_MEM_FENCE);
+ if (tid + i < lsize) tmp_sum[tid] += tmp_sum[tid + i];
+ lsize = i;
}
- log_info("BARRIER test passed\n");
- return 0;
+ // no barrier is required here because last person to write to tmp_sum[0]
+ // was tid 0
+ if (tid == 0) *sum = tmp_sum[0];
}
+)";
-int
-test_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+void generate_random_inputs(std::vector<cl_int> &v)
{
- cl_mem streams[3];
- cl_int *input_ptr = NULL, *output_ptr = NULL;
- cl_program program;
- cl_kernel kernel;
- size_t global_threads[3];
- size_t local_threads[3];
- int err;
- int i;
- size_t max_local_workgroup_size[3];
- size_t max_threadgroup_size = 0;
- MTdata d;
+ RandomSeed seed(gRandomSeed);
- err = create_single_kernel_helper(context, &program, &kernel, 1, &barrier_kernel_code, "compute_sum" );
- test_error(err, "Failed to build kernel/program.");
+ auto random_generator = [&seed]() {
+ return static_cast<cl_int>(
+ get_random_float(-0x01000000, 0x01000000, seed));
+ };
+
+ std::generate(v.begin(), v.end(), random_generator);
+}
+
+int test_barrier_common(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements,
+ std::string barrier_str)
+{
+ clMemWrapper streams[3];
+ clProgramWrapper program;
+ clKernelWrapper kernel;
- err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
- sizeof(max_threadgroup_size), &max_threadgroup_size, NULL);
- test_error(err, "clGetKernelWorkgroupInfo failed.");
+ cl_int output;
+ int err;
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
- test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
+ size_t max_threadgroup_size = 0;
+ std::string build_options = std::string("-DBARRIER=") + barrier_str;
+ err = create_single_kernel_helper(context, &program, &kernel, 1,
+ &barrier_kernel_code, "compute_sum",
+ build_options.c_str());
+ test_error(err, "Failed to build kernel/program.");
- // Pick the minimum of the device and the kernel
- if (max_threadgroup_size > max_local_workgroup_size[0])
- max_threadgroup_size = max_local_workgroup_size[0];
+ err = get_max_allowed_1d_work_group_size_on_device(device, kernel,
+ &max_threadgroup_size);
+ test_error(err, "get_max_allowed_1d_work_group_size_on_device failed.");
// work group size must divide evenly into the global size
- while( num_elements % max_threadgroup_size )
- max_threadgroup_size--;
+ while (num_elements % max_threadgroup_size) max_threadgroup_size--;
- input_ptr = (int*)malloc(sizeof(int) * num_elements);
- output_ptr = (int*)malloc(sizeof(int));
+ std::vector<cl_int> input(num_elements);
streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, &err);
+ sizeof(cl_int) * num_elements, nullptr, &err);
test_error(err, "clCreateBuffer failed.");
- streams[1] =
- clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &err);
+ streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int),
+ nullptr, &err);
test_error(err, "clCreateBuffer failed.");
streams[2] =
clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * max_threadgroup_size, NULL, &err);
+ sizeof(cl_int) * max_threadgroup_size, nullptr, &err);
test_error(err, "clCreateBuffer failed.");
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- input_ptr[i] = (int)get_random_float(-0x01000000, 0x01000000, d);
- free_mtdata(d); d = NULL;
+ generate_random_inputs(input);
- err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
+ err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0,
+ sizeof(cl_int) * num_elements, input.data(), 0,
+ nullptr, nullptr);
test_error(err, "clEnqueueWriteBuffer failed.");
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
- err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
- err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
- err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]);
+ err = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
+ err |= clSetKernelArg(kernel, 1, sizeof(num_elements), &num_elements);
+ err |= clSetKernelArg(kernel, 2, sizeof(streams[2]), &streams[2]);
+ err |= clSetKernelArg(kernel, 3, sizeof(streams[1]), &streams[1]);
test_error(err, "clSetKernelArg failed.");
- global_threads[0] = max_threadgroup_size;
- local_threads[0] = max_threadgroup_size;
+ size_t global_threads[] = { max_threadgroup_size };
+ size_t local_threads[] = { max_threadgroup_size };
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
+ err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, global_threads,
+ local_threads, 0, nullptr, nullptr);
test_error(err, "clEnqueueNDRangeKernel failed.");
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int), (void *)output_ptr, 0, NULL, NULL );
+ err = clEnqueueReadBuffer(queue, streams[1], true, 0, sizeof(cl_int),
+ &output, 0, nullptr, nullptr);
test_error(err, "clEnqueueReadBuffer failed.");
- err = verify_sum(input_ptr, output_ptr, num_elements);
-
-
- // cleanup
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseMemObject(streams[2]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr);
- free(output_ptr);
+ if (std::accumulate(input.begin(), input.end(), 0) != output)
+ {
+ log_error("%s test failed\n", barrier_str.c_str());
+ err = -1;
+ }
+ else
+ {
+ log_info("%s test passed\n", barrier_str.c_str());
+ }
return err;
}
+}
+int test_barrier(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ return test_barrier_common(device, context, queue, num_elements, "barrier");
+}
-
-
-
+int test_wg_barrier(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ return test_barrier_common(device, context, queue, num_elements,
+ "work_group_barrier");
+}
diff --git a/test_conformance/basic/test_constant.cpp b/test_conformance/basic/test_constant.cpp
index ed25c6ef..fc2667ee 100644
--- a/test_conformance/basic/test_constant.cpp
+++ b/test_conformance/basic/test_constant.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -21,41 +21,44 @@
#include <sys/types.h>
#include <sys/stat.h>
+#include <algorithm>
+#include <vector>
#include "procs.h"
-const char *constant_kernel_code =
-"__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" float ftmp = tmpF[tid]; \n"
-" float Itmp = tmpI[tid]; \n"
-" out[tid] = ftmp * Itmp; \n"
-"}\n";
-
-const char *loop_constant_kernel_code =
-"kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-" float sum = 0;\n"
-" for (int i = 0; i < num; i++) {\n"
-" float pos = i_pos[i*3];\n"
-" sum += pos;\n"
-" }\n"
-" out[tid] = sum;\n"
-"}\n";
-
-
-static int
-verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n)
+namespace {
+const char* constant_kernel_code = R"(
+__kernel void constant_kernel(__global float *out, __constant float *tmpF, __constant int *tmpI)
+{
+ int tid = get_global_id(0);
+
+ float ftmp = tmpF[tid];
+ float Itmp = tmpI[tid];
+ out[tid] = ftmp * Itmp;
+}
+)";
+
+const char* loop_constant_kernel_code = R"(
+kernel void loop_constant_kernel(global float *out, constant float *i_pos, int num)
{
- int i;
+ int tid = get_global_id(0);
+ float sum = 0;
+ for (int i = 0; i < num; i++) {
+ float pos = i_pos[i*3];
+ sum += pos;
+ }
+ out[tid] = sum;
+}
+)";
+
- for (i=0; i < n; i++)
+int verify(std::vector<cl_float>& tmpF, std::vector<cl_int>& tmpI,
+ std::vector<cl_float>& out)
+{
+ for (int i = 0; i < out.size(); i++)
{
float f = tmpF[i] * tmpI[i];
- if( out[i] != f )
+ if (out[i] != f)
{
log_error("CONSTANT test failed\n");
return -1;
@@ -66,214 +69,172 @@ verify(cl_float *tmpF, cl_int *tmpI, cl_float *out, int n)
return 0;
}
-
-static int
-verify_loop_constant(const cl_float *tmp, cl_float *out, cl_int l, int n)
+int verify_loop_constant(const std::vector<cl_float>& tmp,
+ std::vector<cl_float>& out, cl_int l)
{
- int i;
- cl_int j;
- for (i=0; i < n; i++)
- {
- float sum = 0;
- for (j=0; j < l; ++j)
- sum += tmp[j*3];
+ float sum = 0;
+ for (int j = 0; j < l; ++j) sum += tmp[j * 3];
- if( out[i] != sum )
- {
- log_error("loop CONSTANT test failed\n");
- return -1;
- }
+ auto predicate = [&sum](cl_float elem) { return sum != elem; };
+
+ if (std::any_of(out.cbegin(), out.cend(), predicate))
+ {
+ log_error("loop CONSTANT test failed\n");
+ return -1;
}
log_info("loop CONSTANT test passed\n");
return 0;
}
-int
-test_constant(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+template <typename T> void generate_random_inputs(std::vector<T>& v)
+{
+ RandomSeed seed(gRandomSeed);
+
+ auto random_generator = [&seed]() {
+ return static_cast<T>(get_random_float(-0x02000000, 0x02000000, seed));
+ };
+
+ std::generate(v.begin(), v.end(), random_generator);
+}
+}
+
+int test_constant(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- cl_mem streams[3];
- cl_int *tmpI;
- cl_float *tmpF, *out;
- cl_program program;
- cl_kernel kernel;
- size_t global_threads[3];
- int err;
- unsigned int i;
+ clMemWrapper streams[3];
+ clProgramWrapper program;
+ clKernelWrapper kernel;
+
+ size_t global_threads[3];
+ int err;
cl_ulong maxSize, maxGlobalSize, maxAllocSize;
size_t num_floats, num_ints, constant_values;
- MTdata d;
- RoundingMode oldRoundMode;
+ RoundingMode oldRoundMode;
int isRTZ = 0;
- /* Verify our test buffer won't be bigger than allowed */
- err = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
- test_error( err, "Unable to get max constant buffer size" );
-
- log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n", maxSize);
-
- // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE
- err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0);
- test_error(err, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE");
-
- if (maxSize > maxGlobalSize / 4)
- maxSize = maxGlobalSize / 4;
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0);
- test_error(err, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE ");
-
- if (maxSize > maxAllocSize)
- maxSize = maxAllocSize;
-
- maxSize/=4;
- num_ints = (size_t)maxSize/sizeof(cl_int);
- num_floats = (size_t)maxSize/sizeof(cl_float);
- if (num_ints >= num_floats) {
- constant_values = num_floats;
- } else {
- constant_values = num_ints;
- }
-
- log_info("Test will attempt to use %lu bytes with one %lu byte constant int buffer and one %lu byte constant float buffer.\n",
- constant_values*sizeof(cl_int) + constant_values*sizeof(cl_float), constant_values*sizeof(cl_int), constant_values*sizeof(cl_float));
-
- tmpI = (cl_int*)malloc(sizeof(cl_int) * constant_values);
- tmpF = (cl_float*)malloc(sizeof(cl_float) * constant_values);
- out = (cl_float*)malloc(sizeof(cl_float) * constant_values);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_float) * constant_values, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_float) * constant_values, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
- streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * constant_values, NULL, NULL);
- if (!streams[2])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
+ /* Verify our test buffer won't be bigger than allowed */
+ err = clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
+ sizeof(maxSize), &maxSize, 0);
+ test_error(err, "Unable to get max constant buffer size");
+ log_info("Device reports CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE %llu bytes.\n",
+ maxSize);
- d = init_genrand( gRandomSeed );
- for (i=0; i<constant_values; i++) {
- tmpI[i] = (int)get_random_float(-0x02000000, 0x02000000, d);
- tmpF[i] = get_random_float(-0x02000000, 0x02000000, d);
- }
- free_mtdata(d); d = NULL;
+ // Limit test buffer size to 1/4 of CL_DEVICE_GLOBAL_MEM_SIZE
+ err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE,
+ sizeof(maxGlobalSize), &maxGlobalSize, 0);
+ test_error(err, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE");
- err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)tmpF, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
- err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, sizeof(cl_int)*constant_values, (void *)tmpI, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
+ maxSize = std::min(maxSize, maxGlobalSize / 4);
+
+ err = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
+ sizeof(maxAllocSize), &maxAllocSize, 0);
+ test_error(err, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE");
+
+ maxSize = std::min(maxSize, maxAllocSize);
+
+ maxSize /= 4;
+ num_ints = static_cast<size_t>(maxSize / sizeof(cl_int));
+ num_floats = static_cast<size_t>(maxSize / sizeof(cl_float));
+ constant_values = std::min(num_floats, num_ints);
+
+
+ log_info(
+ "Test will attempt to use %lu bytes with one %lu byte constant int "
+ "buffer and one %lu byte constant float buffer.\n",
+ constant_values * sizeof(cl_int) + constant_values * sizeof(cl_float),
+ constant_values * sizeof(cl_int), constant_values * sizeof(cl_float));
+
+ std::vector<cl_int> tmpI(constant_values);
+ std::vector<cl_float> tmpF(constant_values);
+ std::vector<cl_float> out(constant_values);
+
+
+ streams[0] =
+ clCreateBuffer(context, CL_MEM_READ_WRITE,
+ sizeof(cl_float) * constant_values, nullptr, &err);
+ test_error(err, "clCreateBuffer failed");
- err = create_single_kernel_helper(context, &program, &kernel, 1, &constant_kernel_code, "constant_kernel" );
- if (err) {
- log_error("Failed to create kernel and program: %d\n", err);
- return -1;
- }
+ streams[1] =
+ clCreateBuffer(context, CL_MEM_READ_WRITE,
+ sizeof(cl_float) * constant_values, nullptr, &err);
+ test_error(err, "clCreateBuffer failed");
+
+ streams[2] =
+ clCreateBuffer(context, CL_MEM_READ_WRITE,
+ sizeof(cl_int) * constant_values, nullptr, &err);
+ test_error(err, "clCreateBuffer failed");
+
+ generate_random_inputs(tmpI);
+ generate_random_inputs(tmpF);
+
+ err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0,
+ sizeof(cl_float) * constant_values, tmpF.data(),
+ 0, nullptr, nullptr);
+ test_error(err, "clEnqueueWriteBuffer failed");
+ err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0,
+ sizeof(cl_int) * constant_values, tmpI.data(), 0,
+ nullptr, nullptr);
+ test_error(err, "clEnqueueWriteBuffer faile.");
+
+ err = create_single_kernel_helper(context, &program, &kernel, 1,
+ &constant_kernel_code, "constant_kernel");
+ test_error(err, "Failed to create kernel and program");
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
+ test_error(err, "clSetKernelArgs failed");
global_threads[0] = constant_values;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed: %d\n", err);
- return -1;
- }
- err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
+ err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, global_threads,
+ nullptr, 0, nullptr, nullptr);
+ test_error(err, "clEnqueueNDRangeKernel failed");
+
+ err = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0,
+ sizeof(cl_float) * constant_values, out.data(), 0,
+ nullptr, nullptr);
+ test_error(err, "clEnqueueReadBuffer failed");
- //If we only support rtz mode
- if( CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
+ // If we only support rtz mode
+ if (CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device) && gIsEmbedded)
{
oldRoundMode = set_round(kRoundTowardZero, kfloat);
isRTZ = 1;
}
- err = verify(tmpF, tmpI, out, (int)constant_values);
+ err = verify(tmpF, tmpI, out);
- if (isRTZ)
- (void)set_round(oldRoundMode, kfloat);
+ if (isRTZ) (void)set_round(oldRoundMode, kfloat);
// Loop constant buffer test
- cl_program loop_program;
- cl_kernel loop_kernel;
+ clProgramWrapper loop_program;
+ clKernelWrapper loop_kernel;
cl_int limit = 2;
- memset(out, 0, sizeof(cl_float) * constant_values);
+ memset(out.data(), 0, sizeof(cl_float) * constant_values);
err = create_single_kernel_helper(context, &loop_program, &loop_kernel, 1,
- &loop_constant_kernel_code, "loop_constant_kernel" );
- if (err) {
- log_error("Failed to create loop kernel and program: %d\n", err);
- return -1;
- }
+ &loop_constant_kernel_code,
+ "loop_constant_kernel");
+ test_error(err, "Failed to create kernel and program");
err = clSetKernelArg(loop_kernel, 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(loop_kernel, 1, sizeof streams[1], &streams[1]);
err |= clSetKernelArg(loop_kernel, 2, sizeof(limit), &limit);
- if (err != CL_SUCCESS) {
- log_error("clSetKernelArgs for loop kernel failed\n");
- return -1;
- }
+ test_error(err, "clSetKernelArgs failed");
- err = clEnqueueNDRangeKernel( queue, loop_kernel, 1, NULL, global_threads, NULL, 0, NULL, NULL );
- if (err != CL_SUCCESS) {
- log_error("clEnqueueNDRangeKernel failed: %d\n", err);
- return -1;
- }
- err = clEnqueueReadBuffer( queue, streams[0], CL_TRUE, 0, sizeof(cl_float)*constant_values, (void *)out, 0, NULL, NULL );
- if (err != CL_SUCCESS) {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
+ err = clEnqueueNDRangeKernel(queue, loop_kernel, 1, nullptr, global_threads,
+ nullptr, 0, nullptr, nullptr);
+ test_error(err, "clEnqueueNDRangeKernel failed");
- err = verify_loop_constant(tmpF, out, limit, (int)constant_values);
+ err = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0,
+ sizeof(cl_float) * constant_values, out.data(), 0,
+ nullptr, nullptr);
+ test_error(err, "clEnqueueReadBuffer failed");
+
+ err = verify_loop_constant(tmpF, out, limit);
- // cleanup
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseMemObject(streams[2]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- clReleaseKernel(loop_kernel);
- clReleaseProgram(loop_program);
- free(tmpI);
- free(tmpF);
- free(out);
return err;
}
-
-
-
-
-
diff --git a/test_conformance/basic/test_enqueue_map.cpp b/test_conformance/basic/test_enqueue_map.cpp
index d28f7e41..c2ea24ef 100644
--- a/test_conformance/basic/test_enqueue_map.cpp
+++ b/test_conformance/basic/test_enqueue_map.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -26,6 +26,7 @@
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
+// clang-format off
const cl_mem_flags flag_set[] = {
CL_MEM_ALLOC_HOST_PTR,
CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR,
@@ -33,93 +34,105 @@ const cl_mem_flags flag_set[] = {
CL_MEM_COPY_HOST_PTR,
0
};
-const char* flag_set_names[] = {
+
+const char *flag_set_names[] = {
"CL_MEM_ALLOC_HOST_PTR",
"CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR",
"CL_MEM_USE_HOST_PTR",
"CL_MEM_COPY_HOST_PTR",
"0"
};
+// clang-format on
-int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
- const size_t bufferSize = 256*256;
- MTdataHolder d{gRandomSeed};
+ const size_t bufferSize = 256 * 256;
+ MTdataHolder d{ gRandomSeed };
BufferOwningPtr<cl_char> hostPtrData{ malloc(bufferSize) };
BufferOwningPtr<cl_char> referenceData{ malloc(bufferSize) };
- BufferOwningPtr<cl_char> finalData{malloc(bufferSize)};
+ BufferOwningPtr<cl_char> finalData{ malloc(bufferSize) };
- for (int src_flag_id=0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++)
+ for (size_t src_flag_id = 0; src_flag_id < ARRAY_SIZE(flag_set);
+ src_flag_id++)
{
clMemWrapper memObject;
- log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
+ log_info("Testing with cl_mem_flags src: %s\n",
+ flag_set_names[src_flag_id]);
generate_random_data(kChar, (unsigned int)bufferSize, d, hostPtrData);
memcpy(referenceData, hostPtrData, bufferSize);
void *hostPtr = nullptr;
cl_mem_flags flags = flag_set[src_flag_id];
- bool hasHostPtr = (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR);
+ bool hasHostPtr =
+ (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR);
if (hasHostPtr) hostPtr = hostPtrData;
- memObject = clCreateBuffer(context, flags, bufferSize, hostPtr, &error);
- test_error( error, "Unable to create testing buffer" );
+ memObject = clCreateBuffer(context, flags, bufferSize, hostPtr, &error);
+ test_error(error, "Unable to create testing buffer");
if (!hasHostPtr)
{
error =
- clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize,
- hostPtrData, 0, NULL, NULL);
- test_error( error, "clEnqueueWriteBuffer failed");
+ clEnqueueWriteBuffer(queue, memObject, CL_TRUE, 0, bufferSize,
+ hostPtrData, 0, NULL, NULL);
+ test_error(error, "clEnqueueWriteBuffer failed");
}
- for( int i = 0; i < 128; i++ )
+ for (int i = 0; i < 128; i++)
{
- size_t offset = (size_t)random_in_range( 0, (int)bufferSize - 1, d );
- size_t length = (size_t)random_in_range( 1, (int)( bufferSize - offset ), d );
-
- cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
- offset, length, 0, NULL, NULL, &error );
- if( error != CL_SUCCESS )
- {
- print_error( error, "clEnqueueMapBuffer call failed" );
- log_error( "\tOffset: %d Length: %d\n", (int)offset, (int)length );
- return -1;
- }
-
- // Write into the region
- for( size_t j = 0; j < length; j++ )
- {
- cl_char spin = (cl_char)genrand_int32( d );
-
- // Test read AND write in one swipe
- cl_char value = mappedRegion[ j ];
- value = spin - value;
- mappedRegion[ j ] = value;
-
- // Also update the initial data array
- value = referenceData[offset + j];
- value = spin - value;
- referenceData[offset + j] = value;
- }
-
- // Unmap
- error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL );
- test_error( error, "Unable to unmap buffer" );
+ size_t offset = (size_t)random_in_range(0, (int)bufferSize - 1, d);
+ size_t length =
+ (size_t)random_in_range(1, (int)(bufferSize - offset), d);
+
+ cl_char *mappedRegion = (cl_char *)clEnqueueMapBuffer(
+ queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, offset,
+ length, 0, NULL, NULL, &error);
+ if (error != CL_SUCCESS)
+ {
+ print_error(error, "clEnqueueMapBuffer call failed");
+ log_error("\tOffset: %d Length: %d\n", (int)offset,
+ (int)length);
+ return -1;
+ }
+
+ // Write into the region
+ for (size_t j = 0; j < length; j++)
+ {
+ cl_char spin = (cl_char)genrand_int32(d);
+
+ // Test read AND write in one swipe
+ cl_char value = mappedRegion[j];
+ value = spin - value;
+ mappedRegion[j] = value;
+
+ // Also update the initial data array
+ value = referenceData[offset + j];
+ value = spin - value;
+ referenceData[offset + j] = value;
+ }
+
+ // Unmap
+ error = clEnqueueUnmapMemObject(queue, memObject, mappedRegion, 0,
+ NULL, NULL);
+ test_error(error, "Unable to unmap buffer");
}
- // Final validation: read actual values of buffer and compare against our reference
- error = clEnqueueReadBuffer( queue, memObject, CL_TRUE, 0, bufferSize, finalData, 0, NULL, NULL );
- test_error( error, "Unable to read results" );
+ // Final validation: read actual values of buffer and compare against
+ // our reference
+ error = clEnqueueReadBuffer(queue, memObject, CL_TRUE, 0, bufferSize,
+ finalData, 0, NULL, NULL);
+ test_error(error, "Unable to read results");
- for( size_t q = 0; q < bufferSize; q++ )
+ for (size_t q = 0; q < bufferSize; q++)
{
if (referenceData[q] != finalData[q])
{
log_error(
- "ERROR: Sample %d did not validate! Got %d, expected %d\n",
- (int)q, (int)finalData[q], (int)referenceData[q]);
+ "ERROR: Sample %d did not validate! Got %d, expected %d\n",
+ (int)q, (int)finalData[q], (int)referenceData[q]);
return -1;
}
}
@@ -128,112 +141,129 @@ int test_enqueue_map_buffer(cl_device_id deviceID, cl_context context, cl_comman
return 0;
}
-int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_enqueue_map_image(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT32 };
const size_t imageSize = 256;
const size_t imageDataSize = imageSize * imageSize * 4 * sizeof(cl_uint);
- PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
+ PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID)
BufferOwningPtr<cl_uint> hostPtrData{ malloc(imageDataSize) };
BufferOwningPtr<cl_uint> referenceData{ malloc(imageDataSize) };
- BufferOwningPtr<cl_uint> finalData{malloc(imageDataSize)};
-
- MTdataHolder d{gRandomSeed};
- for (int src_flag_id=0; src_flag_id < ARRAY_SIZE(flag_set); src_flag_id++) {
- clMemWrapper memObject;
- log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]);
-
- generate_random_data(kUInt, (unsigned int)(imageSize * imageSize * 4), d,
- hostPtrData);
- memcpy(referenceData, hostPtrData, imageDataSize);
-
- cl_mem_flags flags = flag_set[src_flag_id];
- bool hasHostPtr = (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR);
- void *hostPtr = nullptr;
- if (hasHostPtr) hostPtr = hostPtrData;
- memObject = create_image_2d(context, CL_MEM_READ_WRITE | flags, &format,
- imageSize, imageSize, 0, hostPtr, &error );
- test_error( error, "Unable to create testing buffer" );
-
- if (!hasHostPtr) {
- size_t write_origin[3]={0,0,0}, write_region[3]={imageSize, imageSize, 1};
- error =
- clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin, write_region,
- 0, 0, hostPtrData, 0, NULL, NULL);
- test_error( error, "Unable to write to testing buffer" );
- }
-
- for( int i = 0; i < 128; i++ )
+ BufferOwningPtr<cl_uint> finalData{ malloc(imageDataSize) };
+
+ MTdataHolder d{ gRandomSeed };
+ for (size_t src_flag_id = 0; src_flag_id < ARRAY_SIZE(flag_set);
+ src_flag_id++)
{
+ clMemWrapper memObject;
+ log_info("Testing with cl_mem_flags src: %s\n",
+ flag_set_names[src_flag_id]);
+
+ generate_random_data(kUInt, (unsigned int)(imageSize * imageSize * 4),
+ d, hostPtrData);
+ memcpy(referenceData, hostPtrData, imageDataSize);
+
+ cl_mem_flags flags = flag_set[src_flag_id];
+ bool hasHostPtr =
+ (flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_COPY_HOST_PTR);
+ void *hostPtr = nullptr;
+ if (hasHostPtr) hostPtr = hostPtrData;
+ memObject = create_image_2d(context, CL_MEM_READ_WRITE | flags, &format,
+ imageSize, imageSize, 0, hostPtr, &error);
+ test_error(error, "Unable to create testing buffer");
- size_t offset[3], region[3];
- size_t rowPitch;
-
- offset[ 0 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d );
- region[ 0 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 0 ] - 1), d );
- offset[ 1 ] = (size_t)random_in_range( 0, (int)imageSize - 1, d );
- region[ 1 ] = (size_t)random_in_range( 1, (int)( imageSize - offset[ 1 ] - 1), d );
- offset[ 2 ] = 0;
- region[ 2 ] = 1;
- cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage( queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE,
- offset, region, &rowPitch, NULL, 0, NULL, NULL, &error );
- if( error != CL_SUCCESS )
- {
- print_error( error, "clEnqueueMapImage call failed" );
- log_error( "\tOffset: %d,%d Region: %d,%d\n", (int)offset[0], (int)offset[1], (int)region[0], (int)region[1] );
- return -1;
- }
-
- // Write into the region
- cl_uint *mappedPtr = mappedRegion;
- for( size_t y = 0; y < region[ 1 ]; y++ )
- {
- for( size_t x = 0; x < region[ 0 ] * 4; x++ )
+ if (!hasHostPtr)
{
- cl_int spin = (cl_int)random_in_range( 16, 1024, d );
-
- cl_int value;
- // Test read AND write in one swipe
- value = mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ];
- value = spin - value;
- mappedPtr[ ( y * rowPitch/sizeof(cl_uint) ) + x ] = value;
-
- // Also update the initial data array
- value =
- referenceData[((offset[1] + y) * imageSize + offset[0]) * 4 + x];
- value = spin - value;
- referenceData[((offset[1] + y) * imageSize + offset[0]) * 4 + x] =
- value;
+ size_t write_origin[3] = { 0, 0, 0 },
+ write_region[3] = { imageSize, imageSize, 1 };
+ error = clEnqueueWriteImage(queue, memObject, CL_TRUE, write_origin,
+ write_region, 0, 0, hostPtrData, 0,
+ NULL, NULL);
+ test_error(error, "Unable to write to testing buffer");
}
- }
- // Unmap
- error = clEnqueueUnmapMemObject( queue, memObject, mappedRegion, 0, NULL, NULL );
- test_error( error, "Unable to unmap buffer" );
- }
+ for (int i = 0; i < 128; i++)
+ {
- // Final validation: read actual values of buffer and compare against our reference
- size_t finalOrigin[3] = { 0, 0, 0 }, finalRegion[3] = { imageSize, imageSize, 1 };
- error = clEnqueueReadImage( queue, memObject, CL_TRUE, finalOrigin, finalRegion, 0, 0, finalData, 0, NULL, NULL );
- test_error( error, "Unable to read results" );
+ size_t offset[3], region[3];
+ size_t rowPitch;
+
+ offset[0] = (size_t)random_in_range(0, (int)imageSize - 1, d);
+ region[0] =
+ (size_t)random_in_range(1, (int)(imageSize - offset[0] - 1), d);
+ offset[1] = (size_t)random_in_range(0, (int)imageSize - 1, d);
+ region[1] =
+ (size_t)random_in_range(1, (int)(imageSize - offset[1] - 1), d);
+ offset[2] = 0;
+ region[2] = 1;
+ cl_uint *mappedRegion = (cl_uint *)clEnqueueMapImage(
+ queue, memObject, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, offset,
+ region, &rowPitch, NULL, 0, NULL, NULL, &error);
+ if (error != CL_SUCCESS)
+ {
+ print_error(error, "clEnqueueMapImage call failed");
+ log_error("\tOffset: %d,%d Region: %d,%d\n", (int)offset[0],
+ (int)offset[1], (int)region[0], (int)region[1]);
+ return -1;
+ }
- for( size_t q = 0; q < imageSize * imageSize * 4; q++ )
- {
- if (referenceData[q] != finalData[q])
+ // Write into the region
+ cl_uint *mappedPtr = mappedRegion;
+ for (size_t y = 0; y < region[1]; y++)
+ {
+ for (size_t x = 0; x < region[0] * 4; x++)
+ {
+ cl_int spin = (cl_int)random_in_range(16, 1024, d);
+
+ cl_int value;
+ // Test read AND write in one swipe
+ value = mappedPtr[(y * rowPitch / sizeof(cl_uint)) + x];
+ value = spin - value;
+ mappedPtr[(y * rowPitch / sizeof(cl_uint)) + x] = value;
+
+ // Also update the initial data array
+ value =
+ referenceData[((offset[1] + y) * imageSize + offset[0])
+ * 4
+ + x];
+ value = spin - value;
+ referenceData[((offset[1] + y) * imageSize + offset[0]) * 4
+ + x] = value;
+ }
+ }
+
+ // Unmap
+ error = clEnqueueUnmapMemObject(queue, memObject, mappedRegion, 0,
+ NULL, NULL);
+ test_error(error, "Unable to unmap buffer");
+ }
+
+ // Final validation: read actual values of buffer and compare against
+ // our reference
+ size_t finalOrigin[3] = { 0, 0, 0 },
+ finalRegion[3] = { imageSize, imageSize, 1 };
+ error = clEnqueueReadImage(queue, memObject, CL_TRUE, finalOrigin,
+ finalRegion, 0, 0, finalData, 0, NULL, NULL);
+ test_error(error, "Unable to read results");
+
+ for (size_t q = 0; q < imageSize * imageSize * 4; q++)
{
- log_error("ERROR: Sample %d (coord %d,%d) did not validate! Got "
- "%d, expected %d\n",
- (int)q, (int)((q / 4) % imageSize),
- (int)((q / 4) / imageSize), (int)finalData[q],
- (int)referenceData[q]);
- return -1;
+ if (referenceData[q] != finalData[q])
+ {
+ log_error(
+ "ERROR: Sample %d (coord %d,%d) did not validate! Got "
+ "%d, expected %d\n",
+ (int)q, (int)((q / 4) % imageSize),
+ (int)((q / 4) / imageSize), (int)finalData[q],
+ (int)referenceData[q]);
+ return -1;
+ }
}
- }
- } // cl_mem_flags
+ } // cl_mem_flags
return 0;
}
-
diff --git a/test_conformance/basic/test_fpmath.cpp b/test_conformance/basic/test_fpmath.cpp
new file mode 100644
index 00000000..9bdb192e
--- /dev/null
+++ b/test_conformance/basic/test_fpmath.cpp
@@ -0,0 +1,386 @@
+//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "harness/compat.h"
+#include "harness/rounding_mode.h"
+#include "harness/stringHelpers.h"
+
+#include <CL/cl_half.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <algorithm>
+#include <functional>
+#include <map>
+#include <string>
+#include <vector>
+
+#include "procs.h"
+
+static const char *fp_kernel_code = R"(
+%s
+__kernel void test_fp(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *dst)
+{
+ int tid = get_global_id(0);
+
+ dst[tid] = srcA[tid] OP srcB[tid];
+})"; // printf-style template: %s receives the extension pragma text; TYPE and OP are supplied as -D build options
+ // Rounding mode handed to cl_half_from_float by HFF; only declared here, defined in another translation unit.
+extern cl_half_rounding_mode halfRoundingMode;
+ // Shorthand conversions: HFF is float -> cl_half, HTF is cl_half -> float.
+#define HFF(num) cl_half_from_float(num, halfRoundingMode)
+#define HTF(num) cl_half_to_float(num)
+
+template <typename T> double toDouble(T val) // Widens val to double for logging; a cl_half is converted through HTF first.
+{
+ if (std::is_same<cl_half, T>::value)
+ return HTF(val);
+ else
+ return val;
+}
+
+bool isHalfNan(cl_half v) // Returns true when the bit pattern of v encodes a half-precision NaN.
+{
+ // Extract FP16 exponent and mantissa
+ uint16_t h_exp = (v >> (CL_HALF_MANT_DIG - 1)) & 0x1F; // 0x1F keeps five bits after the shift
+ uint16_t h_mant = v & 0x3FF; // low ten bits
+
+ // NaN test: exponent field all ones together with a non-zero mantissa
+ return (h_exp == 0x1F && h_mant != 0);
+}
+
+cl_half half_plus(cl_half a, cl_half b) // Host reference for '+' on cl_half: add as float, convert back with HFF.
+{
+ return HFF(std::plus<float>()(HTF(a), HTF(b)));
+}
+
+cl_half half_minus(cl_half a, cl_half b) // Host reference for '-' on cl_half: subtract as float, convert back with HFF.
+{
+ return HFF(std::minus<float>()(HTF(a), HTF(b)));
+}
+
+cl_half half_mult(cl_half a, cl_half b) // Host reference for '*' on cl_half: multiply as float, convert back with HFF.
+{
+ return HFF(std::multiplies<float>()(HTF(a), HTF(b)));
+}
+
+template <typename T> struct TestDef // Describes one operator under test for element type T.
+{
+ const char op; // operator character; appended to the -DOP= build option
+ std::function<T(T, T)> ref; // host reference implementation called by verify_fp
+ std::string type_str; // scalar type name used to build the -DTYPE= option
+ size_t vec_size; // vector width currently being tested; set by the test loop
+};
+
+// Checks every device result in `output` against the host reference
+// test.ref(input[0][i], input[1][i]).
+// Returns 0 when all elements match, or -1 on the first mismatch after
+// logging the operands, the expected value and the value obtained.
+template <typename T>
+int verify_fp(std::vector<T> (&input)[2], std::vector<T> &output,
+              const TestDef<T> &test)
+{
+    auto &inA = input[0];
+    auto &inB = input[1];
+    for (size_t i = 0; i < output.size(); i++)
+    {
+        // A mismatch is a failure unless it is excused below. The flag must
+        // start out true: float and double never enter the cl_half branch,
+        // so a false default would let every mismatch for them pass.
+        bool nan_test = true;
+
+        T r = test.ref(inA[i], inB[i]);
+
+        // For cl_half, two NaNs are accepted as matching even when their
+        // bit patterns differ.
+        if (std::is_same<T, cl_half>::value)
+            nan_test = !(isHalfNan(r) && isHalfNan(output[i]));
+
+        if (r != output[i] && nan_test)
+        {
+            log_error("FP math test for type: %s, vec size: %zu, failed at "
+                      "index %zu, %a '%c' %a, expected %a, get %a\n",
+                      test.type_str.c_str(), test.vec_size, i, toDouble(inA[i]),
+                      test.op, toDouble(inB[i]), toDouble(r),
+                      toDouble(output[i]));
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+template <typename T> void generate_random_inputs(std::vector<T> (&input)[2]) // Fills both input vectors with random values suited to T.
+{
+ RandomSeed seed(gRandomSeed);
+ // One branch per element type; each passes its own pair of bounds to the random helper.
+ if (std::is_same<T, float>::value)
+ {
+ auto random_generator = [&seed]() {
+ return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31),
+ MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), seed);
+ };
+ for (auto &v : input)
+ std::generate(v.begin(), v.end(), random_generator);
+ }
+ else if (std::is_same<T, double>::value)
+ {
+ auto random_generator = [&seed]() {
+ return get_random_double(-MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63),
+ MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63),
+ seed);
+ };
+ for (auto &v : input)
+ std::generate(v.begin(), v.end(), random_generator);
+ }
+ else // any other T is treated as cl_half: a bounded float is drawn and converted with HFF
+ {
+ auto random_generator = [&seed]() {
+ return HFF(get_random_float(-MAKE_HEX_FLOAT(0x1.0p8f, 0x1, 8),
+ MAKE_HEX_FLOAT(0x1.0p8f, 0x1, 8),
+ seed));
+ };
+ for (auto &v : input)
+ std::generate(v.begin(), v.end(), random_generator);
+ }
+}
+
+struct TypesIterator // Runs the '+', '-' and '*' kernel tests for every type in TypeIter; all the work is started from the constructor.
+{
+ using TypeIter = std::tuple<cl_float, cl_half, cl_double>;
+ // Queries fp16/fp64 support and the default rounding modes, then runs the tests through for_each_elem.
+ TypesIterator(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elems)
+ : context(context), queue(queue), fpConfigHalf(0), fpConfigFloat(0),
+ num_elements(num_elems)
+ {
+ // typeid().name one day
+ type2name[sizeof(cl_half)] = "half";
+ type2name[sizeof(cl_float)] = "float";
+ type2name[sizeof(cl_double)] = "double";
+
+ fp16Support = is_extension_available(deviceID, "cl_khr_fp16");
+ fp64Support = is_extension_available(deviceID, "cl_khr_fp64");
+
+ fpConfigFloat = get_default_rounding_mode(deviceID);
+
+ if (fp16Support)
+ fpConfigHalf =
+ get_default_rounding_mode(deviceID, CL_DEVICE_HALF_FP_CONFIG);
+ // iterate_type throws std::runtime_error when a type fails, so this call can throw out of the constructor.
+ for_each_elem(it);
+ }
+ // Runs one operator test for T at vector sizes 1, 2, 4, 8 and 16 and returns err (CL_SUCCESS when it reaches the end).
+ template <typename T> int test_fpmath(TestDef<T> &test)
+ {
+ constexpr size_t vecSizes[] = { 1, 2, 4, 8, 16 };
+ cl_int err = CL_SUCCESS;
+ // Extension pragma text that is substituted for the %s in fp_kernel_code.
+ std::ostringstream sstr;
+ if (std::is_same<T, double>::value)
+ sstr << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+
+ if (std::is_same<T, cl_half>::value)
+ sstr << "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
+
+ std::string program_source =
+ str_sprintf(std::string(fp_kernel_code), sstr.str().c_str());
+
+ for (unsigned i = 0; i < ARRAY_SIZE(vecSizes); i++)
+ {
+ test.vec_size = vecSizes[i];
+ // Kernel element type name: type_str, followed by the vector size when that is not 1.
+ std::ostringstream vecNameStr;
+ vecNameStr << test.type_str;
+ if (test.vec_size != 1) vecNameStr << test.vec_size;
+
+ clMemWrapper streams[3];
+ clProgramWrapper program;
+ clKernelWrapper kernel;
+
+ size_t length = sizeof(T) * num_elements * test.vec_size;
+
+ bool isRTZ = false;
+ RoundingMode oldMode = kDefaultRoundingMode;
+
+
+ // If we only support rtz mode
+ if (std::is_same<T, cl_half>::value)
+ {
+ if (CL_FP_ROUND_TO_ZERO == fpConfigHalf)
+ {
+ isRTZ = true;
+ oldMode = get_round();
+ }
+ }
+ else if (std::is_same<T, float>::value)
+ {
+ if (CL_FP_ROUND_TO_ZERO == fpConfigFloat)
+ {
+ isRTZ = true;
+ oldMode = get_round();
+ }
+ }
+
+ std::vector<T> inputs[]{
+ std::vector<T>(test.vec_size * num_elements),
+ std::vector<T>(test.vec_size * num_elements)
+ };
+ std::vector<T> output =
+ std::vector<T>(test.vec_size * num_elements);
+
+ generate_random_inputs<T>(inputs);
+
+ for (size_t i = 0; i < ARRAY_SIZE(streams); i++)
+ {
+ streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, length,
+ NULL, &err);
+ test_error(err, "clCreateBuffer failed.");
+ }
+ for (size_t i = 0; i < ARRAY_SIZE(inputs); i++)
+ {
+ err =
+ clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length,
+ inputs[i].data(), 0, NULL, NULL);
+ test_error(err, "clEnqueueWriteBuffer failed.");
+ }
+ // TYPE and OP reach the kernel source as preprocessor definitions.
+ std::string build_options = "-DTYPE=";
+ build_options.append(vecNameStr.str())
+ .append(" -DOP=")
+ .append(1, test.op);
+
+ const char *ptr = program_source.c_str();
+ err =
+ create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
+ "test_fp", build_options.c_str());
+
+ test_error(err, "create_single_kernel_helper failed");
+
+ for (size_t i = 0; i < ARRAY_SIZE(streams); i++)
+ {
+ err =
+ clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]);
+ test_error(err, "clSetKernelArgs failed.");
+ }
+ // One work-item per TYPE element; each buffer holds num_elements * vec_size scalars of T.
+ size_t threads[] = { static_cast<size_t>(num_elements) };
+ err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL,
+ 0, NULL, NULL);
+ test_error(err, "clEnqueueNDRangeKernel failed.");
+
+ err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length,
+ output.data(), 0, NULL, NULL);
+ test_error(err, "clEnqueueReadBuffer failed.");
+ // The host reference is computed inside verify_fp, so host rounding is switched only around that call.
+ if (isRTZ) set_round(kRoundTowardZero, kfloat);
+
+ err = verify_fp(inputs, output, test);
+
+ if (isRTZ) set_round(oldMode, kfloat);
+
+ test_error(err, "test verification failed");
+ log_info("FP '%c' '%s' test passed\n", test.op,
+ vecNameStr.str().c_str());
+ }
+
+ return err;
+ }
+ // Runs the three operators for T and ORs the results together; cl_half uses the half_* reference helpers.
+ template <typename T> int test_fpmath_common()
+ {
+ int err = TEST_PASS;
+ if (std::is_same<cl_half, T>::value)
+ {
+ TestDef<T> tests[] = { { '+', half_plus, type2name[sizeof(T)] },
+ { '-', half_minus, type2name[sizeof(T)] },
+ { '*', half_mult, type2name[sizeof(T)] } };
+ for (auto &test : tests) err |= test_fpmath<T>(test);
+ }
+ else
+ {
+ TestDef<T> tests[] = {
+ { '+', std::plus<T>(), type2name[sizeof(T)] },
+ { '-', std::minus<T>(), type2name[sizeof(T)] },
+ { '*', std::multiplies<T>(), type2name[sizeof(T)] }
+ };
+ for (auto &test : tests) err |= test_fpmath<T>(test);
+ }
+
+ return err;
+ }
+ // True when T is double or cl_half and the matching extension was not reported by the device.
+ template <typename T> bool skip_type()
+ {
+ if (std::is_same<double, T>::value && !fp64Support)
+ return true;
+ else if (std::is_same<cl_half, T>::value && !fp16Support)
+ return true;
+ return false;
+ }
+ // Tests Type unless it is skipped; throws std::runtime_error when test_fpmath_common returns non-zero.
+ template <std::size_t Cnt = 0, typename Type>
+ void iterate_type(const Type &t)
+ {
+ bool doTest = !skip_type<Type>();
+
+ if (doTest)
+ {
+ if (test_fpmath_common<Type>())
+ {
+ throw std::runtime_error("test_fpmath_common failed\n");
+ }
+ }
+ }
+ // End of the recursion: selected when Cnt equals the number of tuple elements.
+ template <std::size_t Cnt = 0, typename... Tp>
+ inline typename std::enable_if<Cnt == sizeof...(Tp), void>::type
+ for_each_elem(
+ const std::tuple<Tp...> &) // Unused arguments are given no names.
+ {}
+ // Handles tuple element Cnt through iterate_type, then recurses with Cnt + 1.
+ template <std::size_t Cnt = 0, typename... Tp>
+ inline typename std::enable_if < Cnt<sizeof...(Tp), void>::type
+ for_each_elem(const std::tuple<Tp...> &t)
+ {
+ iterate_type<Cnt>(std::get<Cnt>(t));
+ for_each_elem<Cnt + 1, Tp...>(t);
+ }
+
+protected:
+ TypeIter it; // only its element types are used, to drive for_each_elem
+
+ cl_context context;
+ cl_command_queue queue;
+
+ cl_device_fp_config fpConfigHalf; // stays 0 unless cl_khr_fp16 is available
+ cl_device_fp_config fpConfigFloat;
+
+ bool fp16Support;
+ bool fp64Support;
+
+ int num_elements;
+ std::map<size_t, std::string> type2name; // keyed by sizeof(type): half, float, double
+};
+
+int test_fpmath(cl_device_id device, cl_context context, cl_command_queue queue,
+ int num_elements) // Test entry point: TEST_FAIL when TypesIterator throws std::runtime_error, TEST_PASS otherwise.
+{
+ try
+ {
+ TypesIterator(device, context, queue, num_elements); // the temporary's constructor runs every test
+ } catch (const std::runtime_error &e)
+ {
+ log_error("%s", e.what());
+ return TEST_FAIL;
+ }
+
+ return TEST_PASS;
+}
diff --git a/test_conformance/basic/test_fpmath_float.cpp b/test_conformance/basic/test_fpmath_float.cpp
deleted file mode 100644
index fced0f4e..00000000
--- a/test_conformance/basic/test_fpmath_float.cpp
+++ /dev/null
@@ -1,196 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include "harness/rounding_mode.h"
-
-#include <algorithm>
-#include <functional>
-#include <string>
-#include <vector>
-
-#include "procs.h"
-
-struct TestDef
-{
- const char op;
- std::function<float(float, float)> ref;
-};
-
-static const char *fp_kernel_code = R"(
-__kernel void test_fp(__global TYPE *srcA, __global TYPE *srcB, __global TYPE *dst)
-{
- int tid = get_global_id(0);
-
- dst[tid] = srcA[tid] OP srcB[tid];
-})";
-
-static int verify_fp(std::vector<float> (&input)[2], std::vector<float> &output,
- const TestDef &test)
-{
-
- auto &inA = input[0];
- auto &inB = input[1];
- for (int i = 0; i < output.size(); i++)
- {
- float r = test.ref(inA[i], inB[i]);
- if (r != output[i])
- {
- log_error("FP '%c' float test failed\n", test.op);
- return -1;
- }
- }
-
- log_info("FP '%c' float test passed\n", test.op);
- return 0;
-}
-
-
-void generate_random_inputs(std::vector<cl_float> (&input)[2])
-{
- RandomSeed seed(gRandomSeed);
-
- auto random_generator = [&seed]() {
- return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31),
- MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), seed);
- };
-
- for (auto &v : input)
- {
- std::generate(v.begin(), v.end(), random_generator);
- }
-}
-
-template <size_t N>
-int test_fpmath(cl_device_id device, cl_context context, cl_command_queue queue,
- int num_elements, const std::string type_str,
- const TestDef &test)
-{
- clMemWrapper streams[3];
- clProgramWrapper program;
- clKernelWrapper kernel;
-
- int err;
-
- size_t length = sizeof(cl_float) * num_elements * N;
-
- int isRTZ = 0;
- RoundingMode oldMode = kDefaultRoundingMode;
-
- // If we only support rtz mode
- if (CL_FP_ROUND_TO_ZERO == get_default_rounding_mode(device))
- {
- isRTZ = 1;
- oldMode = get_round();
- }
-
-
- std::vector<cl_float> inputs[]{ std::vector<cl_float>(N * num_elements),
- std::vector<cl_float>(N * num_elements) };
- std::vector<cl_float> output = std::vector<cl_float>(N * num_elements);
-
- generate_random_inputs(inputs);
-
- for (int i = 0; i < ARRAY_SIZE(streams); i++)
- {
- streams[i] =
- clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
- test_error(err, "clCreateBuffer failed.");
- }
- for (int i = 0; i < ARRAY_SIZE(inputs); i++)
- {
- err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0, length,
- inputs[i].data(), 0, NULL, NULL);
- test_error(err, "clEnqueueWriteBuffer failed.");
- }
-
- std::string build_options = "-DTYPE=";
- build_options.append(type_str).append(" -DOP=").append(1, test.op);
-
- err = create_single_kernel_helper(context, &program, &kernel, 1,
- &fp_kernel_code, "test_fp",
- build_options.c_str());
-
- test_error(err, "create_single_kernel_helper failed");
-
- for (int i = 0; i < ARRAY_SIZE(streams); i++)
- {
- err = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]);
- test_error(err, "clSetKernelArgs failed.");
- }
-
- size_t threads[] = { static_cast<size_t>(num_elements) };
- err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL,
- NULL);
- test_error(err, "clEnqueueNDRangeKernel failed.");
-
- err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length,
- output.data(), 0, NULL, NULL);
- test_error(err, "clEnqueueReadBuffer failed.");
-
- if (isRTZ) set_round(kRoundTowardZero, kfloat);
-
- err = verify_fp(inputs, output, test);
-
- if (isRTZ) set_round(oldMode, kfloat);
-
- return err;
-}
-
-
-template <size_t N>
-int test_fpmath_common(cl_device_id device, cl_context context,
- cl_command_queue queue, int num_elements,
- const std::string type_str)
-{
- TestDef tests[] = { { '+', std::plus<float>() },
- { '-', std::minus<float>() },
- { '*', std::multiplies<float>() } };
- int err = TEST_PASS;
-
- for (const auto &test : tests)
- {
- err |= test_fpmath<N>(device, context, queue, num_elements, type_str,
- test);
- }
-
- return err;
-}
-
-int test_fpmath_float(cl_device_id device, cl_context context,
- cl_command_queue queue, int num_elements)
-{
- return test_fpmath_common<1>(device, context, queue, num_elements, "float");
-}
-
-int test_fpmath_float2(cl_device_id device, cl_context context,
- cl_command_queue queue, int num_elements)
-{
- return test_fpmath_common<2>(device, context, queue, num_elements,
- "float2");
-}
-
-int test_fpmath_float4(cl_device_id device, cl_context context,
- cl_command_queue queue, int num_elements)
-{
- return test_fpmath_common<4>(device, context, queue, num_elements,
- "float4");
-}
diff --git a/test_conformance/basic/test_get_linear_ids.cpp b/test_conformance/basic/test_get_linear_ids.cpp
index 3496fd0b..ee7dfb2f 100644
--- a/test_conformance/basic/test_get_linear_ids.cpp
+++ b/test_conformance/basic/test_get_linear_ids.cpp
@@ -104,15 +104,19 @@ test_get_linear_ids(cl_device_id device, cl_context context, cl_command_queue qu
switch (dims) {
case 1:
- log_info(" testing offset=%u global=%u local=%u...\n", gwo[0], gws[0], lws[0]);
+ log_info(" testing offset=%zu global=%zu local=%zu...\n", gwo[0],
+ gws[0], lws[0]);
break;
case 2:
- log_info(" testing offset=(%u,%u) global=(%u,%u) local=(%u,%u)...\n",
- gwo[0], gwo[1], gws[0], gws[1], lws[0], lws[1]);
+ log_info(" testing offset=(%zu,%zu) global=(%zu,%zu) "
+ "local=(%zu,%zu)...\n",
+ gwo[0], gwo[1], gws[0], gws[1], lws[0], lws[1]);
break;
case 3:
- log_info(" testing offset=(%u,%u,%u) global=(%u,%u,%u) local=(%u,%u,%u)...\n",
- gwo[0], gwo[1], gwo[2], gws[0], gws[1], gws[2], lws[0], lws[1], lws[2]);
+ log_info(" testing offset=(%zu,%zu,%zu) global=(%zu,%zu,%zu) "
+ "local=(%zu,%zu,%zu)...\n",
+ gwo[0], gwo[1], gwo[2], gws[0], gws[1], gws[2], lws[0],
+ lws[1], lws[2]);
break;
}
diff --git a/test_conformance/basic/test_hiloeo.cpp b/test_conformance/basic/test_hiloeo.cpp
index 3470ad00..4e921a6e 100644
--- a/test_conformance/basic/test_hiloeo.cpp
+++ b/test_conformance/basic/test_hiloeo.cpp
@@ -1,6 +1,6 @@
//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -13,14 +13,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-#include "harness/compat.h"
-
+#include <iomanip>
+#include <limits.h>
#include <stdio.h>
#include <string.h>
-#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
-
+#include <vector>
#include "procs.h"
@@ -31,9 +30,10 @@ int odd_offset( int index, int vectorSize ) { return index * 2 + 1; }
typedef int (*OffsetFunc)( int index, int vectorSize );
static const OffsetFunc offsetFuncs[4] = { hi_offset, lo_offset, even_offset, odd_offset };
-typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName );
static const char *operatorToUse_names[] = { "hi", "lo", "even", "odd" };
-static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong", "float", "double" };
+static const char *test_str_names[] = { "char", "uchar", "short", "ushort",
+ "int", "uint", "long", "ulong",
+ "half", "float", "double" };
static const unsigned int vector_sizes[] = { 1, 2, 3, 4, 8, 16};
static const unsigned int vector_aligns[] = { 1, 2, 4, 4, 8, 16};
@@ -45,43 +45,41 @@ static const unsigned int out_vector_idx[] = { 0, 0, 1, 1, 3, 4};
// strcat(gentype, vector_size_names[out_vector_idx[i]]);
static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16"};
-static const size_t kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };
+static const size_t kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 2, 4, 8 };
static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse );
int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
{
- cl_int *input_ptr, *output_ptr, *p;
int err;
- cl_uint i;
int hasDouble = is_extension_available( device, "cl_khr_fp64" );
+ int hasHalf = is_extension_available(device, "cl_khr_fp16");
cl_uint vectorSize, operatorToUse;
cl_uint type;
- MTdata d;
+ MTdataHolder d(gRandomSeed);
int expressionMode;
int numExpressionModes = 2;
size_t length = sizeof(cl_int) * 4 * n_elems;
- input_ptr = (cl_int*)malloc(length);
- output_ptr = (cl_int*)malloc(length);
+ std::vector<cl_int> input_ptr(4 * n_elems);
+ std::vector<cl_int> output_ptr(4 * n_elems);
- p = input_ptr;
- d = init_genrand( gRandomSeed );
- for (i=0; i<4 * (cl_uint) n_elems; i++)
- p[i] = genrand_int32(d);
- free_mtdata(d); d = NULL;
+ for (cl_uint i = 0; i < 4 * (cl_uint)n_elems; i++)
+ input_ptr[i] = genrand_int32(d);
for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
{
// Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
size_t elementCount = length / kSizes[type];
- cl_mem streams[2];
+ clMemWrapper streams[2];
// skip double if unavailable
if( !hasDouble && ( 0 == strcmp( test_str_names[type], "double" )))
continue;
+ if (!hasHalf && (0 == strcmp(test_str_names[type], "half"))) continue;
+
if( !gHasLong &&
(( 0 == strcmp( test_str_names[type], "long" )) ||
( 0 == strcmp( test_str_names[type], "ulong" ))))
@@ -104,12 +102,9 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue,
return -1;
}
- err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueWriteBuffer failed\n");
- return -1;
- }
+ err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length,
+ input_ptr.data(), 0, NULL, NULL);
+ test_error(err, "clEnqueueWriteBuffer failed\n");
for( operatorToUse = 0; operatorToUse < sizeof( operatorToUse_names ) / sizeof( operatorToUse_names[0] ); operatorToUse++ )
{
@@ -118,8 +113,8 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue,
for( vectorSize = 1; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) {
for(expressionMode = 0; expressionMode < numExpressionModes; ++expressionMode) {
- cl_program program = NULL;
- cl_kernel kernel = NULL;
+ clProgramWrapper program;
+ clKernelWrapper kernel;
cl_uint outVectorSize = out_vector_idx[vectorSize];
char expression[1024];
@@ -139,92 +134,64 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue,
"}\n"
};
- if(expressionMode == 0) {
- sprintf(expression, "srcA[tid]");
- } else if(expressionMode == 1) {
- switch(vector_sizes[vectorSize]) {
- case 16:
- sprintf(expression,
- "((%s16)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7, srcA[tid].s8, srcA[tid].s9, srcA[tid].sA, srcA[tid].sB, srcA[tid].sC, srcA[tid].sD, srcA[tid].sE, srcA[tid].sf))",
- test_str_names[type]
- );
- break;
- case 8:
- sprintf(expression,
- "((%s8)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7))",
- test_str_names[type]
- );
- break;
- case 4:
- sprintf(expression,
- "((%s4)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3))",
- test_str_names[type]
- );
- break;
- case 3:
- sprintf(expression,
- "((%s3)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2))",
- test_str_names[type]
- );
- break;
- case 2:
- sprintf(expression,
- "((%s2)(srcA[tid].s0, srcA[tid].s1))",
- test_str_names[type]
- );
- break;
- default :
- sprintf(expression, "srcA[tid]");
- log_info("Default\n");
- }
- } else {
- sprintf(expression, "srcA[tid]");
+ if (expressionMode == 1 && vector_sizes[vectorSize] != 1)
+ {
+ std::ostringstream sstr;
+ const char *index_chars[] = { "0", "1", "2", "3",
+ "4", "5", "6", "7",
+ "8", "9", "A", "B",
+ "C", "D", "E", "f" };
+ sstr << "((" << test_str_names[type]
+ << std::to_string(vector_sizes[vectorSize])
+ << ")(";
+ for (unsigned i = 0; i < vector_sizes[vectorSize]; i++)
+ sstr << " srcA[tid].s" << index_chars[i] << ",";
+ sstr.seekp(-1, sstr.cur);
+ sstr << "))";
+ std::snprintf(expression, sizeof(expression), "%s",
+ sstr.str().c_str());
+ }
+ else
+ {
+ std::snprintf(expression, sizeof(expression),
+ "srcA[tid]");
}
if (0 == strcmp( test_str_names[type], "double" ))
source[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+ if (0 == strcmp(test_str_names[type], "half"))
+ source[0] =
+ "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
+
char kernelName[128];
snprintf( kernelName, sizeof( kernelName ), "test_%s_%s%s", operatorToUse_names[ operatorToUse ], test_str_names[type], vector_size_names[vectorSize] );
err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
- if (err)
- return -1;
+ test_error(err, "create_single_kernel_helper failed\n");
err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
+ test_error(err, "clSetKernelArg failed\n");
//Wipe the output buffer clean
uint32_t pattern = 0xdeadbeef;
- memset_pattern4( output_ptr, &pattern, length );
- err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueWriteBuffer failed\n");
- return -1;
- }
+ memset_pattern4(output_ptr.data(), &pattern, length);
+ err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0,
+ length, output_ptr.data(), 0,
+ NULL, NULL);
+ test_error(err, "clEnqueueWriteBuffer failed\n");
size_t size = elementCount / (vector_aligns[vectorSize]);
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
+ test_error(err, "clEnqueueNDRangeKernel failed\n");
- err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
+ err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0,
+ length, output_ptr.data(), 0,
+ NULL, NULL);
+ test_error(err, "clEnqueueReadBuffer failed\n");
- char *inP = (char *)input_ptr;
- char *outP = (char *)output_ptr;
+ char *inP = (char *)input_ptr.data();
+ char *outP = (char *)output_ptr.data();
outP += kSizes[type] * ( ( vector_sizes[outVectorSize] ) -
( vector_sizes[ out_vector_idx[vectorSize] ] ) );
// was outP += kSizes[type] * ( ( 1 << outVectorSize ) - ( 1 << ( vectorSize - 1 ) ) );
@@ -240,180 +207,88 @@ int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue,
inP += kSizes[type] * ( vector_aligns[vectorSize] );
outP += kSizes[type] * ( vector_aligns[outVectorSize] );
}
-
- clReleaseKernel( kernel );
- clReleaseProgram( program );
log_info( "." );
fflush( stdout );
}
}
}
-
- clReleaseMemObject( streams[0] );
- clReleaseMemObject( streams[1] );
log_info( "done\n" );
}
log_info("HiLoEO test passed\n");
-
- free(input_ptr);
- free(output_ptr);
-
return err;
}
-static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse )
+template <typename T>
+cl_int verify(void *in, void *out, size_t elementCount, int type,
+ int vectorSize, int operatorToUse, size_t cmpVectorSize)
{
- cl_ulong array[8];
+ size_t halfVectorSize = vector_sizes[out_vector_idx[vectorSize]];
+ size_t elementSize = kSizes[type];
+ OffsetFunc f = offsetFuncs[operatorToUse];
+ cl_ulong array[8];
void *p = array;
- size_t halfVectorSize = vector_sizes[out_vector_idx[vectorSize]];
- size_t cmpVectorSize = vector_sizes[out_vector_idx[vectorSize]];
- // was 1 << (vectorSize-1);
- OffsetFunc f = offsetFuncs[ operatorToUse ];
- size_t elementSize = kSizes[type];
-
- if(vector_size_names[vectorSize][0] == '3') {
- if(operatorToUse_names[operatorToUse][0] == 'h' ||
- operatorToUse_names[operatorToUse][0] == 'o') // hi or odd
- {
- cmpVectorSize = 1; // special case for vec3 ignored values
- }
- }
- switch( elementSize )
- {
- case 1:
- {
- char *i = (char*)in;
- char *o = (char*)out;
- size_t j;
- cl_uint k;
- OffsetFunc f = offsetFuncs[ operatorToUse ];
-
- for( k = 0; k < elementCount; k++ )
- {
- char *o2 = (char*)p;
- for( j = 0; j < halfVectorSize; j++ )
- o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
-
- if( memcmp( o, o2, elementSize * cmpVectorSize ) )
- {
- log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
- for( j = 1; j < halfVectorSize * 2; j++ )
- log_info( ", %d", i[j] );
- log_info( " } --> { %d", o[0] );
- for( j = 1; j < halfVectorSize; j++ )
- log_info( ", %d", o[j] );
- log_info( " }\n" );
- return -1;
- }
- i += 2 * halfVectorSize;
- o += halfVectorSize;
- }
- }
- break;
+ std::ostringstream ss;
- case 2:
- {
- short *i = (short*)in;
- short *o = (short*)out;
- size_t j;
- cl_uint k;
-
- for( k = 0; k < elementCount; k++ )
- {
- short *o2 = (short*)p;
- for( j = 0; j < halfVectorSize; j++ )
- o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
-
- if( memcmp( o, o2, elementSize * cmpVectorSize ) )
- {
- log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
- for( j = 1; j < halfVectorSize * 2; j++ )
- log_info( ", %d", i[j] );
- log_info( " } --> { %d", o[0] );
- for( j = 1; j < halfVectorSize; j++ )
- log_info( ", %d", o[j] );
- log_info( " }\n" );
- return -1;
- }
- i += 2 * halfVectorSize;
- o += halfVectorSize;
- }
- }
- break;
+ T *i = (T *)in, *o = (T *)out;
- case 4:
- {
- int *i = (int*)in;
- int *o = (int*)out;
- size_t j;
- cl_uint k;
-
- for( k = 0; k < elementCount; k++ )
- {
- int *o2 = (int *)p;
- for( j = 0; j < halfVectorSize; j++ )
- o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
-
- for( j = 0; j < cmpVectorSize; j++ )
+ for (cl_uint k = 0; k < elementCount; k++)
+ {
+ T *o2 = (T *)p;
+ for (size_t j = 0; j < halfVectorSize; j++)
+ o2[j] = i[f((int)j, (int)halfVectorSize * 2)];
+
+ if (memcmp(o, o2, elementSize * cmpVectorSize))
{
- /* Allow float nans to be binary different */
- if( memcmp( &o[j], &o2[j], elementSize ) && !((strcmp(test_str_names[type], "float") == 0) && isnan(((float *)o)[j]) && isnan(((float *)o2)[j])))
- {
- log_info( "\n%d) Failure for %s%s.%s { 0x%8.8x", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
- for( j = 1; j < halfVectorSize * 2; j++ )
- log_info( ", 0x%8.8x", i[j] );
- log_info( " } --> { 0x%8.8x", o[0] );
- for( j = 1; j < halfVectorSize; j++ )
- log_info( ", 0x%8.8x", o[j] );
- log_info( " }\n" );
+ ss << "\n"
+ << k << ") Failure for" << test_str_names[type]
+ << vector_size_names[vectorSize] << '.'
+ << operatorToUse_names[operatorToUse] << " { "
+ << "0x" << std::setfill('0') << std::setw(elementSize * 2)
+ << std::hex << i[0];
+
+ for (size_t j = 1; j < halfVectorSize * 2; j++) ss << ", " << i[j];
+ ss << " } --> { " << o[0];
+ for (size_t j = 1; j < halfVectorSize; j++) ss << ", " << o[j];
+ ss << " }\n";
return -1;
- }
}
i += 2 * halfVectorSize;
o += halfVectorSize;
- }
- }
- break;
-
- case 8:
- {
- cl_ulong *i = (cl_ulong*)in;
- cl_ulong *o = (cl_ulong*)out;
- size_t j;
- cl_uint k;
-
- for( k = 0; k < elementCount; k++ )
- {
- cl_ulong *o2 = (cl_ulong*)p;
- for( j = 0; j < halfVectorSize; j++ )
- o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
-
- if( memcmp( o, o2, elementSize * cmpVectorSize ) )
- {
- log_info( "\n%d) Failure for %s%s.%s { 0x%16.16llx", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
- for( j = 1; j < halfVectorSize * 2; j++ )
- log_info( ", 0x%16.16llx", i[j] );
- log_info( " } --> { 0x%16.16llx", o[0] );
- for( j = 1; j < halfVectorSize; j++ )
- log_info( ", 0x%16.16llx", o[j] );
- log_info( " }\n" );
- return -1;
- }
- i += 2 * halfVectorSize;
- o += halfVectorSize;
- }
- }
- break;
-
- default:
- log_info( "Internal error. Unknown data type\n" );
- return -2;
}
-
return 0;
}
+static int CheckResults(void *in, void *out, size_t elementCount, int type,
+ int vectorSize, int operatorToUse)
+{
+ size_t cmpVectorSize = vector_sizes[out_vector_idx[vectorSize]];
+ size_t elementSize = kSizes[type];
+ if (vector_size_names[vectorSize][0] == '3')
+ {
+ if (operatorToUse_names[operatorToUse][0] == 'h'
+ || operatorToUse_names[operatorToUse][0] == 'o') // hi or odd
+ {
+ cmpVectorSize = 1; // special case for vec3 ignored values
+ }
+ }
+ switch (elementSize)
+ {
+ case 1:
+ return verify<char>(in, out, elementCount, type, vectorSize,
+ operatorToUse, cmpVectorSize);
+ case 2:
+ return verify<short>(in, out, elementCount, type, vectorSize,
+ operatorToUse, cmpVectorSize);
+ case 4:
+ return verify<int>(in, out, elementCount, type, vectorSize,
+ operatorToUse, cmpVectorSize);
+ case 8:
+ return verify<cl_ulong>(in, out, elementCount, type, vectorSize,
+ operatorToUse, cmpVectorSize);
+ default: log_info("Internal error. Unknown data type\n"); return -2;
+ }
+}
diff --git a/test_conformance/basic/test_image_r8.cpp b/test_conformance/basic/test_image_r8.cpp
index b633d6ab..2dca1611 100644
--- a/test_conformance/basic/test_image_r8.cpp
+++ b/test_conformance/basic/test_image_r8.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -21,163 +21,111 @@
#include <sys/types.h>
#include <sys/stat.h>
+#include <algorithm>
+#include <vector>
#include "procs.h"
-static const char *r_uint8_kernel_code =
-"__kernel void test_r_uint8(read_only image2d_t srcimg, __global unsigned char *dst, sampler_t sampler)\n"
-"{\n"
-" int tid_x = get_global_id(0);\n"
-" int tid_y = get_global_id(1);\n"
-" int indx = tid_y * get_image_width(srcimg) + tid_x;\n"
-" uint4 color;\n"
-"\n"
-" color = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));\n"
-" dst[indx] = (unsigned char)(color.x);\n"
-"\n"
-"}\n";
-
-
-static unsigned char *
-generate_8bit_image(int w, int h, MTdata d)
+namespace {
+const char *r_uint8_kernel_code = R"(
+__kernel void test_r_uint8(read_only image2d_t srcimg, __global unsigned char *dst, sampler_t sampler)
{
- unsigned char *ptr = (unsigned char*)malloc(w * h * sizeof(unsigned char));
- int i;
+ int tid_x = get_global_id(0);
+ int tid_y = get_global_id(1);
+ int indx = tid_y * get_image_width(srcimg) + tid_x;
+ uint4 color;
- for (i=0; i<w*h; i++)
- ptr[i] = (unsigned char)genrand_int32(d);
+ color = read_imageui(srcimg, sampler, (int2)(tid_x, tid_y));
+ dst[indx] = (unsigned char)(color.x);
+})";
- return ptr;
-}
-static int
-verify_8bit_image(unsigned char *image, unsigned char *outptr, int w, int h)
+void generate_random_inputs(std::vector<cl_uchar> &v)
{
- int i;
+ RandomSeed seed(gRandomSeed);
- for (i=0; i<w*h; i++)
- {
- if (outptr[i] != image[i])
- {
- log_error("READ_IMAGE_R_UNSIGNED_INT8 test failed\n");
- return -1;
- }
- }
+ auto random_generator = [&seed]() {
+ return static_cast<cl_uchar>(genrand_int32(seed));
+ };
- log_info("READ_IMAGE_R_UNSIGNED_INT8 test passed\n");
- return 0;
+ std::generate(v.begin(), v.end(), random_generator);
}
-int
-test_image_r8(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+}
+int test_image_r8(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- cl_mem streams[2];
- cl_image_format img_format;
- cl_uchar *input_ptr, *output_ptr;
- cl_program program;
- cl_kernel kernel;
- size_t threads[3];
- int img_width = 512;
- int img_height = 512;
- int err;
- MTdata d;
-
- PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
-
- img_format.image_channel_order = CL_R;
- img_format.image_channel_data_type = CL_UNSIGNED_INT8;
+ clMemWrapper streams[2];
+ clProgramWrapper program;
+ clKernelWrapper kernel;
+ const size_t img_width = 512;
+ const size_t img_height = 512;
+ const size_t length = img_width * img_height;
+ int err;
+
+ PASSIVE_REQUIRE_IMAGE_SUPPORT(device)
+
+ const cl_image_format img_format = { CL_R, CL_UNSIGNED_INT8 };
// early out if this image type is not supported
if (!is_image_format_supported(context, CL_MEM_READ_ONLY,
CL_MEM_OBJECT_IMAGE2D, &img_format))
{
log_info("WARNING: Image type not supported; skipping test.\n");
- return 0;
+ return TEST_SKIPPED_ITSELF;
}
- d = init_genrand( gRandomSeed );
- input_ptr = generate_8bit_image(img_width, img_height, d);
- free_mtdata(d); d = NULL;
+ std::vector<cl_uchar> input(length);
+ std::vector<cl_uchar> output(length);
+
+ generate_random_inputs(input);
- output_ptr = (cl_uchar*)malloc(sizeof(cl_uchar) * img_width * img_height);
streams[0] = create_image_2d(context, CL_MEM_READ_ONLY, &img_format,
- img_width, img_height, 0, NULL, NULL);
- if (!streams[0])
- {
- log_error("create_image_2d failed\n");
- return -1;
- }
+ img_width, img_height, 0, nullptr, &err);
+ test_error(err, "create_image_2d failed.");
streams[1] =
- clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_uchar) * img_width * img_height, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
+ clCreateBuffer(context, CL_MEM_READ_WRITE, length, nullptr, &err);
+ test_error(err, "clCreateBuffer failed.");
- size_t origin[3] = {0,0,0}, region[3]={img_width, img_height, 1};
- err = clEnqueueWriteImage(queue, streams[0], CL_TRUE,
- origin, region, 0, 0,
- input_ptr,
- 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- log_error("clWriteImage failed: %d\n", err);
- return -1;
- }
+ const size_t origin[3] = { 0, 0, 0 },
+ region[3] = { img_width, img_height, 1 };
+ err = clEnqueueWriteImage(queue, streams[0], CL_TRUE, origin, region, 0, 0,
+ input.data(), 0, nullptr, nullptr);
+ test_error(err, "clEnqueueWriteImage failed.");
- err = create_single_kernel_helper(context, &program, &kernel, 1, &r_uint8_kernel_code, "test_r_uint8" );
- if (err) {
- log_error("Failed to create kernel and program: %d\n", err);
- return -1;
- }
+ err = create_single_kernel_helper(context, &program, &kernel, 1,
+ &r_uint8_kernel_code, "test_r_uint8");
+ test_error(err, "create_single_kernel_helper failed.");
- cl_sampler sampler = clCreateSampler(context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
- test_error(err, "clCreateSampler failed");
+ clSamplerWrapper sampler = clCreateSampler(
+ context, CL_FALSE, CL_ADDRESS_CLAMP_TO_EDGE, CL_FILTER_NEAREST, &err);
+ test_error(err, "clCreateSampler failed");
+
+ err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
+ err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
+ err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
+ test_error(err, "clSetKernelArgs failed\n");
+
+ size_t threads[] = { img_width, img_height };
+ err = clEnqueueNDRangeKernel(queue, kernel, 2, nullptr, threads, nullptr, 0,
+ nullptr, nullptr);
+ test_error(err, "clEnqueueNDRangeKernel failed\n");
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
- err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler);
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed: %d\n", err);
- return -1;
- }
- threads[0] = (size_t)img_width;
- threads[1] = (size_t)img_height;
- err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, NULL );
- if (err != CL_SUCCESS)
+ err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length,
+ output.data(), 0, nullptr, nullptr);
+ test_error(err, "clEnqueueReadBuffer failed\n");
+
+ if (0 != memcmp(input.data(), output.data(), length))
{
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
+ log_error("READ_IMAGE_R_UNSIGNED_INT8 test failed\n");
+ err = -1;
}
-
- err = clEnqueueReadBuffer( queue, streams[1], CL_TRUE, 0, sizeof(cl_uchar)*img_width*img_height, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
+ else
{
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
+ log_info("READ_IMAGE_R_UNSIGNED_INT8 test passed\n");
}
- err = verify_8bit_image(input_ptr, output_ptr, img_width, img_height);
-
-
- // cleanup
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- clReleaseSampler(sampler);
- free(input_ptr);
- free(output_ptr);
-
return err;
}
-
-
-
-
-
diff --git a/test_conformance/basic/test_int2float.cpp b/test_conformance/basic/test_int2float.cpp
deleted file mode 100644
index 3a8458c9..00000000
--- a/test_conformance/basic/test_int2float.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *int2float_kernel_code =
-"__kernel void test_int2float(__global int *src, __global float *dst)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-"\n"
-" dst[tid] = (float)src[tid];\n"
-"\n"
-"}\n";
-
-
-int
-verify_int2float(cl_int *inptr, cl_float *outptr, int n)
-{
- int i;
-
- for (i=0; i<n; i++)
- {
- if (outptr[i] != (float)inptr[i])
- {
- log_error("INT2FLOAT test failed\n");
- return -1;
- }
- }
-
- log_info("INT2FLOAT test passed\n");
- return 0;
-}
-
-int
-test_int2float(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
- cl_mem streams[2];
- cl_int *input_ptr;
- cl_float *output_ptr;
- cl_program program;
- cl_kernel kernel;
- size_t threads[1];
- int err;
- int i;
- MTdata d;
-
- input_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements);
- output_ptr = (cl_float*)malloc(sizeof(cl_float) * num_elements);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_float) * num_elements, NULL, NULL);
- if (!streams[1])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
-
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- input_ptr[i] = (cl_int)get_random_float(-MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT( 0x1.0p31f, 0x1, 31), d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- log_error("clWriteArray failed\n");
- return -1;
- }
-
- err = create_single_kernel_helper(context, &program, &kernel, 1, &int2float_kernel_code, "test_int2float");
- if (err != CL_SUCCESS)
- {
- log_error("create_single_kernel_helper failed\n");
- return -1;
- }
-
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
- if (err != CL_SUCCESS)
- {
- log_error("clSetKernelArgs failed\n");
- return -1;
- }
-
- threads[0] = (size_t)num_elements;
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_float)*num_elements, (void *)output_ptr, 0, NULL, NULL );
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueReadBuffer failed\n");
- return -1;
- }
-
- err = verify_int2float(input_ptr, output_ptr, num_elements);
-
- // cleanup
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr);
- free(output_ptr);
-
- return err;
-}
-
-
-
-
-
diff --git a/test_conformance/basic/test_int2fp.cpp b/test_conformance/basic/test_int2fp.cpp
new file mode 100644
index 00000000..dd5cc9a1
--- /dev/null
+++ b/test_conformance/basic/test_int2fp.cpp
@@ -0,0 +1,325 @@
+//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "CL/cl_half.h"
+#include "harness/compat.h"
+#include "harness/errorHelpers.h"
+#include "harness/stringHelpers.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <map>
+#include <vector>
+
+#include "procs.h"
+
+extern cl_half_rounding_mode halfRoundingMode;
+
+#define HFF(num) cl_half_from_float(num, halfRoundingMode)
+#define HTF(num) cl_half_to_float(num)
+
+namespace {
+const char *int2float_kernel_code = R"(
+%s
+__kernel void test_X2Y(__global TYPE_X *src, __global TYPE_Y *dst)
+{
+ int tid = get_global_id(0);
+
+ dst[tid] = (TYPE_Y)src[tid];
+
+})";
+
+template <bool int2fp> struct TypesIterator
+{
+ TypesIterator(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elems, const char *test_name)
+ : context(context), queue(queue), test_name(test_name),
+ num_elements(num_elems)
+ {
+ fp16Support = is_extension_available(deviceID, "cl_khr_fp16");
+ fp64Support = is_extension_available(deviceID, "cl_khr_fp64");
+
+ type2name[sizeof(cl_half)] = std::make_pair("half", "short");
+ type2name[sizeof(cl_float)] = std::make_pair("float", "int");
+ type2name[sizeof(cl_double)] = std::make_pair("double", "long");
+
+ std::tuple<cl_float, cl_half, cl_double> it;
+ for_each_elem(it);
+ }
+
+ template <typename T> void generate_random_inputs(std::vector<T> &v)
+ {
+ RandomSeed seed(gRandomSeed);
+
+ if (sizeof(T) == sizeof(cl_half))
+ {
+ // Bound generated half values to 0x1.ffcp+14(32752.0) which is the
+ // largest cl_half value smaller than the max value of cl_short,
+ // 32767.
+ if (int2fp)
+ {
+ auto random_generator = [&seed]() {
+ return (cl_short)get_random_float(
+ -MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14),
+ MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), seed);
+ };
+ std::generate(v.begin(), v.end(), random_generator);
+ }
+ else
+ {
+ auto random_generator = [&seed]() {
+ return HFF(get_random_float(
+ -MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14),
+ MAKE_HEX_FLOAT(0x1.ffcp+14, 1.9990234375f, 14), seed));
+ };
+ std::generate(v.begin(), v.end(), random_generator);
+ }
+ }
+ else if (sizeof(T) == sizeof(cl_float))
+ {
+ auto random_generator = [&seed]() {
+ return get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31),
+ MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31),
+ seed);
+ };
+ std::generate(v.begin(), v.end(), random_generator);
+ }
+ else if (sizeof(T) == sizeof(cl_double))
+ {
+ auto random_generator = [&seed]() {
+ return get_random_double(-MAKE_HEX_DOUBLE(0x1.0p63, 0x1, 63),
+ MAKE_HEX_DOUBLE(0x1.0p63, 0x1, 63),
+ seed);
+ };
+ std::generate(v.begin(), v.end(), random_generator);
+ }
+ }
+
+ template <typename Tx, typename Ty> static bool equal_value(Tx a, Ty b)
+ {
+ return a == (Tx)b;
+ }
+
+ static bool equal_value_from_half(cl_short a, cl_half b)
+ {
+ return a == (cl_short)HTF(b);
+ }
+
+ static bool equal_value_to_half(cl_half a, cl_short b)
+ {
+ return a == HFF((float)b);
+ }
+
+
+ template <typename Tx, typename Ty>
+ int verify_X2Y(std::vector<Tx> input, std::vector<Ty> output)
+ {
+ if (std::is_same<Tx, cl_half>::value
+ || std::is_same<Ty, cl_half>::value)
+ {
+ bool res = true;
+ if (int2fp)
+ res = std::equal(output.begin(), output.end(), input.begin(),
+ equal_value_to_half);
+ else
+ res = std::equal(output.begin(), output.end(), input.begin(),
+ equal_value_from_half);
+
+ if (!res)
+ {
+ log_error("%s test failed\n", test_name.c_str());
+ return -1;
+ }
+ }
+ else
+ {
+ if (!std::equal(output.begin(), output.end(), input.begin(),
+ equal_value<Tx, Ty>))
+ {
+ log_error("%s test failed\n", test_name.c_str());
+ return -1;
+ }
+ }
+
+ log_info("%s test passed\n", test_name.c_str());
+ return 0;
+ }
+
+ template <typename Tx, typename Ty> int test_X2Y()
+ {
+ clMemWrapper streams[2];
+ clProgramWrapper program;
+ clKernelWrapper kernel;
+ int err;
+
+ std::vector<Tx> input(num_elements);
+ std::vector<Ty> output(num_elements);
+
+ streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+ sizeof(Tx) * num_elements, nullptr, &err);
+ test_error(err, "clCreateBuffer failed.");
+ streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
+ sizeof(Ty) * num_elements, nullptr, &err);
+ test_error(err, "clCreateBuffer failed.");
+
+ generate_random_inputs(input);
+
+ err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0,
+ sizeof(Tx) * num_elements, input.data(), 0,
+ nullptr, nullptr);
+ test_error(err, "clEnqueueWriteBuffer failed.");
+
+ std::string src_name = type2name[sizeof(Tx)].first;
+ std::string dst_name = type2name[sizeof(Tx)].second;
+ if (int2fp) std::swap(src_name, dst_name);
+
+ std::string build_options;
+ build_options.append("-DTYPE_X=").append(src_name.c_str());
+ build_options.append(" -DTYPE_Y=").append(dst_name.c_str());
+
+ std::string extension;
+ if (sizeof(Tx) == sizeof(cl_double))
+ extension = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+
+ if (sizeof(Tx) == sizeof(cl_half))
+ extension = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
+
+ std::string kernelSource =
+ str_sprintf(int2float_kernel_code, extension.c_str());
+ const char *ptr = kernelSource.c_str();
+
+ err = create_single_kernel_helper(context, &program, &kernel, 1, &ptr,
+ "test_X2Y", build_options.c_str());
+ test_error(err, "create_single_kernel_helper failed.");
+
+ err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
+ err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
+ test_error(err, "clSetKernelArg failed.");
+
+ size_t threads[] = { (size_t)num_elements };
+ err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, threads,
+ nullptr, 0, nullptr, nullptr);
+ test_error(err, "clEnqueueNDRangeKernel failed.");
+
+ err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0,
+ sizeof(Ty) * num_elements, output.data(), 0,
+ nullptr, nullptr);
+ test_error(err, "clEnqueueReadBuffer failed.");
+
+ err = verify_X2Y(input, output);
+
+ return err;
+ }
+
+ template <typename T> bool skip_type()
+ {
+ if (std::is_same<double, T>::value && !fp64Support)
+ return true;
+ else if (std::is_same<cl_half, T>::value && !fp16Support)
+ return true;
+ return false;
+ }
+
+ template <std::size_t Cnt = 0, typename T> void iterate_type(const T &t)
+ {
+ bool doTest = !skip_type<T>();
+
+ if (doTest)
+ {
+ typedef typename std::conditional<
+ (sizeof(T) == sizeof(std::int16_t)), std::int16_t,
+ typename std::conditional<(sizeof(T) == sizeof(std::int32_t)),
+ std::int32_t,
+ std::int64_t>::type>::type U;
+ if (int2fp)
+ {
+ if (test_X2Y<U, T>())
+ throw std::runtime_error("test_X2Y failed\n");
+ }
+ else
+ {
+ if (test_X2Y<T, U>())
+ throw std::runtime_error("test_X2Y failed\n");
+ }
+ }
+ }
+
+ template <std::size_t Cnt = 0, typename... Tp>
+ inline typename std::enable_if<Cnt == sizeof...(Tp), void>::type
+ for_each_elem(
+ const std::tuple<Tp...> &) // Unused arguments are given no names.
+ {}
+
+ template <std::size_t Cnt = 0, typename... Tp>
+ inline typename std::enable_if < Cnt<sizeof...(Tp), void>::type
+ for_each_elem(const std::tuple<Tp...> &t)
+ {
+ iterate_type<Cnt>(std::get<Cnt>(t));
+ for_each_elem<Cnt + 1, Tp...>(t);
+ }
+
+protected:
+ cl_context context;
+ cl_command_queue queue;
+
+ cl_device_fp_config fpConfigHalf;
+ cl_device_fp_config fpConfigFloat;
+
+ bool fp16Support;
+ bool fp64Support;
+
+ std::map<size_t, std::pair<std::string, std::string>> type2name;
+
+ std::string test_name;
+ int num_elements;
+};
+
+}
+
+int test_int2fp(cl_device_id device, cl_context context, cl_command_queue queue,
+ int num_elements)
+{
+ try
+ {
+ TypesIterator<true>(device, context, queue, num_elements, "INT2FP");
+ } catch (const std::runtime_error &e)
+ {
+ log_error("%s", e.what());
+ return TEST_FAIL;
+ }
+
+ return TEST_PASS;
+}
+
+int test_fp2int(cl_device_id device, cl_context context, cl_command_queue queue,
+ int num_elements)
+{
+ try
+ {
+ TypesIterator<false>(device, context, queue, num_elements, "FP2INT");
+ } catch (const std::runtime_error &e)
+ {
+ log_error("%s", e.what());
+ return TEST_FAIL;
+ }
+
+ return TEST_PASS;
+}
diff --git a/test_conformance/basic/test_intmath.cpp b/test_conformance/basic/test_intmath.cpp
index 6fd41abb..5a4e9c2a 100644
--- a/test_conformance/basic/test_intmath.cpp
+++ b/test_conformance/basic/test_intmath.cpp
@@ -123,7 +123,7 @@ int test_intmath(cl_device_id device, cl_context context,
size_t datasize = sizeof(T) * num_elements * N;
// Create device buffers.
- for (int i = 0; i < ARRAY_SIZE(streams); i++)
+ for (size_t i = 0; i < ARRAY_SIZE(streams); i++)
{
streams[i] =
clCreateBuffer(context, CL_MEM_READ_WRITE, datasize, NULL, &err);
@@ -175,7 +175,7 @@ int test_intmath(cl_device_id device, cl_context context,
test_error(err, "clEnqueueReadBuffer failed\n");
// Verify results
- for (int i = 0; i < num_elements * N; i++)
+ for (unsigned i = 0; i < num_elements * N; i++)
{
T r = test.ref(inputA[i], inputB[i], inputC[i]);
if (r != output[i])
diff --git a/test_conformance/basic/test_loop.cpp b/test_conformance/basic/test_loop.cpp
index 1a91d9e4..1c9acd1a 100644
--- a/test_conformance/basic/test_loop.cpp
+++ b/test_conformance/basic/test_loop.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -21,45 +21,45 @@
#include <sys/types.h>
#include <sys/stat.h>
+#include <vector>
#include "procs.h"
-const char *loop_kernel_code =
-"__kernel void test_loop(__global int *src, __global int *loopindx, __global int *loopcnt, __global int *dst)\n"
-"{\n"
-" int tid = get_global_id(0);\n"
-" int n = get_global_size(0);\n"
-" int i, j;\n"
-"\n"
-" dst[tid] = 0;\n"
-" for (i=0,j=loopindx[tid]; i<loopcnt[tid]; i++,j++)\n"
-" {\n"
-" if (j >= n)\n"
-" j = 0;\n"
-" dst[tid] += src[j];\n"
-" }\n"
-"\n"
-"}\n";
-
-
-int
-verify_loop(int *inptr, int *loopindx, int *loopcnt, int *outptr, int n)
+namespace {
+const char *loop_kernel_code = R"(
+__kernel void test_loop(__global int *src, __global int *loopindx, __global int *loopcnt, __global int *dst)
{
- int r, i, j, k;
+ int tid = get_global_id(0);
+ int n = get_global_size(0);
+ int i, j;
- for (i=0; i<n; i++)
+ dst[tid] = 0;
+ for (i=0, j=loopindx[tid]; i<loopcnt[tid]; i++, j++)
{
- r = 0;
- for (j=0,k=loopindx[i]; j<loopcnt[i]; j++,k++)
+ if (j >= n)
+ j = 0;
+ dst[tid] += src[j];
+ }
+}
+)";
+
+
+int verify_loop(std::vector<cl_int> inptr, std::vector<cl_int> loopindx,
+ std::vector<cl_int> loopcnt, std::vector<cl_int> outptr, int n)
+{
+ for (int i = 0; i < n; i++)
+ {
+ int r = 0;
+ for (int j = 0, k = loopindx[i]; j < loopcnt[i]; j++, k++)
{
- if (k >= n)
- k = 0;
+ if (k >= n) k = 0;
r += inptr[k];
}
if (r != outptr[i])
{
- log_error("LOOP test failed: %d found, expected %d\n", outptr[i], r);
+ log_error("LOOP test failed: %d found, expected %d\n", outptr[i],
+ r);
return -1;
}
}
@@ -67,119 +67,69 @@ verify_loop(int *inptr, int *loopindx, int *loopcnt, int *outptr, int n)
log_info("LOOP test passed\n");
return 0;
}
-
-int test_loop(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
+}
+int test_loop(cl_device_id device, cl_context context, cl_command_queue queue,
+ int num_elements)
{
- cl_mem streams[4];
- cl_int *input_ptr, *loop_indx, *loop_cnt, *output_ptr;
- cl_program program;
- cl_kernel kernel;
- size_t threads[1];
- int err, i;
+ clMemWrapper streams[4];
+ clProgramWrapper program;
+ clKernelWrapper kernel;
+ int err;
size_t length = sizeof(cl_int) * num_elements;
- input_ptr = (cl_int*)malloc(length);
- loop_indx = (cl_int*)malloc(length);
- loop_cnt = (cl_int*)malloc(length);
- output_ptr = (cl_int*)malloc(length);
+ std::vector<cl_int> input(length);
+ std::vector<cl_int> loop_indx(length);
+ std::vector<cl_int> loop_cnt(length);
+ std::vector<cl_int> output(length);
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
- if (!streams[0])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
- streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
- if (!streams[1])
+ for (auto &stream : streams)
{
- log_error("clCreateBuffer failed\n");
- return -1;
- }
- streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
- if (!streams[2])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
- }
- streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
- if (!streams[3])
- {
- log_error("clCreateBuffer failed\n");
- return -1;
+ stream =
+ clCreateBuffer(context, CL_MEM_READ_WRITE, length, nullptr, &err);
+ test_error(err, "clCreateBuffer failed.");
}
- MTdata d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
+ RandomSeed seed(gRandomSeed);
+ for (int i = 0; i < num_elements; i++)
{
- input_ptr[i] = (int)genrand_int32(d);
- loop_indx[i] = (int)get_random_float(0, num_elements-1, d);
- loop_cnt[i] = (int)get_random_float(0, num_elements/32, d);
- }
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueWriteBuffer failed\n");
- return -1;
- }
- err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, loop_indx, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueWriteBuffer failed\n");
- return -1;
- }
- err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, loop_cnt, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueWriteBuffer failed\n");
- return -1;
- }
-
- err = create_single_kernel_helper(context, &program, &kernel, 1, &loop_kernel_code, "test_loop" );
- if (err)
- return -1;
-
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
- err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
- err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
- err |= clSetKernelArg(kernel, 3, sizeof streams[3], &streams[3]);
- if (err != CL_SUCCESS)
+ input[i] = static_cast<int>(genrand_int32(seed));
+ loop_indx[i] =
+ static_cast<int>(get_random_float(0, num_elements - 1, seed));
+ loop_cnt[i] =
+ static_cast<int>(get_random_float(0, num_elements / 32, seed));
+ };
+
+ err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length,
+ input.data(), 0, nullptr, nullptr);
+ test_error(err, "clEnqueueWriteBuffer failed.");
+ err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length,
+ loop_indx.data(), 0, nullptr, nullptr);
+ test_error(err, "clEnqueueWriteBuffer failed.");
+ err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length,
+ loop_cnt.data(), 0, nullptr, nullptr);
+ test_error(err, "clEnqueueWriteBuffer failed.");
+
+ err = create_single_kernel_helper(context, &program, &kernel, 1,
+ &loop_kernel_code, "test_loop");
+ test_error(err, "create_single_kernel_helper failed.");
+
+ for (int i = 0; i < ARRAY_SIZE(streams); i++)
{
- log_error("clSetKernelArgs failed\n");
- return -1;
+ err = clSetKernelArg(kernel, i, sizeof streams[i], &streams[i]);
+ test_error(err, "clSetKernelArgs failed\n");
}
- threads[0] = (unsigned int)num_elements;
- err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueNDRangeKernel failed\n");
- return -1;
- }
-
- err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
- if (err != CL_SUCCESS)
- {
- log_error("clReadArray failed\n");
- return -1;
- }
-
- err = verify_loop(input_ptr, loop_indx, loop_cnt, output_ptr, num_elements);
-
- // cleanup
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseMemObject(streams[2]);
- clReleaseMemObject(streams[3]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr);
- free(loop_indx);
- free(loop_cnt);
- free(output_ptr);
+ size_t threads[] = { (size_t)num_elements };
+ err = clEnqueueNDRangeKernel(queue, kernel, 1, nullptr, threads, nullptr, 0,
+ nullptr, nullptr);
+ test_error(err, "clEnqueueNDRangeKernel failed\n");
- return err;
-}
+ err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length,
+ output.data(), 0, nullptr, nullptr);
+ test_error(err, "clEnqueueReadBuffer failed\n");
+
+ err = verify_loop(input, loop_indx, loop_cnt, output, num_elements);
+ return err;
+}
diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp
index a46713e9..41cc0199 100644
--- a/test_conformance/basic/test_progvar.cpp
+++ b/test_conformance/basic/test_progvar.cpp
@@ -581,13 +581,19 @@ static void l_load_abilities(cl_device_id device)
cl_uint max_dim = 0;
status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
sizeof(max_dim), &max_dim, 0);
- assert(status == CL_SUCCESS);
+ if (check_error(status,
+ "clGetDeviceInfo for "
+ "CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed."))
+ return;
assert(max_dim > 0);
size_t max_id[3];
max_id[0] = 0;
status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
max_dim * sizeof(size_t), &max_id[0], 0);
- assert(status == CL_SUCCESS);
+ if (check_error(status,
+ "clGetDeviceInfo for "
+ "CL_DEVICE_MAX_WORK_ITEM_SIZES failed."))
+ return;
l_max_global_id0 = max_id[0];
}
@@ -597,7 +603,10 @@ static void l_load_abilities(cl_device_id device)
status =
clGetDeviceInfo(device, CL_DEVICE_LINKER_AVAILABLE,
sizeof(l_linker_available), &l_linker_available, 0);
- assert(status == CL_SUCCESS);
+ if (check_error(status,
+ "clGetDeviceInfo for "
+ "CL_DEVICE_LINKER_AVAILABLE failed."))
+ return;
}
}
@@ -903,6 +912,7 @@ static std::string global_decls(const TypeInfo& ti, bool with_init)
vol, tn, vol, tn, vol, tn, vol, tn);
}
assert(num_printed < sizeof(decls));
+ (void)num_printed;
return std::string(decls);
}
@@ -983,6 +993,7 @@ static std::string writer_function(const TypeInfo& ti)
writer_template_atomic, ti.get_buf_elem_type());
}
assert(num_printed < sizeof(writer_src));
+ (void)num_printed;
std::string result = writer_src;
return result;
}
@@ -1024,6 +1035,7 @@ static std::string reader_function(const TypeInfo& ti)
ti.get_buf_elem_type(), ti.get_buf_elem_type());
}
assert(num_printed < sizeof(reader_src));
+ (void)num_printed;
std::string result = reader_src;
return result;
}
diff --git a/test_conformance/basic/test_vec_type_hint.cpp b/test_conformance/basic/test_vec_type_hint.cpp
index 33168b13..0ba105db 100644
--- a/test_conformance/basic/test_vec_type_hint.cpp
+++ b/test_conformance/basic/test_vec_type_hint.cpp
@@ -13,28 +13,27 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-#include "harness/compat.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
-
+#include <vector>
#include "procs.h"
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
-
static const char *sample_kernel = {
- "%s\n" // optional pragma string
- "__kernel __attribute__((vec_type_hint(%s%s))) void sample_test(__global int *src, __global int *dst)\n"
- "{\n"
- " int tid = get_global_id(0);\n"
- " dst[tid] = src[tid];\n"
- "\n"
- "}\n"
+ "%s\n"
+ "__kernel __attribute__((vec_type_hint(%s%s))) void sample_test(__global "
+ "int *src, __global int *dst)\n"
+ "{\n"
+ " int tid = get_global_id(0);\n"
+ " dst[tid] = src[tid];\n"
+ "\n"
+ "}\n"
};
int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
@@ -42,66 +41,85 @@ int test_vec_type_hint(cl_device_id deviceID, cl_context context, cl_command_que
int error;
int vec_type_index, vec_size_index;
- ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
- const char *size_names[] = {"", "2", "4", "8", "16"};
- char *program_source;
-
- program_source = (char*)malloc(sizeof(char)*4096);
+ ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt,
+ kLong, kULong, kFloat, kHalf, kDouble };
+ const char *size_names[] = { "", "2", "4", "8", "16" };
+ std::vector<char> program_source(4096);
+
+ for (vec_type_index = 0;
+ vec_type_index < sizeof(vecType) / sizeof(vecType[0]); vec_type_index++)
+ {
+
+ if (vecType[vec_type_index] == kHalf
+ && !is_extension_available(deviceID, "cl_khr_fp16"))
+ {
+ log_info(
+ "Extension cl_khr_fp16 not supported; skipping half tests.\n");
+ continue;
+ }
+ else if (vecType[vec_type_index] == kDouble
+ && !is_extension_available(deviceID, "cl_khr_fp64"))
+ {
+ log_info(
+ "Extension cl_khr_fp64 not supported; skipping double tests.\n");
+ continue;
+ }
+ else if ((vecType[vec_type_index] == kLong
+ || vecType[vec_type_index] == kULong)
+ && !gHasLong)
+ {
+ log_info(
+ "Extension cl_khr_int64 not supported; skipping long tests.\n");
+ continue;
+ }
- for (vec_type_index=0; vec_type_index<10; vec_type_index++) {
- if (vecType[vec_type_index] == kDouble) {
- if (!is_extension_available(deviceID, "cl_khr_fp64")) {
- log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
- continue;
+ for (vec_size_index = 0; vec_size_index < 5; vec_size_index++)
+ {
+ clProgramWrapper program;
+ clKernelWrapper kernel;
+ clMemWrapper in, out;
+ size_t global[] = { 1, 1, 1 };
+
+ log_info("Testing __attribute__((vec_type_hint(%s%s))...\n",
+ get_explicit_type_name(vecType[vec_type_index]),
+ size_names[vec_size_index]);
+ char extension[128] = { 0 };
+ if (vecType[vec_type_index] == kDouble)
+ std::snprintf(extension, sizeof(extension),
+ "#pragma OPENCL EXTENSION cl_khr_fp64 : enable");
+ else if (vecType[vec_type_index] == kHalf)
+ std::snprintf(extension, sizeof(extension),
+ "#pragma OPENCL EXTENSION cl_khr_fp16 : enable");
+
+ sprintf(program_source.data(), sample_kernel, extension,
+ get_explicit_type_name(vecType[vec_type_index]),
+ size_names[vec_size_index]);
+
+ const char *src = &program_source.front();
+ error = create_single_kernel_helper(context, &program, &kernel, 1,
+ &src, "sample_test");
+ test_error(error, "create_single_kernel_helper failed");
+
+ in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int) * 10,
+ NULL, &error);
+ test_error(error, "clCreateBuffer failed");
+ out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int) * 10,
+ NULL, &error);
+ test_error(error, "clCreateBuffer failed");
+
+ error = clSetKernelArg(kernel, 0, sizeof(in), &in);
+ test_error(error, "clSetKernelArg failed");
+ error = clSetKernelArg(kernel, 1, sizeof(out), &out);
+ test_error(error, "clSetKernelArg failed");
+
+ error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL,
+ 0, NULL, NULL);
+ test_error(error, "clEnqueueNDRangeKernel failed");
+
+ error = clFinish(queue);
+ test_error(error, "clFinish failed");
}
- log_info("Testing doubles.\n");
- }
-
- if (vecType[vec_type_index] == kLong || vecType[vec_type_index] == kULong)
- {
- if (!gHasLong)
- {
- log_info("Extension cl_khr_int64 not supported; skipping long tests.\n");
- continue;
- }
- }
-
- for (vec_size_index=0; vec_size_index<5; vec_size_index++) {
- clProgramWrapper program;
- clKernelWrapper kernel;
- clMemWrapper in, out;
- size_t global[] = {1,1,1};
-
- log_info("Testing __attribute__((vec_type_hint(%s%s))...\n", get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]);
-
- program_source[0] = '\0';
- sprintf(program_source, sample_kernel,
- (vecType[vec_type_index] == kDouble) ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
- get_explicit_type_name(vecType[vec_type_index]), size_names[vec_size_index]);
-
- error = create_single_kernel_helper( context, &program, &kernel, 1, (const char**)&program_source, "sample_test" );
- if( error != 0 )
- return error;
-
- in = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int)*10, NULL, &error);
- test_error(error, "clCreateBuffer failed");
- out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int)*10, NULL, &error);
- test_error(error, "clCreateBuffer failed");
-
- error = clSetKernelArg(kernel, 0, sizeof(in), &in);
- test_error(error, "clSetKernelArg failed");
- error = clSetKernelArg(kernel, 1, sizeof(out), &out);
- test_error(error, "clSetKernelArg failed");
-
- error = clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global, NULL, 0, NULL, NULL);
- test_error(error, "clEnqueueNDRangeKernel failed");
-
- error = clFinish(queue);
- test_error(error, "clFinish failed");
- }
}
- free(program_source);
-
return 0;
}
diff --git a/test_conformance/basic/test_vector_creation.cpp b/test_conformance/basic/test_vector_creation.cpp
index d9530b4e..6bae156a 100644
--- a/test_conformance/basic/test_vector_creation.cpp
+++ b/test_conformance/basic/test_vector_creation.cpp
@@ -1,6 +1,6 @@
//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -17,48 +17,41 @@
#include "harness/conversions.h"
#include "harness/typeWrappers.h"
#include "harness/errorHelpers.h"
+#include <vector>
-
-
+#include <CL/cl_half.h>
#define DEBUG 0
#define DEPTH 16
// Limit the maximum code size for any given kernel.
-#define MAX_CODE_SIZE (1024*32)
-
-const int sizes[] = {1, 2, 3, 4, 8, 16, -1, -1, -1, -1};
-const char *size_names[] = {"", "2", "3", "4", "8", "16" , "!!a", "!!b", "!!c", "!!d"};
-
-// Creates a kernel by enumerating all possible ways of building the vector out of vloads
-// skip_to_results will skip results up to a given number. If the amount of code generated
-// is greater than MAX_CODE_SIZE, this function will return the number of results used,
-// which can then be used as the skip_to_result value to continue where it left off.
-int create_kernel(ExplicitType type, int output_size, char *program, int *number_of_results, int skip_to_result) {
+#define MAX_CODE_SIZE (1024 * 32)
+
+static const int sizes[] = { 1, 2, 3, 4, 8, 16, -1, -1, -1, -1 };
+static const int initial_no_sizes[] = { 0, 0, 0, 0, 0, 0, 2 };
+static const char *size_names[] = { "", "2", "3", "4", "8",
+ "16", "!!a", "!!b", "!!c", "!!d" };
+static char extension[128] = { 0 };
+
+// Creates a kernel by enumerating all possible ways of building the vector out
+// of vloads skip_to_results will skip results up to a given number. If the
+// amount of code generated is greater than MAX_CODE_SIZE, this function will
+// return the number of results used, which can then be used as the
+// skip_to_result value to continue where it left off.
+int create_kernel(ExplicitType type, int output_size, char *program,
+ int *number_of_results, int skip_to_result)
+{
int number_of_sizes;
- switch (output_size) {
- case 1:
- number_of_sizes = 1;
- break;
- case 2:
- number_of_sizes = 2;
- break;
- case 3:
- number_of_sizes = 3;
- break;
- case 4:
- number_of_sizes = 4;
- break;
- case 8:
- number_of_sizes = 5;
- break;
- case 16:
- number_of_sizes = 6;
- break;
- default:
- log_error("Invalid size: %d\n", output_size);
- return -1;
+ switch (output_size)
+ {
+ case 1: number_of_sizes = 1; break;
+ case 2: number_of_sizes = 2; break;
+ case 3: number_of_sizes = 3; break;
+ case 4: number_of_sizes = 4; break;
+ case 8: number_of_sizes = 5; break;
+ case 16: number_of_sizes = 6; break;
+ default: log_error("Invalid size: %d\n", output_size); return -1;
}
int total_results = 0;
@@ -67,102 +60,125 @@ int create_kernel(ExplicitType type, int output_size, char *program, int *number
int total_program_length = 0;
int aborted_due_to_size = 0;
- if (skip_to_result < 0)
- skip_to_result = 0;
+ if (skip_to_result < 0) skip_to_result = 0;
// The line of code for the vector creation
char line[1024];
- // Keep track of what size vector we are using in each position so we can iterate through all fo them
+ // Keep track of what size vector we are using in each position so we can
+ // iterate through all fo them
int pos[DEPTH];
int max_size = output_size;
if (DEBUG > 1) log_info("max_size: %d\n", max_size);
program[0] = '\0';
- sprintf(program, "%s\n__kernel void test_vector_creation(__global %s *src, __global %s%s *result) {\n",
- type == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable" : "",
- get_explicit_type_name(type), get_explicit_type_name(type), ( number_of_sizes == 3 ) ? "" : size_names[number_of_sizes-1]);
+ sprintf(program,
+ "%s\n__kernel void test_vector_creation(__global %s *src, __global "
+ "%s%s *result) {\n",
+ extension, get_explicit_type_name(type),
+ get_explicit_type_name(type),
+ (number_of_sizes == 3) ? "" : size_names[number_of_sizes - 1]);
total_program_length += (int)strlen(program);
- char storePrefix[ 128 ], storeSuffix[ 128 ];
+ char storePrefix[128], storeSuffix[128];
- // Start out trying sizes 1,1,1,1,1...
- for (int i=0; i<DEPTH; i++)
- pos[i] = 0;
+ // Start out trying sizes 1,1,1... by initializing pos array to zeros for
+ // all vector sizes except 16. For 16-sizes initial_no_sizes array holds
+ // factor to omit time consuming, similar creation cases tested earlier.
+ for (int i = 0; i < DEPTH; i++) pos[i] = initial_no_sizes[number_of_sizes];
int done = 0;
- while (!done) {
- if (DEBUG > 1) {
+ while (!done)
+ {
+ if (DEBUG > 1)
+ {
log_info("pos size[] = [");
- for (int k=0; k<DEPTH; k++)
- log_info(" %d ", pos[k]);
+ for (int k = 0; k < DEPTH; k++) log_info(" %d ", pos[k]);
log_info("]\n");
}
- // Go through the selected vector sizes and see if the first n of them fit the
+ // Go through the selected vector sizes and see if the first n of them
+ // fit the
// required size exactly.
int size_so_far = 0;
int vloads;
- for ( vloads=0; vloads<DEPTH; vloads++) {
- if (size_so_far + sizes[pos[vloads]] <= max_size) {
+ for (vloads = 0; vloads < DEPTH; vloads++)
+ {
+ if (size_so_far + sizes[pos[vloads]] <= max_size)
+ {
size_so_far += sizes[pos[vloads]];
- } else {
+ }
+ else
+ {
break;
}
}
- if (DEBUG > 1) log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far);
+ if (DEBUG > 1)
+ log_info("vloads: %d, size_so_far:%d\n", vloads, size_so_far);
- // If they did not fit the required size exactly it is too long, so there is no point in checking any other combinations
+ // If they did not fit the required size exactly it is too long, so
+ // there is no point in checking any other combinations
// of the sizes to the right. Prune them from the search.
- if (size_so_far != max_size) {
+ if (size_so_far != max_size)
+ {
// Zero all the sizes to the right
- for (int k=vloads+1; k<DEPTH; k++) {
+ for (int k = vloads + 1; k < DEPTH; k++)
+ {
pos[k] = 0;
}
// Increment this current size and propagate the values up if needed
- for (int d=vloads; d>=0; d--) {
+ for (int d = vloads; d >= 0; d--)
+ {
pos[d]++;
- if (pos[d] >= number_of_sizes) {
+ if (pos[d] >= number_of_sizes)
+ {
pos[d] = 0;
- if (d == 0) {
+ if (d == 0)
+ {
// If we rolled over then we are done
done = 1;
break;
}
- } else {
+ }
+ else
+ {
break;
}
}
- // Go on to the next size since this one (and all others "under" it) didn't fit
+ // Go on to the next size since this one (and all others "under" it)
+ // didn't fit
continue;
}
// Generate the actual load line if we are building this part
- line[0]= '\0';
- if (skip_to_result == 0 || total_results >= skip_to_result) {
- if( number_of_sizes == 3 )
+ line[0] = '\0';
+ if (skip_to_result == 0 || total_results >= skip_to_result)
+ {
+ if (number_of_sizes == 3)
{
- sprintf( storePrefix, "vstore3( " );
- sprintf( storeSuffix, ", %d, result )", current_result );
+ sprintf(storePrefix, "vstore3( ");
+ sprintf(storeSuffix, ", %d, result )", current_result);
}
else
{
- sprintf( storePrefix, "result[%d] = ", current_result );
- storeSuffix[ 0 ] = 0;
+ sprintf(storePrefix, "result[%d] = ", current_result);
+ storeSuffix[0] = 0;
}
- sprintf(line, "\t%s(%s%d)(", storePrefix, get_explicit_type_name(type), output_size);
+ sprintf(line, "\t%s(%s%d)(", storePrefix,
+ get_explicit_type_name(type), output_size);
current_result++;
int offset = 0;
- for (int i=0; i<vloads; i++) {
+ for (int i = 0; i < vloads; i++)
+ {
if (pos[i] == 0)
sprintf(line + strlen(line), "src[%d]", offset);
else
- sprintf(line + strlen(line), "vload%s(0,src+%d)", size_names[pos[i]], offset);
+ sprintf(line + strlen(line), "vload%s(0,src+%d)",
+ size_names[pos[i]], offset);
offset += sizes[pos[i]];
- if (i<(vloads-1))
- sprintf(line + strlen(line), ",");
+ if (i < (vloads - 1)) sprintf(line + strlen(line), ",");
}
sprintf(line + strlen(line), ")%s;\n", storeSuffix);
@@ -171,7 +187,8 @@ int create_kernel(ExplicitType type, int output_size, char *program, int *number
}
total_results++;
total_program_length += (int)strlen(line);
- if (total_program_length > MAX_CODE_SIZE) {
+ if (total_program_length > MAX_CODE_SIZE)
+ {
aborted_due_to_size = 1;
done = 1;
}
@@ -179,132 +196,194 @@ int create_kernel(ExplicitType type, int output_size, char *program, int *number
if (DEBUG) log_info("line is: %s", line);
- // If we did not use all of them, then we ignore any changes further to the right.
- // We do this by causing those loops to skip on the next iteration.
- if (vloads < DEPTH) {
+ // If we did not use all of them, then we ignore any changes further to
+ // the right. We do this by causing those loops to skip on the next
+ // iteration.
+ if (vloads < DEPTH)
+ {
if (DEBUG > 1) log_info("done with this depth\n");
- for (int k=vloads; k<DEPTH; k++)
- pos[k] = number_of_sizes;
+ for (int k = vloads; k < DEPTH; k++) pos[k] = number_of_sizes;
}
// Increment the far right size by 1, rolling over as needed
- for (int d=DEPTH-1; d>=0; d--) {
+ for (int d = DEPTH - 1; d >= 0; d--)
+ {
pos[d]++;
- if (pos[d] >= number_of_sizes) {
+ if (pos[d] >= number_of_sizes)
+ {
pos[d] = 0;
- if (d == 0) {
+ if (d == 0)
+ {
// If we rolled over at the far-left then we are done
done = 1;
break;
}
- } else {
+ }
+ else
+ {
break;
}
}
- if (done)
- break;
+ if (done) break;
// Continue until we are done.
}
- strcat(program, "}\n\n"); //log_info("%s\n", program);
+ strcat(program, "}\n\n"); // log_info("%s\n", program);
total_program_length += 3;
- if (DEBUG) log_info("\t\t(Program for vector type %s%s contains %d vector creations, of total program length %gkB, with a total of %d vloads.)\n",
- get_explicit_type_name(type), size_names[number_of_sizes-1], total_results, total_program_length/1024.0, total_vloads);
+ if (DEBUG)
+ log_info(
+ "\t\t(Program for vector type %s%s contains %d vector creations, "
+ "of total program length %gkB, with a total of %d vloads.)\n",
+ get_explicit_type_name(type), size_names[number_of_sizes - 1],
+ total_results, total_program_length / 1024.0, total_vloads);
*number_of_results = current_result;
- if (aborted_due_to_size)
- return total_results;
+ if (aborted_due_to_size) return total_results;
return 0;
}
-
-
-int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_vector_creation(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
- unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16};
+ const std::vector<ExplicitType> vecType = { kChar, kUChar, kShort, kUShort,
+ kInt, kUInt, kLong, kULong,
+ kFloat, kHalf, kDouble };
+ // should be in sync with global array size_names
+ const std::vector<unsigned int> vecSizes = { 1, 2, 3, 4, 8, 16 };
- char *program_source;
- int error;
+ int error = CL_SUCCESS;
int total_errors = 0;
+ int number_of_results = 0;
- cl_int input_data_int[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
- cl_double input_data_double[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
- void *input_data_converted;
- void *output_data;
-
- int number_of_results;;
-
- input_data_converted = malloc(sizeof(cl_double)*16);
- program_source = (char*)malloc(sizeof(char)*1024*1024*4);
+ std::vector<char> input_data_converted(sizeof(cl_double) * 16);
+ std::vector<char> program_source(sizeof(char) * 1024 * 1024 * 4);
+ std::vector<char> output_data;
// Iterate over all the types
- for (int type_index=0; type_index<10; type_index++) {
- if(!gHasLong && ((vecType[type_index] == kLong) || (vecType[type_index] == kULong)))
+ for (size_t type_index = 0; type_index < vecType.size(); type_index++)
{
- log_info("Long/ULong data type not supported on this device\n");
- continue;
- }
-
- clMemWrapper input;
- if (vecType[type_index] == kDouble) {
- if (!is_extension_available(deviceID, "cl_khr_fp64")) {
- log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
+ if (!gHasLong
+ && ((vecType[type_index] == kLong)
+ || (vecType[type_index] == kULong)))
+ {
+ log_info("Long/ULong data type not supported on this device\n");
+ continue;
+ }
+ else if (vecType[type_index] == kDouble)
+ {
+ if (!is_extension_available(deviceID, "cl_khr_fp64"))
+ {
+ log_info("Extension cl_khr_fp64 not supported; skipping double "
+ "tests.\n");
continue;
}
- log_info("Testing doubles.\n");
+ snprintf(extension, sizeof(extension), "%s",
+ "#pragma OPENCL EXTENSION cl_khr_fp64 : enable");
}
+ else if (vecType[type_index] == kHalf)
+ {
+ if (!is_extension_available(deviceID, "cl_khr_fp16"))
+ {
+ log_info("Extension cl_khr_fp16 not supported; skipping half "
+ "tests.\n");
+ continue;
+ }
+ snprintf(extension, sizeof(extension), "%s",
+ "#pragma OPENCL EXTENSION cl_khr_fp16 : enable");
+ }
+
+ log_info("Testing %s.\n", get_explicit_type_name(vecType[type_index]));
// Convert the data to the right format for the test.
- memset(input_data_converted, 0xff, sizeof(cl_double)*16);
- if (vecType[type_index] != kDouble) {
- for (int j=0; j<16; j++) {
- convert_explicit_value(&input_data_int[j], ((char*)input_data_converted)+get_explicit_type_size(vecType[type_index])*j,
- kInt, 0, kRoundToEven, vecType[type_index]);
+ memset(input_data_converted.data(), 0xff, sizeof(cl_double) * 16);
+ if (vecType[type_index] == kDouble)
+ {
+ const cl_double input_data_double[16] = { 0, 1, 2, 3, 4, 5,
+ 6, 7, 8, 9, 10, 11,
+ 12, 13, 14, 15 };
+ memcpy(input_data_converted.data(), &input_data_double,
+ sizeof(cl_double) * 16);
+ }
+ else if (vecType[type_index] == kHalf)
+ {
+ cl_half *buf =
+ reinterpret_cast<cl_half *>(input_data_converted.data());
+ for (int j = 0; j < 16; j++)
+ buf[j] = cl_half_from_float(float(j), CL_HALF_RTE);
+ }
+ else
+ {
+ for (int j = 0; j < 16; j++)
+ {
+ convert_explicit_value(
+ &j,
+ ((char *)input_data_converted.data())
+ + get_explicit_type_size(vecType[type_index]) * j,
+ kInt, 0, kRoundToEven, vecType[type_index]);
}
- } else {
- memcpy(input_data_converted, &input_data_double, sizeof(cl_double)*16);
}
- input = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, get_explicit_type_size(vecType[type_index])*16,
- (vecType[type_index] != kDouble) ? input_data_converted : input_data_double, &error);
- if (error) {
+ clMemWrapper input =
+ clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+ get_explicit_type_size(vecType[type_index]) * 16,
+ input_data_converted.data(), &error);
+ if (error)
+ {
print_error(error, "clCreateBuffer failed");
total_errors++;
continue;
}
// Iterate over all the vector sizes.
- for (int size_index=1; size_index< 5; size_index++) {
- size_t global[] = {1,1,1};
+ for (size_t size_index = 1; size_index < vecSizes.size(); size_index++)
+ {
+ size_t global[] = { 1, 1, 1 };
int number_generated = -1;
int previous_number_generated = 0;
- log_info("Testing %s%s...\n", get_explicit_type_name(vecType[type_index]), size_names[size_index]);
- while (number_generated != 0) {
+ log_info("Testing %s%s...\n",
+ get_explicit_type_name(vecType[type_index]),
+ size_names[size_index]);
+ while (number_generated != 0)
+ {
clMemWrapper output;
clKernelWrapper kernel;
clProgramWrapper program;
- number_generated = create_kernel(vecType[type_index], vecSizes[size_index], program_source, &number_of_results, number_generated);
- if (number_generated != 0) {
+ number_generated =
+ create_kernel(vecType[type_index], vecSizes[size_index],
+ program_source.data(), &number_of_results,
+ number_generated);
+ if (number_generated != 0)
+ {
if (previous_number_generated == 0)
- log_info("Code size greater than %gkB; splitting test into multiple kernels.\n", MAX_CODE_SIZE/1024.0);
- log_info("\tExecuting vector permutations %d to %d...\n", previous_number_generated, number_generated-1);
+ log_info("Code size greater than %gkB; splitting test "
+ "into multiple kernels.\n",
+ MAX_CODE_SIZE / 1024.0);
+ log_info("\tExecuting vector permutations %d to %d...\n",
+ previous_number_generated, number_generated - 1);
}
- error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&program_source, "test_vector_creation");
- if (error) {
+ char *src = program_source.data();
+ error = create_single_kernel_helper(context, &program, &kernel,
+ 1, (const char **)&src,
+ "test_vector_creation");
+ if (error)
+ {
log_error("create_single_kernel_helper failed.\n");
total_errors++;
break;
}
- output = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
- number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index],
- NULL, &error);
- if (error) {
+ output = clCreateBuffer(
+ context, CL_MEM_WRITE_ONLY,
+ number_of_results
+ * get_explicit_type_size(vecType[type_index])
+ * vecSizes[size_index],
+ NULL, &error);
+ if (error)
+ {
print_error(error, "clCreateBuffer failed");
total_errors++;
break;
@@ -312,95 +391,115 @@ int test_vector_creation(cl_device_id deviceID, cl_context context, cl_command_q
error = clSetKernelArg(kernel, 0, sizeof(input), &input);
error |= clSetKernelArg(kernel, 1, sizeof(output), &output);
- if (error) {
+ if (error)
+ {
print_error(error, "clSetKernelArg failed");
total_errors++;
break;
}
- error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global, NULL, 0, NULL, NULL);
- if (error) {
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global,
+ NULL, 0, NULL, NULL);
+ if (error)
+ {
print_error(error, "clEnqueueNDRangeKernel failed");
total_errors++;
break;
}
error = clFinish(queue);
- if (error) {
+ if (error)
+ {
print_error(error, "clFinish failed");
total_errors++;
break;
}
- output_data = malloc(number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]);
- if (output_data == NULL) {
- log_error("Failed to allocate memory for output data.\n");
- total_errors++;
- break;
- }
- memset(output_data, 0xff, number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index]);
- error = clEnqueueReadBuffer(queue, output, CL_TRUE, 0,
- number_of_results*get_explicit_type_size(vecType[type_index])*vecSizes[size_index],
- output_data, 0, NULL, NULL);
- if (error) {
+ output_data.resize(number_of_results
+ * get_explicit_type_size(vecType[type_index])
+ * vecSizes[size_index]);
+ memset(output_data.data(), 0xff,
+ number_of_results
+ * get_explicit_type_size(vecType[type_index])
+ * vecSizes[size_index]);
+ error = clEnqueueReadBuffer(
+ queue, output, CL_TRUE, 0,
+ number_of_results
+ * get_explicit_type_size(vecType[type_index])
+ * vecSizes[size_index],
+ output_data.data(), 0, NULL, NULL);
+ if (error)
+ {
print_error(error, "clEnqueueReadBuffer failed");
total_errors++;
- free(output_data);
break;
}
// Check the results
- char *res = (char *)output_data;
- char *exp = (char *)input_data_converted;
- for (int i=0; i<number_of_results; i++) {
+ char *res = (char *)output_data.data();
+ char *exp = (char *)input_data_converted.data();
+ for (int i = 0; i < number_of_results; i++)
+ {
// If they do not match, then print out why
- if (memcmp(input_data_converted,
- res + i*(get_explicit_type_size(vecType[type_index])*vecSizes[size_index]),
- get_explicit_type_size(vecType[type_index])*vecSizes[size_index])
- ) {
+ if (memcmp(exp,
+ res
+ + i
+ * (get_explicit_type_size(
+ vecType[type_index])
+ * vecSizes[size_index]),
+ get_explicit_type_size(vecType[type_index])
+ * vecSizes[size_index]))
+ {
log_error("Data failed to validate for result %d\n", i);
- // Find the line in the program that failed. This is ugly.
- char search[32];
- char found_line[1024];
- found_line[0]='\0';
- search[0]='\0';
+ // Find the line in the program that failed. This is
+ // ugly.
+ char search[32] = { 0 };
+ char found_line[1024] = { 0 };
sprintf(search, "result[%d] = (", i);
- char *start_loc = strstr(program_source, search);
+ char *start_loc = strstr(program_source.data(), search);
if (start_loc == NULL)
- log_error("Failed to find program source for failure for %s in \n%s", search, program_source);
- else {
- char *end_loc = strstr(start_loc, "\n");
- memcpy(&found_line, start_loc, (end_loc-start_loc));
- found_line[end_loc-start_loc]='\0';
- log_error("Failed vector line: %s\n", found_line);
+ log_error("Failed to find program source for "
+ "failure for %s in \n%s",
+ search, program_source.data());
+ else
+ {
+ char *end_loc = strstr(start_loc, "\n");
+ memcpy(&found_line, start_loc,
+ (end_loc - start_loc));
+ found_line[end_loc - start_loc] = '\0';
+ log_error("Failed vector line: %s\n", found_line);
}
- for (int j=0; j<(int)vecSizes[size_index]; j++) {
- char expected_value[64];
- char returned_value[64];
- expected_value[0]='\0';
- returned_value[0]='\0';
- print_type_to_string(vecType[type_index], (void*)(res+get_explicit_type_size(vecType[type_index])*(i*vecSizes[size_index]+j)), returned_value);
- print_type_to_string(vecType[type_index], (void*)(exp+get_explicit_type_size(vecType[type_index])*j), expected_value);
- log_error("index [%d, component %d]: got: %s expected: %s\n", i, j,
- returned_value, expected_value);
+ for (int j = 0; j < (int)vecSizes[size_index]; j++)
+ {
+ char expected_value[64] = { 0 };
+ char returned_value[64] = { 0 };
+ print_type_to_string(
+ vecType[type_index],
+ (void *)(res
+ + get_explicit_type_size(
+ vecType[type_index])
+ * (i * vecSizes[size_index] + j)),
+ returned_value);
+ print_type_to_string(
+ vecType[type_index],
+ (void *)(exp
+ + get_explicit_type_size(
+ vecType[type_index])
+ * j),
+ expected_value);
+ log_error("index [%d, component %d]: got: %s "
+ "expected: %s\n",
+ i, j, returned_value, expected_value);
}
-
total_errors++;
}
}
- free(output_data);
previous_number_generated = number_generated;
} // number_generated != 0
-
} // vector sizes
} // vector types
- free(input_data_converted);
- free(program_source);
-
return total_errors;
}
-
-
diff --git a/test_conformance/basic/test_vector_swizzle.cpp b/test_conformance/basic/test_vector_swizzle.cpp
index 884bcf36..fdbc8919 100644
--- a/test_conformance/basic/test_vector_swizzle.cpp
+++ b/test_conformance/basic/test_vector_swizzle.cpp
@@ -22,6 +22,8 @@
#include "procs.h"
#include "harness/testHarness.h"
+static std::string pragma_extension;
+
template <int N> struct TestInfo
{
};
@@ -629,7 +631,9 @@ static int test_vectype(const char* type_name, cl_device_id device,
clProgramWrapper program;
clKernelWrapper kernel;
- const char* xyzw_source = TestInfo<N>::kernel_source_xyzw;
+ std::string program_src =
+ pragma_extension + std::string(TestInfo<N>::kernel_source_xyzw);
+ const char* xyzw_source = program_src.c_str();
error = create_single_kernel_helper(
context, &program, &kernel, 1, &xyzw_source,
"test_vector_swizzle_xyzw", buildOptions.c_str());
@@ -643,7 +647,9 @@ static int test_vectype(const char* type_name, cl_device_id device,
clProgramWrapper program;
clKernelWrapper kernel;
- const char* sN_source = TestInfo<N>::kernel_source_sN;
+ std::string program_src =
+ pragma_extension + std::string(TestInfo<N>::kernel_source_sN);
+ const char* sN_source = program_src.c_str();
error = create_single_kernel_helper(
context, &program, &kernel, 1, &sN_source, "test_vector_swizzle_sN",
buildOptions.c_str());
@@ -660,7 +666,9 @@ static int test_vectype(const char* type_name, cl_device_id device,
const Version device_version = get_device_cl_version(device);
if (device_version >= Version(3, 0))
{
- const char* rgba_source = TestInfo<N>::kernel_source_rgba;
+ std::string program_src =
+ pragma_extension + std::string(TestInfo<N>::kernel_source_rgba);
+ const char* rgba_source = program_src.c_str();
error = create_single_kernel_helper(
context, &program, &kernel, 1, &rgba_source,
"test_vector_swizzle_rgba", buildOptions.c_str());
@@ -689,6 +697,7 @@ int test_vector_swizzle(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements)
{
int hasDouble = is_extension_available(device, "cl_khr_fp64");
+ int hasHalf = is_extension_available(device, "cl_khr_fp16");
int result = TEST_PASS;
result |= test_type<cl_char>("char", device, context, queue);
@@ -703,8 +712,14 @@ int test_vector_swizzle(cl_device_id device, cl_context context,
result |= test_type<cl_ulong>("ulong", device, context, queue);
}
result |= test_type<cl_float>("float", device, context, queue);
+ if (hasHalf)
+ {
+ pragma_extension = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
+ result |= test_type<cl_half>("half", device, context, queue);
+ }
if (hasDouble)
{
+ pragma_extension = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
result |= test_type<cl_double>("double", device, context, queue);
}
return result;
diff --git a/test_conformance/basic/test_vloadstore.cpp b/test_conformance/basic/test_vloadstore.cpp
index e137f9e7..d34ecbf9 100644
--- a/test_conformance/basic/test_vloadstore.cpp
+++ b/test_conformance/basic/test_vloadstore.cpp
@@ -13,52 +13,129 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-#include "harness/compat.h"
-
+#include <algorithm>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <vector>
+#include <CL/cl_half.h>
#include "procs.h"
#include "harness/conversions.h"
-#include "harness/typeWrappers.h"
#include "harness/errorHelpers.h"
+#include "harness/stringHelpers.h"
+#include "harness/typeWrappers.h"
// Outputs debug information for stores
#define DEBUG 0
// Forces stores/loads to be done with offsets = tid
#define LINEAR_OFFSETS 0
#define NUM_LOADS 512
-
-static const char *doubleExtensionPragma = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
+#define HFF(num) cl_half_from_float(num, halfRoundingMode)
+#define HTF(num) cl_half_to_float(num)
+
+char pragma_str[128] = { 0 };
+char mem_type[64] = { 0 };
+char store_str[128] = { 0 };
+char load_str[128] = { 0 };
+
+extern cl_half_rounding_mode halfRoundingMode;
+
+// clang-format off
+static const char *store_pattern= "results[ tid ] = tmp;\n";
+static const char *store_patternV3 = "results[3*tid] = tmp.s0; results[3*tid+1] = tmp.s1; results[3*tid+2] = tmp.s2;\n";
+static const char *load_pattern = "sSharedStorage[ i ] = src[ i ];\n";
+static const char *load_patternV3 = "sSharedStorage[3*i] = src[ 3*i]; sSharedStorage[3*i+1] = src[3*i+1]; sSharedStorage[3*i+2] = src[3*i+2];\n";
+static const char *kernel_pattern[] = {
+pragma_str,
+"#define STYPE %s\n"
+"__kernel void test_fn( ", mem_type, " STYPE *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
+"{\n"
+" int tid = get_global_id( 0 );\n"
+" %s%d tmp = vload%d( offsets[ tid ], ( (", mem_type, " STYPE *) src ) + alignmentOffsets[ tid ] );\n"
+" ", store_str,
+"}\n"
+};
+
+const char *pattern_local [] = {
+pragma_str,
+"__kernel void test_fn(__local %s *sSharedStorage, __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
+"{\n"
+" int tid = get_global_id( 0 );\n"
+" int lid = get_local_id( 0 );\n"
+"\n"
+" if( lid == 0 )\n"
+" {\n"
+" for( int i = 0; i < %d; i++ ) {\n"
+" ", load_str,
+" }\n"
+" }\n"
+// Note: the above loop will only run on the first thread of each local group, but this barrier should ensure that all
+// threads are caught up (including the first one with the copy) before any proceed, i.e. the shared storage should be
+// updated on all threads at that point
+" barrier( CLK_LOCAL_MEM_FENCE );\n"
+"\n"
+" %s%d tmp = vload%d( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n"
+" ", store_str,
+"}\n" };
+
+const char *pattern_priv [] = {
+pragma_str,
+// Private memory is unique per thread, unlike local storage which is unique per local work group. Which means
+// for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test
+"#define PRIV_TYPE %s\n"
+"#define PRIV_SIZE %d\n"
+"__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
+"{\n"
+" __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n"
+" int tid = get_global_id( 0 );\n"
+"\n"
+" for( int i = 0; i < PRIV_SIZE; i++ )\n"
+" sPrivateStorage[ i ] = src[ i ];\n"
+// Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for
+// anybody else to sync up
+"\n"
+" %s%d tmp = vload%d( offsets[ tid ], ( (__private %s *) sPrivateStorage ) + alignmentOffsets[ tid ] );\n"
+" ", store_str,
+"}\n"};
+// clang-format on
#pragma mark -------------------- vload harness --------------------------
-typedef void (*create_vload_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize );
+typedef void (*create_program_fn)(std::string &, size_t, ExplicitType, size_t,
+ size_t);
+typedef int (*test_fn)(cl_device_id, cl_context, cl_command_queue, ExplicitType,
+ unsigned int, create_program_fn, size_t);
-int test_vload( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize,
- create_vload_program_fn createFn, size_t bufferSize, MTdata d )
+int test_vload(cl_device_id device, cl_context context, cl_command_queue queue,
+ ExplicitType type, unsigned int vecSize,
+ create_program_fn createFn, size_t bufferSize)
{
- int error;
-
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper streams[ 4 ];
+ MTdataHolder d(gRandomSeed);
const size_t numLoads = (DEBUG) ? 16 : NUM_LOADS;
if (DEBUG) bufferSize = (bufferSize < 128) ? bufferSize : 128;
size_t threads[ 1 ], localThreads[ 1 ];
clProtectedArray inBuffer( bufferSize );
- char programSrc[ 10240 ];
cl_uint offsets[ numLoads ], alignmentOffsets[ numLoads ];
size_t numElements, typeSize, i;
unsigned int outVectorSize;
+ pragma_str[0] = '\0';
+ if (type == kDouble)
+ std::snprintf(pragma_str, sizeof(pragma_str),
+ "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n");
+ else if (type == kHalf)
+ std::snprintf(pragma_str, sizeof(pragma_str),
+ "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n");
typeSize = get_explicit_type_size( type );
numElements = bufferSize / ( typeSize * vecSize );
@@ -83,25 +160,19 @@ int test_vload( cl_device_id device, cl_context context, cl_command_queue queue,
outVectorSize = vecSize;
// Declare output buffers now
-#if !(defined(_WIN32) && defined(_MSC_VER))
- char outBuffer[ numLoads * typeSize * outVectorSize ];
- char referenceBuffer[ numLoads * typeSize * vecSize ];
-#else
- char* outBuffer = (char*)_malloca(numLoads * typeSize * outVectorSize * sizeof(cl_char));
- char* referenceBuffer = (char*)_malloca(numLoads * typeSize * vecSize * sizeof(cl_char));
-#endif
+ std::vector<char> outBuffer(numLoads * typeSize * outVectorSize);
+ std::vector<char> referenceBuffer(numLoads * typeSize * vecSize);
// Create the program
-
-
+ std::string programSrc;
createFn( programSrc, numElements, type, vecSize, outVectorSize);
// Create our kernel
- const char *ptr = programSrc;
-
- error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
+ const char *ptr = programSrc.c_str();
+ cl_int error = create_single_kernel_helper(context, &program, &kernel, 1,
+ &ptr, "test_fn");
test_error( error, "Unable to create testing kernel" );
- if (DEBUG) log_info("Kernel: \n%s\n", programSrc);
+ if (DEBUG) log_info("Kernel: \n%s\n", programSrc.c_str());
// Get the number of args to differentiate the kernels with local storage. (They have 5)
cl_uint numArgs;
@@ -115,7 +186,9 @@ int test_vload( cl_device_id device, cl_context context, cl_command_queue queue,
test_error( error, "Unable to create kernel stream" );
streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*sizeof(alignmentOffsets[0]), alignmentOffsets, &error );
test_error( error, "Unable to create kernel stream" );
- streams[ 3 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numLoads*typeSize*outVectorSize, (void *)outBuffer, &error );
+ streams[3] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+ numLoads * typeSize * outVectorSize,
+ (void *)outBuffer.data(), &error);
test_error( error, "Unable to create kernel stream" );
// Set parameters and run
@@ -145,28 +218,32 @@ int test_vload( cl_device_id device, cl_context context, cl_command_queue queue,
test_error( error, "Unable to exec kernel" );
// Get the results
- error = clEnqueueReadBuffer( queue, streams[ 3 ], CL_TRUE, 0, numLoads * typeSize * outVectorSize * sizeof(cl_char), (void *)outBuffer, 0, NULL, NULL );
+ error = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0,
+ numLoads * typeSize * outVectorSize
+ * sizeof(cl_char),
+ (void *)outBuffer.data(), 0, NULL, NULL);
test_error( error, "Unable to read results" );
-
// Create the reference results
- memset( referenceBuffer, 0, numLoads * typeSize * vecSize * sizeof(cl_char));
+ referenceBuffer.assign(numLoads * typeSize * vecSize, 0);
for( i = 0; i < numLoads; i++ )
{
- memcpy( referenceBuffer + i * typeSize * vecSize, ( (char *)(void *)inBuffer ) + ( ( offsets[ i ] * vecSize ) + alignmentOffsets[ i ] ) * typeSize,
- typeSize * vecSize );
+ memcpy(&referenceBuffer[i * typeSize * vecSize],
+ ((char *)(void *)inBuffer)
+ + ((offsets[i] * vecSize) + alignmentOffsets[i]) * typeSize,
+ typeSize * vecSize);
}
// Validate the results now
- char *expected = referenceBuffer;
- char *actual = outBuffer;
+ char *expected = referenceBuffer.data();
+ char *actual = outBuffer.data();
char *in = (char *)(void *)inBuffer;
if (DEBUG) {
log_info("Memory contents:\n");
+ char inString[1024];
+ char expectedString[1024], actualString[1024];
for (i=0; i<numElements; i++) {
- char inString[1024];
- char expectedString[ 1024 ], actualString[ 1024 ];
if (i < numLoads) {
log_info("buffer %3d: input: %s expected: %s got: %s (load offset %3d, alignment offset %3d)", (int)i, GetDataVectorString( &(in[i*typeSize*vecSize]), typeSize, vecSize, inString ),
GetDataVectorString( &(expected[i*typeSize*vecSize]), typeSize, vecSize, expectedString ),
@@ -197,35 +274,42 @@ int test_vload( cl_device_id device, cl_context context, cl_command_queue queue,
expected += typeSize * vecSize;
actual += typeSize * outVectorSize;
}
-
return 0;
}
-int test_vloadset(cl_device_id device, cl_context context, cl_command_queue queue, create_vload_program_fn createFn, size_t bufferSize )
+template <test_fn test_func_ptr>
+int test_vset(cl_device_id device, cl_context context, cl_command_queue queue,
+ create_program_fn createFn, size_t bufferSize)
{
- ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
+ std::vector<ExplicitType> vecType = { kChar, kUChar, kShort, kUShort,
+ kInt, kUInt, kLong, kULong,
+ kFloat, kHalf, kDouble };
unsigned int vecSizes[] = { 2, 3, 4, 8, 16, 0 };
const char *size_names[] = { "2", "3", "4", "8", "16"};
- unsigned int typeIdx, sizeIdx;
int error = 0;
- MTdata mtData = init_genrand( gRandomSeed );
log_info("Testing with buffer size of %d.\n", (int)bufferSize);
- for( typeIdx = 0; vecType[ typeIdx ] != kNumExplicitTypes; typeIdx++ )
- {
+ bool hasDouble = is_extension_available(device, "cl_khr_fp64");
+ bool hasHalf = is_extension_available(device, "cl_khr_fp16");
- if( vecType[ typeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
+ for (unsigned typeIdx = 0; typeIdx < vecType.size(); typeIdx++)
+ {
+ if (vecType[typeIdx] == kDouble && !hasDouble)
continue;
-
- if(( vecType[ typeIdx ] == kLong || vecType[ typeIdx ] == kULong ) && !gHasLong )
+ else if (vecType[typeIdx] == kHalf && !hasHalf)
+ continue;
+ else if ((vecType[typeIdx] == kLong || vecType[typeIdx] == kULong)
+ && !gHasLong)
continue;
- for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ )
+ for (unsigned sizeIdx = 0; vecSizes[sizeIdx] != 0; sizeIdx++)
{
log_info("Testing %s%s...\n", get_explicit_type_name(vecType[typeIdx]), size_names[sizeIdx]);
- int error_this_type = test_vload( device, context, queue, vecType[ typeIdx ], vecSizes[ sizeIdx ], createFn, bufferSize, mtData );
+ int error_this_type =
+ test_func_ptr(device, context, queue, vecType[typeIdx],
+ vecSizes[sizeIdx], createFn, bufferSize);
if (error_this_type) {
error += error_this_type;
log_error("Failure; skipping further sizes for this type.");
@@ -233,125 +317,59 @@ int test_vloadset(cl_device_id device, cl_context context, cl_command_queue queu
}
}
}
-
- free_mtdata(mtData);
-
return error;
}
#pragma mark -------------------- vload test cases --------------------------
-void create_global_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
+void create_global_load_code(std::string &destBuffer, size_t inBufferSize,
+ ExplicitType type, size_t inVectorSize,
+ size_t outVectorSize)
{
- const char *pattern =
- "%s%s"
- "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
- "{\n"
- " int tid = get_global_id( 0 );\n"
- " %s%d tmp = vload%d( offsets[ tid ], ( (__global %s *) src ) + alignmentOffsets[ tid ] );\n"
- " results[ tid ] = tmp;\n"
- "}\n";
-
- const char *patternV3 =
- "%s%s"
- "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
- "{\n"
- " int tid = get_global_id( 0 );\n"
- " %s3 tmp = vload3( offsets[ tid ], ( (__global %s *) src ) + alignmentOffsets[ tid ] );\n"
- " results[ 3*tid ] = tmp.s0;\n"
- " results[ 3*tid+1 ] = tmp.s1;\n"
- " results[ 3*tid+2 ] = tmp.s2;\n"
- "}\n";
-
+ std::snprintf(mem_type, sizeof(mem_type), "__global");
+ std::snprintf(store_str, sizeof(store_str), store_patternV3);
const char *typeName = get_explicit_type_name(type);
- if(inVectorSize == 3) {
- sprintf( destBuffer, patternV3,
- type == kDouble ? doubleExtensionPragma : "",
- "",
- typeName, typeName, typeName, typeName );
- } else {
- sprintf( destBuffer, pattern, type == kDouble ? doubleExtensionPragma : "",
- "",
- typeName, typeName, (int)outVectorSize, typeName, (int)inVectorSize,
- (int)inVectorSize, typeName );
+ std::string outTypeName = typeName;
+ if (inVectorSize != 3)
+ {
+ outTypeName = str_sprintf("%s%d", typeName, (int)outVectorSize);
+ std::snprintf(store_str, sizeof(store_str), store_pattern);
}
+
+ std::string kernel_src = concat_kernel(
+ kernel_pattern, sizeof(kernel_pattern) / sizeof(kernel_pattern[0]));
+ destBuffer = str_sprintf(kernel_src, typeName, outTypeName.c_str(),
+ typeName, (int)inVectorSize, (int)inVectorSize);
}
int test_vload_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
- return test_vloadset( device, context, queue, create_global_load_code, 10240 );
+ return test_vset<test_vload>(device, context, queue,
+ create_global_load_code, 10240);
}
-
-void create_local_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
+void create_local_load_code(std::string &destBuffer, size_t inBufferSize,
+ ExplicitType type, size_t inVectorSize,
+ size_t outVectorSize)
{
- const char *pattern =
- "%s%s"
- //" __local %s%d sSharedStorage[ %d ];\n"
- "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
- "{\n"
- " int tid = get_global_id( 0 );\n"
- " int lid = get_local_id( 0 );\n"
- "\n"
- " if( lid == 0 )\n"
- " {\n"
- " for( int i = 0; i < %d; i++ )\n"
- " sSharedStorage[ i ] = src[ i ];\n"
- " }\n"
- // Note: the above loop will only run on the first thread of each local group, but this barrier should ensure that all
- // threads are caught up (including the first one with the copy) before any proceed, i.e. the shared storage should be
- // updated on all threads at that point
- " barrier( CLK_LOCAL_MEM_FENCE );\n"
- "\n"
- " %s%d tmp = vload%d( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n"
- " results[ tid ] = tmp;\n"
- "}\n";
-
- const char *patternV3 =
- "%s%s"
- //" __local %s%d sSharedStorage[ %d ];\n"
- "__kernel void test_fn(__local %s *sSharedStorage, __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
- "{\n"
- " int tid = get_global_id( 0 );\n"
- " int lid = get_local_id( 0 );\n"
- "\n"
- " if( lid == 0 )\n"
- " {\n"
- " for( int i = 0; i < %d; i++ ) {\n"
- " sSharedStorage[ 3*i ] = src[ 3*i ];\n"
- " sSharedStorage[ 3*i +1] = src[ 3*i +1];\n"
- " sSharedStorage[ 3*i +2] = src[ 3*i +2];\n"
- " }\n"
- " }\n"
- // Note: the above loop will only run on the first thread of each local group, but this barrier should ensure that all
- // threads are caught up (including the first one with the copy) before any proceed, i.e. the shared storage should be
- // updated on all threads at that point
- " barrier( CLK_LOCAL_MEM_FENCE );\n"
- "\n"
- " %s3 tmp = vload3( offsets[ tid ], ( (__local %s *) sSharedStorage ) + alignmentOffsets[ tid ] );\n"
- " results[ 3*tid ] = tmp.s0;\n"
- " results[ 3*tid +1] = tmp.s1;\n"
- " results[ 3*tid +2] = tmp.s2;\n"
- "}\n";
-
+ std::snprintf(store_str, sizeof(store_str), store_patternV3);
+ std::snprintf(load_str, sizeof(load_str), load_patternV3);
const char *typeName = get_explicit_type_name(type);
- if(inVectorSize == 3) {
- sprintf( destBuffer, patternV3,
- type == kDouble ? doubleExtensionPragma : "",
- "",
- typeName, /*(int)inBufferSize,*/
- typeName, typeName,
- (int)inBufferSize,
- typeName, typeName );
- } else {
- sprintf( destBuffer, pattern,
- type == kDouble ? doubleExtensionPragma : "",
- "",
- typeName, (int)inVectorSize, /*(int)inBufferSize,*/
- typeName, (int)inVectorSize, typeName, (int)outVectorSize,
- (int)inBufferSize,
- typeName, (int)inVectorSize, (int)inVectorSize, typeName );
+ std::string outTypeName = typeName;
+ std::string inTypeName = typeName;
+ if (inVectorSize != 3)
+ {
+ outTypeName = str_sprintf("%s%d", typeName, (int)outVectorSize);
+ inTypeName = str_sprintf("%s%d", typeName, (int)inVectorSize);
+ std::snprintf(store_str, sizeof(store_str), store_pattern);
+ std::snprintf(load_str, sizeof(load_str), load_pattern);
}
+
+ std::string kernel_src = concat_kernel(
+ pattern_local, sizeof(pattern_local) / sizeof(pattern_local[0]));
+ destBuffer = str_sprintf(kernel_src, inTypeName.c_str(), inTypeName.c_str(),
+ outTypeName.c_str(), (int)inBufferSize, typeName,
+ (int)inVectorSize, (int)inVectorSize, typeName);
}
int test_vload_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
@@ -360,53 +378,34 @@ int test_vload_local(cl_device_id device, cl_context context, cl_command_queue q
cl_ulong localSize;
int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( localSize ), &localSize, NULL );
test_error( error, "Unable to get max size of local memory buffer" );
- if( localSize > 10240 )
- localSize = 10240;
+ if (localSize > 10240) localSize = 10240;
if (localSize > 4096)
localSize -= 2048;
else
localSize /= 2;
- return test_vloadset( device, context, queue, create_local_load_code, (size_t)localSize );
+ return test_vset<test_vload>(device, context, queue, create_local_load_code,
+ (size_t)localSize);
}
-
-void create_constant_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
+void create_constant_load_code(std::string &destBuffer, size_t inBufferSize,
+ ExplicitType type, size_t inVectorSize,
+ size_t outVectorSize)
{
- const char *pattern =
- "%s%s"
- "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
- "{\n"
- " int tid = get_global_id( 0 );\n"
- " %s%d tmp = vload%d( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n"
- " results[ tid ] = tmp;\n"
- "}\n";
-
- const char *patternV3 =
- "%s%s"
- "__kernel void test_fn( __constant %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
- "{\n"
- " int tid = get_global_id( 0 );\n"
- " %s3 tmp = vload3( offsets[ tid ], ( (__constant %s *) src ) + alignmentOffsets[ tid ] );\n"
- " results[ 3*tid ] = tmp.s0;\n"
- " results[ 3*tid+1 ] = tmp.s1;\n"
- " results[ 3*tid+2 ] = tmp.s2;\n"
- "}\n";
-
+ std::snprintf(mem_type, sizeof(mem_type), "__constant");
+ std::snprintf(store_str, sizeof(store_str), store_patternV3);
const char *typeName = get_explicit_type_name(type);
- if(inVectorSize == 3) {
- sprintf( destBuffer, patternV3,
- type == kDouble ? doubleExtensionPragma : "",
- "",
- typeName, typeName, typeName,
- typeName );
- } else {
- sprintf( destBuffer, pattern,
- type == kDouble ? doubleExtensionPragma : "",
- "",
- typeName, typeName, (int)outVectorSize, typeName, (int)inVectorSize,
- (int)inVectorSize, typeName );
+ std::string outTypeName = typeName;
+ if (inVectorSize != 3)
+ {
+ outTypeName = str_sprintf("%s%d", typeName, (int)outVectorSize);
+ std::snprintf(store_str, sizeof(store_str), store_pattern);
}
+
+ std::string kernel_src = concat_kernel(
+ kernel_pattern, sizeof(kernel_pattern) / sizeof(kernel_pattern[0]));
+ destBuffer = str_sprintf(kernel_src, typeName, outTypeName.c_str(),
+ typeName, (int)inVectorSize, (int)inVectorSize);
}
int test_vload_constant(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
@@ -415,109 +414,71 @@ int test_vload_constant(cl_device_id device, cl_context context, cl_command_queu
cl_ulong maxSize;
int error = clGetDeviceInfo( device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, NULL );
test_error( error, "Unable to get max size of constant memory buffer" );
- if( maxSize > 10240 )
- maxSize = 10240;
+ if (maxSize > 10240) maxSize = 10240;
if (maxSize > 4096)
maxSize -= 2048;
else
maxSize /= 2;
- return test_vloadset( device, context, queue, create_constant_load_code, (size_t)maxSize );
+ return test_vset<test_vload>(device, context, queue,
+ create_constant_load_code, (size_t)maxSize);
}
-
-void create_private_load_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize, size_t outVectorSize )
+void create_private_load_code(std::string &destBuffer, size_t inBufferSize,
+ ExplicitType type, size_t inVectorSize,
+ size_t outVectorSize)
{
- const char *pattern =
- "%s%s"
- // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means
- // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test
- "#define PRIV_TYPE %s%d\n"
- "#define PRIV_SIZE %d\n"
- "__kernel void test_fn( __global %s%d *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s%d *results )\n"
- "{\n"
- " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n"
- " int tid = get_global_id( 0 );\n"
- "\n"
- " for( int i = 0; i < %d; i++ )\n"
- " sPrivateStorage[ i ] = src[ i ];\n"
- // Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for
- // anybody else to sync up
- "\n"
- " %s%d tmp = vload%d( offsets[ tid ], ( (__private %s *) sPrivateStorage ) + alignmentOffsets[ tid ] );\n"
- " results[ tid ] = tmp;\n"
- "}\n";
-
- const char *patternV3 =
- "%s%s"
- // Private memory is unique per thread, unlike local storage which is unique per local work group. Which means
- // for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test
- "#define PRIV_TYPE %s\n"
- "#define PRIV_SIZE %d\n"
- "__kernel void test_fn( __global %s *src, __global uint *offsets, __global uint *alignmentOffsets, __global %s *results )\n"
- "{\n"
- " __private PRIV_TYPE sPrivateStorage[ PRIV_SIZE ];\n"
- " int tid = get_global_id( 0 );\n"
- "\n"
- " for( int i = 0; i < PRIV_SIZE; i++ )\n"
- " {\n"
- " sPrivateStorage[ i ] = src[ i ];\n"
- " }\n"
- // Note: unlike the local test, each thread runs the above copy loop independently, so nobody needs to wait for
- // anybody else to sync up
- "\n"
- " %s3 tmp = vload3( offsets[ tid ], ( sPrivateStorage ) + alignmentOffsets[ tid ] );\n"
- " results[ 3*tid ] = tmp.s0;\n"
- " results[ 3*tid+1 ] = tmp.s1;\n"
- " results[ 3*tid+2 ] = tmp.s2;\n"
- "}\n";
-
+ std::snprintf(store_str, sizeof(store_str), store_patternV3);
const char *typeName = get_explicit_type_name(type);
- if(inVectorSize ==3) {
- sprintf( destBuffer, patternV3,
- type == kDouble ? doubleExtensionPragma : "",
- "",
- typeName, 3*((int)inBufferSize),
- typeName, typeName,
- typeName );
- // log_info("Src is \"\n%s\n\"\n", destBuffer);
- } else {
- sprintf( destBuffer, pattern,
- type == kDouble ? doubleExtensionPragma : "",
- "",
- typeName, (int)inVectorSize, (int)inBufferSize,
- typeName, (int)inVectorSize, typeName, (int)outVectorSize,
- (int)inBufferSize,
- typeName, (int)inVectorSize, (int)inVectorSize, typeName );
+ std::string outTypeName = typeName;
+ std::string inTypeName = typeName;
+ int bufSize = (int)inBufferSize * 3;
+ if (inVectorSize != 3)
+ {
+ outTypeName = str_sprintf("%s%d", typeName, (int)outVectorSize);
+ inTypeName = str_sprintf("%s%d", typeName, (int)inVectorSize);
+ bufSize = (int)inBufferSize;
+ std::snprintf(store_str, sizeof(store_str), store_pattern);
}
+
+ std::string kernel_src = concat_kernel(
+ pattern_priv, sizeof(pattern_priv) / sizeof(pattern_priv[0]));
+ destBuffer = str_sprintf(kernel_src, inTypeName.c_str(), bufSize,
+ inTypeName.c_str(), outTypeName.c_str(), typeName,
+ (int)inVectorSize, (int)inVectorSize, typeName);
}
int test_vload_private(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
// We have no idea how much actual private storage is available, so just pick a reasonable value,
// which is that we can fit at least two 16-element long, which is 2*8 bytes * 16 = 256 bytes
- return test_vloadset( device, context, queue, create_private_load_code, 256 );
+ return test_vset<test_vload>(device, context, queue,
+ create_private_load_code, 256);
}
-
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#pragma mark -------------------- vstore harness --------------------------
-typedef void (*create_vstore_program_fn)( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize );
-
-int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue, ExplicitType type, unsigned int vecSize,
- create_vstore_program_fn createFn, size_t bufferSize, MTdata d )
+int test_vstore(cl_device_id device, cl_context context, cl_command_queue queue,
+ ExplicitType type, unsigned int vecSize,
+ create_program_fn createFn, size_t bufferSize)
{
- int error;
-
clProgramWrapper program;
clKernelWrapper kernel;
clMemWrapper streams[ 3 ];
+ MTdataHolder d(gRandomSeed);
size_t threads[ 1 ], localThreads[ 1 ];
-
size_t numElements, typeSize, numStores = (DEBUG) ? 16 : NUM_LOADS;
+ pragma_str[0] = '\0';
+ if (type == kDouble)
+ std::snprintf(pragma_str, sizeof(pragma_str),
+ "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n");
+ else if (type == kHalf)
+ std::snprintf(pragma_str, sizeof(pragma_str),
+ "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n");
+
if (DEBUG)
bufferSize = (bufferSize < 128) ? bufferSize : 128;
@@ -534,39 +495,22 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue
}
if (DEBUG)
log_info("Testing: numStores: %d, typeSize: %d, vecSize: %d, numElements: %d, bufferSize: %d\n", (int)numStores, (int)typeSize, vecSize, (int)numElements, (int)bufferSize);
-#if !(defined(_WIN32) && defined(_MSC_VER))
- cl_uint offsets[ numStores ];
-#else
- cl_uint* offsets = (cl_uint*)_malloca(numStores * sizeof(cl_uint));
-#endif
- char programSrc[ 10240 ];
- size_t i;
-
-#if !(defined(_WIN32) && defined(_MSC_VER))
- char inBuffer[ numStores * typeSize * vecSize ];
-#else
- char* inBuffer = (char*)_malloca( numStores * typeSize * vecSize * sizeof(cl_char));
-#endif
+
+ std::vector<cl_uint> offsets(numStores);
+ std::vector<char> inBuffer(numStores * typeSize * vecSize);
+
clProtectedArray outBuffer( numElements * typeSize * vecSize );
-#if !(defined(_WIN32) && defined(_MSC_VER))
- char referenceBuffer[ numElements * typeSize * vecSize ];
-#else
- char* referenceBuffer = (char*)_malloca(numElements * typeSize * vecSize * sizeof(cl_char));
-#endif
+ std::vector<char> referenceBuffer(numElements * typeSize * vecSize);
// Create some random input data and random offsets to load from
- generate_random_data( type, numStores * vecSize, d, (void *)inBuffer );
+ generate_random_data(type, numStores * vecSize, d, (void *)inBuffer.data());
// Note: make sure no two offsets are the same, otherwise the output would depend on
// the order that threads ran in, and that would be next to impossible to verify
-#if !(defined(_WIN32) && defined(_MSC_VER))
- char flags[ numElements ];
-#else
- char* flags = (char*)_malloca( numElements * sizeof(char));
-#endif
-
- memset( flags, 0, numElements * sizeof(char) );
- for( i = 0; i < numStores; i++ )
+ std::vector<char> flags(numElements);
+ flags.assign(flags.size(), 0);
+
+ for (size_t i = 0; i < numStores; i++)
{
do
{
@@ -579,13 +523,15 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue
if (LINEAR_OFFSETS)
log_info("Offsets set to thread IDs to simplify output.\n");
- createFn( programSrc, numElements, type, vecSize );
+ std::string programSrc;
+ createFn(programSrc, numElements, type, vecSize, vecSize);
// Create our kernel
- const char *ptr = programSrc;
- error = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test_fn" );
+ const char *ptr = programSrc.c_str();
+ cl_int error = create_single_kernel_helper(context, &program, &kernel, 1,
+ &ptr, "test_fn");
test_error( error, "Unable to create testing kernel" );
- if (DEBUG) log_info("Kernel: \n%s\n", programSrc);
+ if (DEBUG) log_info("Kernel: \n%s\n", programSrc.c_str());
// Get the number of args to differentiate the kernels with local storage. (They have 5)
cl_uint numArgs;
@@ -593,9 +539,14 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue
test_error( error, "clGetKernelInfo failed");
// Set up parameters
- streams[ 0 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * typeSize * vecSize * sizeof(cl_char), (void *)inBuffer, &error );
+ streams[0] =
+ clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+ numStores * typeSize * vecSize * sizeof(cl_char),
+ (void *)inBuffer.data(), &error);
test_error( error, "Unable to create kernel stream" );
- streams[ 1 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numStores * sizeof(cl_uint), offsets, &error );
+ streams[1] =
+ clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+ numStores * sizeof(cl_uint), offsets.data(), &error);
test_error( error, "Unable to create kernel stream" );
streams[ 2 ] = clCreateBuffer( context, CL_MEM_COPY_HOST_PTR, numElements * typeSize * vecSize, (void *)outBuffer, &error );
test_error( error, "Unable to create kernel stream" );
@@ -606,7 +557,7 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue
// We need to set the size of the local storage
error = clSetKernelArg(kernel, 0, bufferSize, NULL);
test_error( error, "clSetKernelArg for buffer failed");
- for( i = 0; i < 3; i++ )
+ for (size_t i = 0; i < 3; i++)
{
error = clSetKernelArg( kernel, (int)i+1, sizeof( streams[ i ] ), &streams[ i ] );
test_error( error, "Unable to set kernel argument" );
@@ -615,11 +566,10 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue
else
{
// No local storage
- for( i = 0; i < 3; i++ )
+ for (size_t i = 0; i < 3; i++)
{
error = clSetKernelArg( kernel, (int)i, sizeof( streams[ i ] ), &streams[ i ] );
- if (error)
- log_info("%s\n", programSrc);
+ if (error) log_info("%s\n", programSrc.c_str());
test_error( error, "Unable to set kernel argument" );
}
}
@@ -654,25 +604,26 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue
error = clEnqueueReadBuffer( queue, streams[ 2 ], CL_TRUE, 0, numElements * typeSize * vecSize, (void *)outBuffer, 0, NULL, NULL );
test_error( error, "Unable to read results" );
-
// Create the reference results
- memset( referenceBuffer, 0, numElements * typeSize * vecSize * sizeof(cl_char) );
- for( i = 0; i < numStores; i++ )
+ referenceBuffer.assign(referenceBuffer.size(), 0);
+ for (size_t i = 0; i < numStores; i++)
{
- memcpy( referenceBuffer + ( ( offsets[ i ] * vecSize ) + addressOffset ) * typeSize, inBuffer + i * typeSize * vecSize, typeSize * vecSize );
+ memcpy(&referenceBuffer[((offsets[i] * vecSize) + addressOffset)
+ * typeSize],
+ &inBuffer[i * typeSize * vecSize], typeSize * vecSize);
}
// Validate the results now
- char *expected = referenceBuffer;
+ char *expected = referenceBuffer.data();
char *actual = (char *)(void *)outBuffer;
if (DEBUG)
{
log_info("Memory contents:\n");
- for (i=0; i<numElements; i++)
+ char inString[1024];
+ char expectedString[1024], actualString[1024];
+ for (size_t i = 0; i < numElements; i++)
{
- char inString[1024];
- char expectedString[ 1024 ], actualString[ 1024 ];
if (i < numStores)
{
log_info("buffer %3d: input: %s expected: %s got: %s (store offset %3d)", (int)i, GetDataVectorString( &(inBuffer[i*typeSize*vecSize]), typeSize, vecSize, inString ),
@@ -693,7 +644,7 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue
}
}
- for( i = 0; i < numElements; i++ )
+ for (size_t i = 0; i < numElements; i++)
{
if( memcmp( expected, actual, typeSize * vecSize ) != 0 )
{
@@ -719,62 +670,26 @@ int test_vstore( cl_device_id device, cl_context context, cl_command_queue queue
actual += typeSize * vecSize;
}
}
-
return 0;
}
-int test_vstoreset(cl_device_id device, cl_context context, cl_command_queue queue, create_vstore_program_fn createFn, size_t bufferSize )
-{
- ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes };
- unsigned int vecSizes[] = { 2, 3, 4, 8, 16, 0 };
- const char *size_names[] = { "2", "3", "4", "8", "16"};
- unsigned int typeIdx, sizeIdx;
- int error = 0;
- MTdata d = init_genrand( gRandomSeed );
-
- log_info("Testing with buffer size of %d.\n", (int)bufferSize);
-
- for( typeIdx = 0; vecType[ typeIdx ] != kNumExplicitTypes; typeIdx++ )
- {
- if( vecType[ typeIdx ] == kDouble && !is_extension_available( device, "cl_khr_fp64" ) )
- continue;
-
- if(( vecType[ typeIdx ] == kLong || vecType[ typeIdx ] == kULong ) && !gHasLong )
- continue;
-
- for( sizeIdx = 0; vecSizes[ sizeIdx ] != 0; sizeIdx++ )
- {
- log_info("Testing %s%s...\n", get_explicit_type_name(vecType[typeIdx]), size_names[sizeIdx]);
-
- int error_this_type = test_vstore( device, context, queue, vecType[ typeIdx ], vecSizes[ sizeIdx ], createFn, bufferSize, d );
- if (error_this_type)
- {
- log_error("Failure; skipping further sizes for this type.\n");
- error += error_this_type;
- break;
- }
- }
- }
-
- free_mtdata(d);
- return error;
-}
-
-
#pragma mark -------------------- vstore test cases --------------------------
-void create_global_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize )
+void create_global_store_code(std::string &destBuffer, size_t inBufferSize,
+ ExplicitType type, size_t inVectorSize,
+ size_t /*unused*/)
{
- const char *pattern =
- "%s"
+ // clang-format off
+ const char *pattern [] = {
+ pragma_str,
"__kernel void test_fn( __global %s%d *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
" vstore%d( srcValues[ tid ], offsets[ tid ], destBuffer + alignmentOffset );\n"
- "}\n";
+ "}\n" };
- const char *patternV3 =
- "%s"
+ const char *patternV3 [] = {
+ pragma_str,
"__kernel void test_fn( __global %s3 *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
@@ -783,45 +698,48 @@ void create_global_store_code( char *destBuffer, size_t inBufferSize, ExplicitTy
" } else {\n"
" vstore3( vload3(tid, (__global %s *)srcValues), offsets[ tid ], destBuffer + alignmentOffset );\n"
" }\n"
- "}\n";
+ "}\n" };
+ // clang-format on
const char *typeName = get_explicit_type_name(type);
-
if(inVectorSize == 3) {
- sprintf( destBuffer, patternV3,
- type == kDouble ? doubleExtensionPragma : "",
- typeName, typeName, typeName);
-
- } else {
- sprintf( destBuffer, pattern,
- type == kDouble ? doubleExtensionPragma : "",
- typeName, (int)inVectorSize, typeName, (int)inVectorSize );
+ std::string kernel_src =
+ concat_kernel(patternV3, sizeof(patternV3) / sizeof(patternV3[0]));
+ destBuffer = str_sprintf(kernel_src, typeName, typeName, typeName);
+ }
+ else
+ {
+ std::string kernel_src =
+ concat_kernel(pattern, sizeof(pattern) / sizeof(pattern[0]));
+ destBuffer = str_sprintf(kernel_src, typeName, (int)inVectorSize,
+ typeName, (int)inVectorSize);
}
- // if(inVectorSize == 3 || inVectorSize == 4) {
- // log_info("\n----\n%s\n----\n", destBuffer);
- // }
}
int test_vstore_global(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
{
- return test_vstoreset( device, context, queue, create_global_store_code, 10240 );
+ return test_vset<test_vstore>(device, context, queue,
+ create_global_store_code, 10240);
}
-
-void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize )
+void create_local_store_code(std::string &destBuffer, size_t inBufferSize,
+ ExplicitType type, size_t inVectorSize,
+ size_t /*unused*/)
{
- const char *pattern =
- "%s"
- "\n"
- "__kernel void test_fn(__local %s%d *sSharedStorage, __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n"
+ // clang-format off
+ const char *pattern[] = {
+ pragma_str,
+ "#define LOC_TYPE %s\n"
+ "#define LOC_VTYPE %s%d\n"
+ "__kernel void test_fn(__local LOC_VTYPE *sSharedStorage, __global LOC_VTYPE *srcValues, __global uint *offsets, __global LOC_VTYPE *destBuffer, uint alignmentOffset )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
// We need to zero the shared storage since any locations we don't write to will have garbage otherwise.
- " sSharedStorage[ offsets[tid] ] = (%s%d)(%s)0;\n"
+ " sSharedStorage[ offsets[tid] ] = (LOC_VTYPE)(LOC_TYPE)0;\n"
" sSharedStorage[ offsets[tid] +1 ] = sSharedStorage[ offsets[tid] ];\n"
" barrier( CLK_LOCAL_MEM_FENCE );\n"
"\n"
- " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__local %s *)sSharedStorage ) + alignmentOffset );\n"
+ " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__local LOC_TYPE *)sSharedStorage ) + alignmentOffset );\n"
"\n"
// Note: Once all threads are done vstore'ing into our shared storage, we then copy into the global output
// buffer, but we have to make sure ALL threads are done vstore'ing before we do the copy
@@ -830,20 +748,20 @@ void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitTyp
// Note: we only copy the relevant portion of our local storage over to the dest buffer, because
// otherwise, local threads would be overwriting results from other local threads
" int i;\n"
- " __local %s *sp = (__local %s*) (sSharedStorage + offsets[tid]) + alignmentOffset;\n"
- " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n"
+ " __local LOC_TYPE *sp = (__local LOC_TYPE*) (sSharedStorage + offsets[tid]) + alignmentOffset;\n"
+ " __global LOC_TYPE *dp = (__global LOC_TYPE*) (destBuffer + offsets[tid]) + alignmentOffset;\n"
" for( i = 0; (size_t)i < sizeof( sSharedStorage[0]) / sizeof( *sp ); i++ ) \n"
" dp[i] = sp[i];\n"
- "}\n";
+ "}\n" };
- const char *patternV3 =
- "%s"
- "\n"
- "__kernel void test_fn(__local %s *sSharedStorage, __global %s *srcValues, __global uint *offsets, __global %s *destBuffer, uint alignmentOffset )\n"
+ const char *patternV3 [] = {
+ pragma_str,
+ "#define LOC_TYPE %s\n"
+ "__kernel void test_fn(__local LOC_TYPE *sSharedStorage, __global LOC_TYPE *srcValues, __global uint *offsets, __global LOC_TYPE *destBuffer, uint alignmentOffset )\n"
"{\n"
" int tid = get_global_id( 0 );\n"
// We need to zero the shared storage since any locations we don't write to will have garbage otherwise.
- " sSharedStorage[ 3*offsets[tid] ] = (%s)0;\n"
+ " sSharedStorage[ 3*offsets[tid] ] = (LOC_TYPE)0;\n"
" sSharedStorage[ 3*offsets[tid] +1 ] = \n"
" sSharedStorage[ 3*offsets[tid] ];\n"
" sSharedStorage[ 3*offsets[tid] +2 ] = \n"
@@ -865,30 +783,26 @@ void create_local_store_code( char *destBuffer, size_t inBufferSize, ExplicitTyp
// Note: we only copy the relevant portion of our local storage over to the dest buffer, because
// otherwise, local threads would be overwriting results from other local threads
" int i;\n"
- " __local %s *sp = (sSharedStorage + 3*offsets[tid]) + alignmentOffset;\n"
- " __global %s *dp = (destBuffer + 3*offsets[tid]) + alignmentOffset;\n"
+ " __local LOC_TYPE *sp = (sSharedStorage + 3*offsets[tid]) + alignmentOffset;\n"
+ " __global LOC_TYPE *dp = (destBuffer + 3*offsets[tid]) + alignmentOffset;\n"
" for( i = 0; i < 3; i++ ) \n"
" dp[i] = sp[i];\n"
- "}\n";
+ "}\n" };
+ // clang-format on
const char *typeName = get_explicit_type_name(type);
if(inVectorSize == 3) {
- sprintf( destBuffer, patternV3,
- type == kDouble ? doubleExtensionPragma : "",
- typeName,
- typeName,
- typeName, typeName,
- typeName, typeName, typeName );
- } else {
- sprintf( destBuffer, pattern,
- type == kDouble ? doubleExtensionPragma : "",
- typeName, (int)inVectorSize,
- typeName, (int)inVectorSize, typeName, (int)inVectorSize,
- typeName, (int)inVectorSize, typeName,
- (int)inVectorSize, typeName, typeName,
- typeName, typeName, typeName );
+ std::string kernel_src =
+ concat_kernel(patternV3, sizeof(patternV3) / sizeof(patternV3[0]));
+ destBuffer = str_sprintf(kernel_src, typeName);
+ }
+ else
+ {
+ std::string kernel_src =
+ concat_kernel(pattern, sizeof(pattern) / sizeof(pattern[0]));
+ destBuffer = str_sprintf(kernel_src, typeName, typeName,
+ (int)inVectorSize, (int)inVectorSize);
}
- // log_info(destBuffer);
}
int test_vstore_local(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems )
@@ -897,81 +811,82 @@ int test_vstore_local(cl_device_id device, cl_context context, cl_command_queue
cl_ulong localSize;
int error = clGetDeviceInfo( device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( localSize ), &localSize, NULL );
test_error( error, "Unable to get max size of local memory buffer" );
- if( localSize > 10240 )
- localSize = 10240;
+ if (localSize > 10240) localSize = 10240;
if (localSize > 4096)
localSize -= 2048;
else
localSize /= 2;
- return test_vstoreset( device, context, queue, create_local_store_code, (size_t)localSize );
+ return test_vset<test_vstore>(device, context, queue,
+ create_local_store_code, (size_t)localSize);
}
-
-void create_private_store_code( char *destBuffer, size_t inBufferSize, ExplicitType type, size_t inVectorSize )
+void create_private_store_code(std::string &destBuffer, size_t inBufferSize,
+ ExplicitType type, size_t inVectorSize,
+ size_t /*unused*/)
{
- const char *pattern =
- "%s"
+ // clang-format off
+ const char *pattern [] = {
+ pragma_str,
+ "#define PRIV_TYPE %s\n"
+ "#define PRIV_VTYPE %s%d\n"
// Private memory is unique per thread, unlike local storage which is unique per local work group. Which means
// for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test
"\n"
- "__kernel void test_fn( __global %s%d *srcValues, __global uint *offsets, __global %s%d *destBuffer, uint alignmentOffset )\n"
+ "__kernel void test_fn( __global PRIV_VTYPE *srcValues, __global uint *offsets, __global PRIV_VTYPE *destBuffer, uint alignmentOffset )\n"
"{\n"
- " __private %s%d sPrivateStorage[ %d ];\n"
- " int tid = get_global_id( 0 );\n"
+ " __private PRIV_VTYPE sPrivateStorage[ %d ];\n"
+ " int tid = get_global_id( 0 );\n"
// We need to zero the shared storage since any locations we don't write to will have garbage otherwise.
- " sPrivateStorage[tid] = (%s%d)(%s)0;\n"
+ " sPrivateStorage[tid] = (PRIV_VTYPE)(PRIV_TYPE)0;\n"
"\n"
- " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n"
+ " vstore%d( srcValues[ tid ], offsets[ tid ], ( (__private PRIV_TYPE *)sPrivateStorage ) + alignmentOffset );\n"
"\n"
// Note: we only copy the relevant portion of our local storage over to the dest buffer, because
// otherwise, local threads would be overwriting results from other local threads
" uint i;\n"
- " __private %s *sp = (__private %s*) (sPrivateStorage + offsets[tid]) + alignmentOffset;\n"
- " __global %s *dp = (__global %s*) (destBuffer + offsets[tid]) + alignmentOffset;\n"
+ " __private PRIV_TYPE *sp = (__private PRIV_TYPE*) (sPrivateStorage + offsets[tid]) + alignmentOffset;\n"
+ " __global PRIV_TYPE *dp = (__global PRIV_TYPE*) (destBuffer + offsets[tid]) + alignmentOffset;\n"
" for( i = 0; i < sizeof( sPrivateStorage[0]) / sizeof( *sp ); i++ ) \n"
" dp[i] = sp[i];\n"
- "}\n";
-
+ "}\n"};
- const char *patternV3 =
- "%s"
+ const char *patternV3 [] = {
+ pragma_str,
+ "#define PRIV_TYPE %s\n"
+ "#define PRIV_VTYPE %s3\n"
// Private memory is unique per thread, unlike local storage which is unique per local work group. Which means
// for this test, we have to copy the entire test buffer into private storage ON EACH THREAD to be an effective test
"\n"
- "__kernel void test_fn( __global %s *srcValues, __global uint *offsets, __global %s3 *destBuffer, uint alignmentOffset )\n"
+ "__kernel void test_fn( __global PRIV_TYPE *srcValues, __global uint *offsets, __global PRIV_VTYPE *destBuffer, uint alignmentOffset )\n"
"{\n"
- " __private %s3 sPrivateStorage[ %d ];\n" // keep this %d
- " int tid = get_global_id( 0 );\n"
+ " __private PRIV_VTYPE sPrivateStorage[ %d ];\n" // keep this %d
+ " int tid = get_global_id( 0 );\n"
// We need to zero the shared storage since any locations we don't write to will have garbage otherwise.
- " sPrivateStorage[tid] = (%s3)(%s)0;\n"
+ " sPrivateStorage[tid] = (PRIV_VTYPE)(PRIV_TYPE)0;\n"
"\n"
-
- " vstore3( vload3(tid,srcValues), offsets[ tid ], ( (__private %s *)sPrivateStorage ) + alignmentOffset );\n"
- "\n"
- // Note: we only copy the relevant portion of our local storage over to the dest buffer, because
- // otherwise, local threads would be overwriting results from other local threads
+ " vstore3( vload3(tid,srcValues), offsets[ tid ], ( (__private PRIV_TYPE *)sPrivateStorage ) + alignmentOffset );\n"
" uint i;\n"
- " __private %s *sp = ((__private %s*) sPrivateStorage) + 3*offsets[tid] + alignmentOffset;\n"
- " __global %s *dp = ((__global %s*) destBuffer) + 3*offsets[tid] + alignmentOffset;\n"
+ " __private PRIV_TYPE *sp = ((__private PRIV_TYPE*) sPrivateStorage) + 3*offsets[tid] + alignmentOffset;\n"
+ " __global PRIV_TYPE *dp = ((__global PRIV_TYPE*) destBuffer) + 3*offsets[tid] + alignmentOffset;\n"
" for( i = 0; i < 3; i++ ) \n"
" dp[i] = sp[i];\n"
- "}\n";
+ "}\n"};
+ // clang-format on
const char *typeName = get_explicit_type_name(type);
if(inVectorSize == 3) {
- sprintf( destBuffer, patternV3,
- type == kDouble ? doubleExtensionPragma : "",
- typeName, typeName,
- typeName, (int)inBufferSize,
- typeName, typeName,
- typeName, typeName, typeName, typeName, typeName );
- } else {
- sprintf( destBuffer, pattern,
- type == kDouble ? doubleExtensionPragma : "",
- typeName, (int)inVectorSize, typeName, (int)inVectorSize,
- typeName, (int)inVectorSize, (int)inBufferSize,
- typeName, (int)inVectorSize, typeName,
- (int)inVectorSize, typeName, typeName, typeName, typeName, typeName );
+ std::string kernel_src =
+ concat_kernel(patternV3, sizeof(patternV3) / sizeof(patternV3[0]));
+ destBuffer =
+ str_sprintf(kernel_src, typeName, typeName, (int)inBufferSize);
+ }
+ else
+ {
+ std::string kernel_src =
+ concat_kernel(pattern, sizeof(pattern) / sizeof(pattern[0]));
+ destBuffer =
+ str_sprintf(kernel_src, typeName, typeName, (int)inVectorSize,
+ (int)inBufferSize, (int)inVectorSize);
}
}
@@ -979,7 +894,8 @@ int test_vstore_private(cl_device_id device, cl_context context, cl_command_queu
{
// We have no idea how much actual private storage is available, so just pick a reasonable value,
// which is that we can fit at least two 16-element long, which is 2*8 bytes * 16 = 256 bytes
- return test_vstoreset( device, context, queue, create_private_store_code, 256 );
+ return test_vset<test_vstore>(device, context, queue,
+ create_private_store_code, 256);
}
diff --git a/test_conformance/basic/test_wg_barrier.cpp b/test_conformance/basic/test_wg_barrier.cpp
deleted file mode 100644
index a237d80b..00000000
--- a/test_conformance/basic/test_wg_barrier.cpp
+++ /dev/null
@@ -1,159 +0,0 @@
-//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-#include "harness/compat.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-
-#include "procs.h"
-
-const char *wg_barrier_kernel_code =
-"__kernel void compute_sum(__global int *a, int n, __global int *tmp_sum, __global int *sum)\n"
-"{\n"
-" int tid = get_local_id(0);\n"
-" int lsize = get_local_size(0);\n"
-" int i;\n"
-"\n"
-" tmp_sum[tid] = 0;\n"
-" for (i=tid; i<n; i+=lsize)\n"
-" tmp_sum[tid] += a[i];\n"
-" \n"
-" // updated to work for any workgroup size \n"
-" for (i=hadd(lsize,1); lsize>1; i = hadd(i,1))\n"
-" {\n"
-" work_group_barrier(CLK_GLOBAL_MEM_FENCE);\n"
-" if (tid + i < lsize)\n"
-" tmp_sum[tid] += tmp_sum[tid + i];\n"
-" lsize = i; \n"
-" }\n"
-"\n"
-" //no barrier is required here because last person to write to tmp_sum[0] was tid 0 \n"
-" if (tid == 0)\n"
-" *sum = tmp_sum[0];\n"
-"}\n";
-
-
-static int
-verify_sum(int *inptr, int *tmpptr, int *outptr, int n)
-{
- int i;
- int reference = 0;
-
- for (i=0; i<n; i++)
- {
- reference += inptr[i];
- }
-
- if (reference != outptr[0])
- {
- log_error("work_group_barrier test failed\n");
- return -1;
- }
-
- log_info("work_group_barrier test passed\n");
- return 0;
-}
-
-
-int
-test_wg_barrier(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
-{
- cl_mem streams[3];
- cl_int *input_ptr = NULL, *output_ptr = NULL, *tmp_ptr =NULL;
- cl_program program;
- cl_kernel kernel;
- size_t global_threads[3];
- size_t local_threads[3];
- int err;
- int i;
- size_t max_local_workgroup_size[3];
- size_t max_threadgroup_size = 0;
- MTdata d;
-
- err = create_single_kernel_helper_with_build_options(
- context, &program, &kernel, 1, &wg_barrier_kernel_code, "compute_sum",
- nullptr);
- test_error(err, "Failed to build kernel/program.");
-
- err = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
- sizeof(max_threadgroup_size), &max_threadgroup_size, NULL);
- test_error(err, "clGetKernelWorkgroupInfo failed.");
-
- err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
- test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
-
- // Pick the minimum of the device and the kernel
- if (max_threadgroup_size > max_local_workgroup_size[0])
- max_threadgroup_size = max_local_workgroup_size[0];
-
- // work group size must divide evenly into the global size
- while( num_elements % max_threadgroup_size )
- max_threadgroup_size--;
-
- input_ptr = (int*)malloc(sizeof(int) * num_elements);
- output_ptr = (int*)malloc(sizeof(int));
-
- streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * num_elements, NULL, &err);
- test_error(err, "clCreateBuffer failed.");
- streams[1] =
- clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &err);
- test_error(err, "clCreateBuffer failed.");
- streams[2] =
- clCreateBuffer(context, CL_MEM_READ_WRITE,
- sizeof(cl_int) * max_threadgroup_size, NULL, &err);
- test_error(err, "clCreateBuffer failed.");
-
- d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
- input_ptr[i] = (int)get_random_float(-0x01000000, 0x01000000, d);
- free_mtdata(d); d = NULL;
-
- err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, sizeof(cl_int)*num_elements, (void *)input_ptr, 0, NULL, NULL);
- test_error(err, "clEnqueueWriteBuffer failed.");
-
- err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
- err |= clSetKernelArg(kernel, 1, sizeof num_elements, &num_elements);
- err |= clSetKernelArg(kernel, 2, sizeof streams[2], &streams[2]);
- err |= clSetKernelArg(kernel, 3, sizeof streams[1], &streams[1]);
- test_error(err, "clSetKernelArg failed.");
-
- global_threads[0] = max_threadgroup_size;
- local_threads[0] = max_threadgroup_size;
-
- err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL );
- test_error(err, "clEnqueueNDRangeKernel failed.");
-
- err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int), (void *)output_ptr, 0, NULL, NULL );
- test_error(err, "clEnqueueReadBuffer failed.");
-
- err = verify_sum(input_ptr, tmp_ptr, output_ptr, num_elements);
-
- // cleanup
- clReleaseMemObject(streams[0]);
- clReleaseMemObject(streams[1]);
- clReleaseMemObject(streams[2]);
- clReleaseKernel(kernel);
- clReleaseProgram(program);
- free(input_ptr);
- free(output_ptr);
-
- return err;
-}
diff --git a/test_conformance/basic/test_work_item_functions.cpp b/test_conformance/basic/test_work_item_functions.cpp
index d95915cf..9683a834 100644
--- a/test_conformance/basic/test_work_item_functions.cpp
+++ b/test_conformance/basic/test_work_item_functions.cpp
@@ -91,7 +91,6 @@ int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_comma
{
for( int i = 0; i < NUM_TESTS; i++ )
{
- size_t numItems = 1;
for( size_t j = 0; j < dim; j++ )
{
// All of our thread sizes should be within the max local sizes, since they're all <= 20
@@ -100,8 +99,6 @@ int test_work_item_functions(cl_device_id deviceID, cl_context context, cl_comma
while( localThreads[ j ] > 1 && ( threads[ j ] % localThreads[ j ] != 0 ) )
localThreads[ j ]--;
- numItems *= threads[ j ];
-
// Hack for now: localThreads > 1 are iffy
localThreads[ j ] = 1;
}
diff --git a/test_conformance/c11_atomics/CMakeLists.txt b/test_conformance/c11_atomics/CMakeLists.txt
index 621adda7..0d389bce 100644
--- a/test_conformance/c11_atomics/CMakeLists.txt
+++ b/test_conformance/c11_atomics/CMakeLists.txt
@@ -7,4 +7,6 @@ set(${MODULE_NAME}_SOURCES
test_atomics.cpp
)
+set_gnulike_module_compile_flags("-Wno-sign-compare")
+
include(../CMakeCommon.txt)
diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h
index 6c7d0b12..37c37e87 100644
--- a/test_conformance/c11_atomics/common.h
+++ b/test_conformance/c11_atomics/common.h
@@ -13,8 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-#ifndef _COMMON_H_
-#define _COMMON_H_
+#ifndef COMMON_H_
+#define COMMON_H_
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
@@ -1567,4 +1567,4 @@ int CBasicTest<HostAtomicType, HostDataType>::ExecuteSingleTest(
return 0;
}
-#endif //_COMMON_H_
+#endif // COMMON_H_
diff --git a/test_conformance/c11_atomics/host_atomics.h b/test_conformance/c11_atomics/host_atomics.h
index 6c4e783a..b865970f 100644
--- a/test_conformance/c11_atomics/host_atomics.h
+++ b/test_conformance/c11_atomics/host_atomics.h
@@ -13,8 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-#ifndef _HOST_ATOMICS_H_
-#define _HOST_ATOMICS_H_
+#ifndef HOST_ATOMICS_H_
+#define HOST_ATOMICS_H_
#include "harness/testHarness.h"
@@ -247,4 +247,4 @@ CorrespondingType host_atomic_fetch_max(volatile AtomicType *a, CorrespondingTyp
bool host_atomic_flag_test_and_set(volatile HOST_ATOMIC_FLAG *a, TExplicitMemoryOrderType order);
void host_atomic_flag_clear(volatile HOST_ATOMIC_FLAG *a, TExplicitMemoryOrderType order);
-#endif //_HOST_ATOMICS_H_
+#endif // HOST_ATOMICS_H_
diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp
index d905b2ca..ca2c2242 100644
--- a/test_conformance/c11_atomics/test_atomics.cpp
+++ b/test_conformance/c11_atomics/test_atomics.cpp
@@ -3145,7 +3145,7 @@ public:
}
private:
- int _subCaseId;
+ size_t _subCaseId;
struct TestDefinition _subCase;
};
diff --git a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp
index 0a459e97..5d0e99e0 100644
--- a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp
+++ b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.cpp
@@ -19,7 +19,7 @@
#include "harness/errorHelpers.h"
#include "harness/deviceInfo.h"
#include <assert.h>
-#include <iostream>
+#include <algorithm>
#include <stdexcept>
#define ASSERT(x) assert((x))
@@ -740,21 +740,42 @@ clExternalSemaphore::clExternalSemaphore(
cl_int err = 0;
cl_device_id devList[] = { deviceId, NULL };
-#ifdef _WIN32
- if (!is_extension_available(devList[0], "cl_khr_external_semaphore_win32"))
- {
- throw std::runtime_error("Device does not support "
- "cl_khr_external_semaphore_win32 extension\n");
- }
-#elif !defined(__APPLE__)
- if (!is_extension_available(devList[0],
- "cl_khr_external_semaphore_opaque_fd"))
+ switch (externalSemaphoreHandleType)
{
- throw std::runtime_error(
- "Device does not support cl_khr_external_semaphore_opaque_fd "
- "extension \n");
+ case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD:
+ if (!is_extension_available(devList[0],
+ "cl_khr_external_semaphore_opaque_fd"))
+ {
+ throw std::runtime_error("Device does not support "
+ "cl_khr_external_semaphore_opaque_fd "
+ "extension \n");
+ }
+ break;
+ case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT:
+ case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT:
+ case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT:
+ if (!is_extension_available(devList[0],
+ "cl_khr_external_semaphore_win32"))
+ {
+ throw std::runtime_error(
+ "Device does not support "
+ "cl_khr_external_semaphore_win32 extension\n");
+ }
+ break;
+ case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD:
+ if (!is_extension_available(devList[0],
+ "cl_khr_external_semaphore_sync_fd"))
+ {
+ throw std::runtime_error(
+ "Device does not support cl_khr_external_semaphore_sync_fd "
+ "extension \n");
+ }
+ break;
+ default:
+ throw std::runtime_error(
+ "Unsupported external semaphore handle type\n");
+ break;
}
-#endif
std::vector<cl_semaphore_properties_khr> sema_props{
(cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR,
@@ -803,6 +824,16 @@ clExternalSemaphore::clExternalSemaphore(
sema_props.push_back((cl_semaphore_properties_khr)handle);
#endif
break;
+ case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD:
+ err = check_external_semaphore_handle_type(
+ devList[0], CL_SEMAPHORE_HANDLE_SYNC_FD_KHR);
+ sema_props.push_back(static_cast<cl_semaphore_properties_khr>(
+ CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR));
+ sema_props.push_back(static_cast<cl_semaphore_properties_khr>(
+ CL_SEMAPHORE_HANDLE_SYNC_FD_KHR));
+ sema_props.push_back(static_cast<cl_semaphore_properties_khr>(
+ CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR));
+ break;
default:
ASSERT(0);
log_error("Unsupported external memory handle type\n");
@@ -856,3 +887,67 @@ cl_semaphore_khr &clExternalSemaphore::getCLSemaphore()
{
return m_externalSemaphore;
}
+
+cl_external_memory_handle_type_khr vkToOpenCLExternalMemoryHandleType(
+ VulkanExternalMemoryHandleType vkExternalMemoryHandleType)
+{
+ switch (vkExternalMemoryHandleType)
+ {
+ case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD:
+ return CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR;
+ case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT:
+ return CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR;
+ case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT:
+ case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT:
+ return CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR;
+ case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE: return 0;
+ }
+ return 0;
+}
+
+VulkanImageTiling vkClExternalMemoryHandleTilingAssumption(
+ cl_device_id deviceId,
+ VulkanExternalMemoryHandleType vkExternalMemoryHandleType, int *error_ret)
+{
+ size_t size = 0;
+ VulkanImageTiling mode = VULKAN_IMAGE_TILING_OPTIMAL;
+
+ assert(error_ret
+ != nullptr); // errcode_ret is not optional, it must be checked
+
+ *error_ret = clGetDeviceInfo(
+ deviceId,
+ CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR,
+ 0, nullptr, &size);
+ if (*error_ret != CL_SUCCESS)
+ {
+ return mode;
+ }
+
+ if (size == 0)
+ {
+ return mode;
+ }
+
+ std::vector<cl_external_memory_handle_type_khr> assume_linear_types(
+ size / sizeof(cl_external_memory_handle_type_khr));
+
+ *error_ret = clGetDeviceInfo(
+ deviceId,
+ CL_DEVICE_EXTERNAL_MEMORY_IMPORT_ASSUME_LINEAR_IMAGES_HANDLE_TYPES_KHR,
+ size, assume_linear_types.data(), nullptr);
+ if (*error_ret != CL_SUCCESS)
+ {
+ return mode;
+ }
+
+ if (std::find(
+ assume_linear_types.begin(), assume_linear_types.end(),
+ vkToOpenCLExternalMemoryHandleType(vkExternalMemoryHandleType))
+ != assume_linear_types.end())
+ {
+ mode = VULKAN_IMAGE_TILING_LINEAR;
+ }
+
+ return mode;
+}
diff --git a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp
index 5143332d..4a1d453e 100644
--- a/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp
+++ b/test_conformance/common/vulkan_wrapper/opencl_vulkan_wrapper.hpp
@@ -129,4 +129,8 @@ public:
extern void init_cl_vk_ext(cl_platform_id);
+VulkanImageTiling vkClExternalMemoryHandleTilingAssumption(
+ cl_device_id deviceId,
+ VulkanExternalMemoryHandleType vkExternalMemoryHandleType, int *error_ret);
+
#endif // _opencl_vulkan_wrapper_hpp_
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp b/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp
index c62a71e1..e9c06f98 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_api_list.hpp
@@ -75,6 +75,7 @@
VK_FUNC_DECL(vkDestroyImageView) \
VK_FUNC_DECL(vkCreateImage) \
VK_FUNC_DECL(vkGetImageMemoryRequirements) \
+ VK_FUNC_DECL(vkGetImageMemoryRequirements2) \
VK_FUNC_DECL(vkDestroyImage) \
VK_FUNC_DECL(vkDestroyBuffer) \
VK_FUNC_DECL(vkDestroyPipeline) \
@@ -87,8 +88,9 @@
VK_FUNC_DECL(vkDestroyDescriptorSetLayout) \
VK_FUNC_DECL(vkGetPhysicalDeviceQueueFamilyProperties) \
VK_FUNC_DECL(vkGetPhysicalDeviceFeatures) \
- VK_FUNC_DECL(vkGetPhysicalDeviceProperties2KHR) \
+ VK_FUNC_DECL(vkGetPhysicalDeviceProperties2) \
VK_FUNC_DECL(vkGetBufferMemoryRequirements) \
+ VK_FUNC_DECL(vkGetBufferMemoryRequirements2) \
VK_FUNC_DECL(vkGetMemoryFdKHR) \
VK_FUNC_DECL(vkGetSemaphoreFdKHR) \
VK_FUNC_DECL(vkEnumeratePhysicalDeviceGroups) \
@@ -160,6 +162,7 @@
#define vkDestroyImageView _vkDestroyImageView
#define vkCreateImage _vkCreateImage
#define vkGetImageMemoryRequirements _vkGetImageMemoryRequirements
+#define vkGetImageMemoryRequirements2 _vkGetImageMemoryRequirements2
#define vkDestroyImage _vkDestroyImage
#define vkDestroyBuffer _vkDestroyBuffer
#define vkDestroyPipeline _vkDestroyPipeline
@@ -173,8 +176,9 @@
#define vkGetPhysicalDeviceQueueFamilyProperties \
_vkGetPhysicalDeviceQueueFamilyProperties
#define vkGetPhysicalDeviceFeatures _vkGetPhysicalDeviceFeatures
-#define vkGetPhysicalDeviceProperties2KHR _vkGetPhysicalDeviceProperties2KHR
+#define vkGetPhysicalDeviceProperties2 _vkGetPhysicalDeviceProperties2
#define vkGetBufferMemoryRequirements _vkGetBufferMemoryRequirements
+#define vkGetBufferMemoryRequirements2 _vkGetBufferMemoryRequirements2
#define vkGetMemoryFdKHR _vkGetMemoryFdKHR
#define vkGetSemaphoreFdKHR _vkGetSemaphoreFdKHR
#define vkEnumeratePhysicalDeviceGroups _vkEnumeratePhysicalDeviceGroups
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp b/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp
index 4e276519..a5ca0901 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_list_map.cpp
@@ -141,6 +141,16 @@ VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList(
VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList() {}
+void VulkanDescriptorSetLayoutBindingList::addBinding(
+ size_t binding, VulkanDescriptorType descriptorType,
+ uint32_t descriptorCount, VulkanShaderStage shaderStage)
+{
+ VulkanDescriptorSetLayoutBinding *descriptorSetLayoutBinding =
+ new VulkanDescriptorSetLayoutBinding(binding, descriptorType,
+ descriptorCount, shaderStage);
+ add(*descriptorSetLayoutBinding);
+}
+
VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList(
size_t numDescriptorSetLayoutBindings, VulkanDescriptorType descriptorType,
uint32_t descriptorCount, VulkanShaderStage shaderStage)
@@ -268,6 +278,7 @@ VulkanImage2DList::VulkanImage2DList(
size_t numImages, std::vector<VulkanDeviceMemory *> &deviceMemory,
uint64_t baseOffset, uint64_t interImageOffset, const VulkanDevice &device,
VulkanFormat format, uint32_t width, uint32_t height, uint32_t mipLevels,
+ VulkanImageTiling vulkanImageTiling,
VulkanExternalMemoryHandleType externalMemoryHandleType,
VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage,
VulkanSharingMode sharingMode)
@@ -275,8 +286,8 @@ VulkanImage2DList::VulkanImage2DList(
for (size_t i2DIdx = 0; i2DIdx < numImages; i2DIdx++)
{
VulkanImage2D *image2D = new VulkanImage2D(
- device, format, width, height, mipLevels, externalMemoryHandleType,
- imageCreateFlag, imageUsage, sharingMode);
+ device, format, width, height, vulkanImageTiling, mipLevels,
+ externalMemoryHandleType, imageCreateFlag, imageUsage, sharingMode);
add(*image2D);
deviceMemory[i2DIdx]->bindImage(
*image2D, baseOffset + (i2DIdx * interImageOffset));
@@ -285,16 +296,16 @@ VulkanImage2DList::VulkanImage2DList(
VulkanImage2DList::VulkanImage2DList(
size_t numImages, const VulkanDevice &device, VulkanFormat format,
- uint32_t width, uint32_t height, uint32_t mipLevels,
- VulkanExternalMemoryHandleType externalMemoryHandleType,
+ uint32_t width, uint32_t height, VulkanImageTiling vulkanImageTiling,
+ uint32_t mipLevels, VulkanExternalMemoryHandleType externalMemoryHandleType,
VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage,
VulkanSharingMode sharingMode)
{
for (size_t bIdx = 0; bIdx < numImages; bIdx++)
{
VulkanImage2D *image2D = new VulkanImage2D(
- device, format, width, height, mipLevels, externalMemoryHandleType,
- imageCreateFlag, imageUsage, sharingMode);
+ device, format, width, height, vulkanImageTiling, mipLevels,
+ externalMemoryHandleType, imageCreateFlag, imageUsage, sharingMode);
add(*image2D);
}
}
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp b/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp
index 52206779..ef00b70a 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_list_map.hpp
@@ -154,6 +154,10 @@ public:
VulkanDescriptorType descriptorType0, uint32_t descriptorCount0,
VulkanDescriptorType descriptorType1, uint32_t descriptorCount1,
VulkanShaderStage shaderStage = VULKAN_SHADER_STAGE_COMPUTE);
+ void
+ addBinding(size_t binding, VulkanDescriptorType descriptorType,
+ uint32_t descriptorCount,
+ VulkanShaderStage shaderStage = VULKAN_SHADER_STAGE_COMPUTE);
virtual ~VulkanDescriptorSetLayoutBindingList();
};
@@ -208,6 +212,7 @@ public:
uint64_t baseOffset, uint64_t interImageOffset,
const VulkanDevice &device, VulkanFormat format, uint32_t width,
uint32_t height, uint32_t mipLevels,
+ VulkanImageTiling vulkanImageTiling,
VulkanExternalMemoryHandleType externalMemoryHandleType =
VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE,
@@ -216,7 +221,8 @@ public:
VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE);
VulkanImage2DList(
size_t numImages, const VulkanDevice &device, VulkanFormat format,
- uint32_t width, uint32_t height, uint32_t mipLevels = 1,
+ uint32_t width, uint32_t height, VulkanImageTiling vulkanImageTiling,
+ uint32_t mipLevels = 1,
VulkanExternalMemoryHandleType externalMemoryHandleType =
VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE,
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp b/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp
index 1a313cce..2124a275 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_utility.cpp
@@ -21,6 +21,7 @@
#include <fstream>
#include <set>
#include <string>
+#include <algorithm>
#include <CL/cl.h>
#include <CL/cl_ext.h>
#if defined(_WIN32) || defined(_WIN64)
@@ -248,6 +249,9 @@ getSupportedVulkanExternalSemaphoreHandleTypeList()
}
externalSemaphoreHandleTypeList.push_back(
VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT);
+#elif defined(__ANDROID__)
+ externalSemaphoreHandleTypeList.push_back(
+ VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD);
#else
externalSemaphoreHandleTypeList.push_back(
VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD);
@@ -480,6 +484,33 @@ const std::vector<VulkanFormat> getSupportedVulkanFormatList()
return formatList;
}
+cl_external_semaphore_handle_type_khr getCLSemaphoreTypeFromVulkanType(
+ VulkanExternalSemaphoreHandleType vulkanExternalSemaphoreHandleType)
+{
+ cl_external_semaphore_handle_type_khr clExternalSemaphoreHandleTypeKhr = 0;
+ switch (vulkanExternalSemaphoreHandleType)
+ {
+ case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD:
+ clExternalSemaphoreHandleTypeKhr =
+ CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR;
+ break;
+ case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT:
+ clExternalSemaphoreHandleTypeKhr =
+ CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR;
+ break;
+ case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT:
+ case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT:
+ clExternalSemaphoreHandleTypeKhr =
+ CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR;
+ break;
+ case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD:
+ clExternalSemaphoreHandleTypeKhr = CL_SEMAPHORE_HANDLE_SYNC_FD_KHR;
+ break;
+ default: break;
+ }
+ return clExternalSemaphoreHandleTypeKhr;
+}
+
uint32_t getVulkanFormatElementSize(VulkanFormat format)
{
switch (format)
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp b/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp
index 04f5a594..51284125 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_utility.hpp
@@ -33,7 +33,8 @@
const VulkanInstance& getVulkanInstance();
const VulkanPhysicalDevice& getVulkanPhysicalDevice();
const VulkanQueueFamily&
-getVulkanQueueFamily(uint32_t queueFlags = VULKAN_QUEUE_FLAG_MASK_ALL);
+getVulkanQueueFamily(uint32_t queueFlags = VULKAN_QUEUE_FLAG_GRAPHICS
+ | VULKAN_QUEUE_FLAG_COMPUTE);
const VulkanMemoryType&
getVulkanMemoryType(const VulkanDevice& device,
VulkanMemoryTypeProperty memoryTypeProperty);
@@ -51,6 +52,8 @@ const std::vector<VulkanFormat> getSupportedVulkanFormatList();
uint32_t getVulkanFormatElementSize(VulkanFormat format);
const char* getVulkanFormatGLSLFormat(VulkanFormat format);
const char* getVulkanFormatGLSLTypePrefix(VulkanFormat format);
+cl_external_semaphore_handle_type_khr getCLSemaphoreTypeFromVulkanType(
+ VulkanExternalSemaphoreHandleType vulkanExternalSemaphoreHandleType);
std::string prepareVulkanShader(
std::string shaderCode,
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp
index 3ce4af6b..73c5e9a1 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.cpp
@@ -72,7 +72,9 @@ VulkanInstance::VulkanInstance(): m_vkInstance(VK_NULL_HANDLE)
#if defined(_WIN32) || defined(_WIN64)
const char *vulkanLoaderLibraryName = "vulkan-1.dll";
-#elif defined(__linux__)
+#elif defined(__ANDROID__)
+ const char *vulkanLoaderLibraryName = "libvulkan.so";
+#else
const char *vulkanLoaderLibraryName = "libvulkan.so.1";
#endif
#ifdef _WIN32
@@ -274,13 +276,13 @@ VulkanPhysicalDevice::VulkanPhysicalDevice(VkPhysicalDevice vkPhysicalDevice)
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR;
vkPhysicalDeviceIDPropertiesKHR.pNext = NULL;
- VkPhysicalDeviceProperties2KHR vkPhysicalDeviceProperties2KHR = {};
- vkPhysicalDeviceProperties2KHR.sType =
+ VkPhysicalDeviceProperties2 vkPhysicalDeviceProperties2 = {};
+ vkPhysicalDeviceProperties2.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
- vkPhysicalDeviceProperties2KHR.pNext = &vkPhysicalDeviceIDPropertiesKHR;
+ vkPhysicalDeviceProperties2.pNext = &vkPhysicalDeviceIDPropertiesKHR;
- vkGetPhysicalDeviceProperties2KHR(m_vkPhysicalDevice,
- &vkPhysicalDeviceProperties2KHR);
+ vkGetPhysicalDeviceProperties2(m_vkPhysicalDevice,
+ &vkPhysicalDeviceProperties2);
memcpy(m_vkDeviceUUID, vkPhysicalDeviceIDPropertiesKHR.deviceUUID,
sizeof(m_vkDeviceUUID));
@@ -605,6 +607,37 @@ VulkanQueue &VulkanDevice::getQueue(const VulkanQueueFamily &queueFamily,
VulkanDevice::operator VkDevice() const { return m_vkDevice; }
////////////////////////////////
+// VulkanFence implementation //
+////////////////////////////////
+
+VulkanFence::VulkanFence(const VulkanDevice &vkDevice)
+{
+
+ device = vkDevice;
+
+ VkFenceCreateInfo fenceInfo{};
+ fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
+ fenceInfo.pNext = nullptr;
+ fenceInfo.flags = 0;
+
+ VkResult vkStatus = vkCreateFence(device, &fenceInfo, nullptr, &fence);
+
+ if (vkStatus != VK_SUCCESS)
+ {
+ throw std::runtime_error("Error: Failed create fence.");
+ }
+}
+
+VulkanFence::~VulkanFence() { vkDestroyFence(device, fence, nullptr); }
+
+void VulkanFence::reset() { vkResetFences(device, 1, &fence); }
+
+void VulkanFence::wait()
+{
+ vkWaitForFences(device, 1, &fence, VK_TRUE, UINT64_MAX);
+}
+
+////////////////////////////////
// VulkanQueue implementation //
////////////////////////////////
@@ -615,6 +648,22 @@ VulkanQueue::VulkanQueue(VkQueue vkQueue): m_vkQueue(vkQueue) {}
VulkanQueue::~VulkanQueue() {}
+void VulkanQueue::submit(const VulkanCommandBuffer &commandBuffer,
+ const std::shared_ptr<VulkanFence> &vkFence)
+{
+ VulkanCommandBufferList commandBufferList;
+ commandBufferList.add(commandBuffer);
+
+ VkSubmitInfo vkSubmitInfo = {};
+ vkSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+ vkSubmitInfo.pNext = NULL;
+ vkSubmitInfo.waitSemaphoreCount = (uint32_t)0;
+ vkSubmitInfo.commandBufferCount = (uint32_t)commandBufferList.size();
+ vkSubmitInfo.pCommandBuffers = commandBufferList();
+
+ vkQueueSubmit(m_vkQueue, 1, &vkSubmitInfo, vkFence->fence);
+}
+
void VulkanQueue::submit(const VulkanSemaphoreList &waitSemaphoreList,
const VulkanCommandBufferList &commandBufferList,
const VulkanSemaphoreList &signalSemaphoreList)
@@ -964,12 +1013,14 @@ void VulkanDescriptorPool::VulkanDescriptorPoolCommon(
== vkDescriptorTypeToDescriptorCountMap.end())
{
vkDescriptorTypeToDescriptorCountMap
- [vkDescriptorSetLayoutBinding.descriptorType] = 1;
+ [vkDescriptorSetLayoutBinding.descriptorType] =
+ vkDescriptorSetLayoutBinding.descriptorCount;
}
else
{
vkDescriptorTypeToDescriptorCountMap
- [vkDescriptorSetLayoutBinding.descriptorType]++;
+ [vkDescriptorSetLayoutBinding.descriptorType] +=
+ vkDescriptorSetLayoutBinding.descriptorCount;
}
}
@@ -1110,6 +1161,35 @@ void VulkanDescriptorSet::update(uint32_t binding, const VulkanBuffer &buffer)
vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL);
}
+void VulkanDescriptorSet::updateArray(uint32_t binding, unsigned numBuffers,
+ const VulkanBufferList &buffers)
+{
+ VkDescriptorBufferInfo *vkDescriptorBufferInfo =
+ (VkDescriptorBufferInfo *)calloc(numBuffers,
+ sizeof(VkDescriptorBufferInfo));
+ for (unsigned i = 0; i < numBuffers; i++)
+ {
+ vkDescriptorBufferInfo[i].buffer = buffers[i];
+ vkDescriptorBufferInfo[i].offset = 0;
+ vkDescriptorBufferInfo[i].range = VK_WHOLE_SIZE;
+ }
+
+ VkWriteDescriptorSet vkWriteDescriptorSet = {};
+ vkWriteDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+ vkWriteDescriptorSet.pNext = NULL;
+ vkWriteDescriptorSet.dstSet = m_vkDescriptorSet;
+ vkWriteDescriptorSet.dstBinding = binding;
+ vkWriteDescriptorSet.dstArrayElement = 0;
+ vkWriteDescriptorSet.descriptorCount = numBuffers;
+ vkWriteDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+ vkWriteDescriptorSet.pImageInfo = NULL;
+ vkWriteDescriptorSet.pBufferInfo = vkDescriptorBufferInfo;
+ vkWriteDescriptorSet.pTexelBufferView = NULL;
+
+ vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL);
+ free(vkDescriptorBufferInfo);
+}
+
void VulkanDescriptorSet::update(uint32_t binding,
const VulkanImageView &imageView)
{
@@ -1133,6 +1213,34 @@ void VulkanDescriptorSet::update(uint32_t binding,
vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL);
}
+void VulkanDescriptorSet::updateArray(uint32_t binding,
+ const VulkanImageViewList &imageViewList)
+{
+ VkDescriptorImageInfo *vkDescriptorImageInfo =
+ new VkDescriptorImageInfo[imageViewList.size()];
+ for (size_t i = 0; i < imageViewList.size(); i++)
+ {
+ vkDescriptorImageInfo[i].sampler = VK_NULL_HANDLE;
+ vkDescriptorImageInfo[i].imageView = imageViewList[i];
+ vkDescriptorImageInfo[i].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
+ }
+
+ VkWriteDescriptorSet vkWriteDescriptorSet = {};
+ vkWriteDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+ vkWriteDescriptorSet.pNext = NULL;
+ vkWriteDescriptorSet.dstSet = m_vkDescriptorSet;
+ vkWriteDescriptorSet.dstBinding = binding;
+ vkWriteDescriptorSet.dstArrayElement = 0;
+ vkWriteDescriptorSet.descriptorCount = imageViewList.size();
+ vkWriteDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
+ vkWriteDescriptorSet.pImageInfo = vkDescriptorImageInfo;
+ vkWriteDescriptorSet.pBufferInfo = NULL;
+ vkWriteDescriptorSet.pTexelBufferView = NULL;
+
+ vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL);
+ delete[] vkDescriptorImageInfo;
+}
+
VulkanDescriptorSet::operator VkDescriptorSet() const
{
return m_vkDescriptorSet;
@@ -1456,12 +1564,14 @@ VulkanBuffer::VulkanBuffer(const VulkanBuffer &buffer)
m_memoryTypeList(buffer.m_memoryTypeList)
{}
+bool VulkanBuffer::isDedicated() const { return m_dedicated; }
+
VulkanBuffer::VulkanBuffer(
const VulkanDevice &device, uint64_t size,
VulkanExternalMemoryHandleType externalMemoryHandleType,
VulkanBufferUsage bufferUsage, VulkanSharingMode sharingMode,
const VulkanQueueFamilyList &queueFamilyList)
- : m_device(device), m_vkBuffer(VK_NULL_HANDLE)
+ : m_device(device), m_vkBuffer(VK_NULL_HANDLE), m_dedicated(false)
{
std::vector<uint32_t> queueFamilyIndexList;
if (queueFamilyList.size() == 0)
@@ -1507,16 +1617,36 @@ VulkanBuffer::VulkanBuffer(
vkCreateBuffer(m_device, &vkBufferCreateInfo, NULL, &m_vkBuffer);
- VkMemoryRequirements vkMemoryRequirements = {};
- vkGetBufferMemoryRequirements(m_device, m_vkBuffer, &vkMemoryRequirements);
- m_size = vkMemoryRequirements.size;
- m_alignment = vkMemoryRequirements.alignment;
+ VkMemoryDedicatedRequirements vkMemoryDedicatedRequirements = {};
+ vkMemoryDedicatedRequirements.sType =
+ VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS;
+ vkMemoryDedicatedRequirements.pNext = NULL;
+
+ VkMemoryRequirements2 vkMemoryRequirements = {};
+ vkMemoryRequirements.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2;
+ vkMemoryRequirements.pNext = &vkMemoryDedicatedRequirements;
+
+ VkBufferMemoryRequirementsInfo2 vkMemoryRequirementsInfo = {};
+
+ vkMemoryRequirementsInfo.sType =
+ VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2;
+ vkMemoryRequirementsInfo.buffer = m_vkBuffer;
+ vkMemoryRequirementsInfo.pNext = NULL;
+
+ vkGetBufferMemoryRequirements2(m_device, &vkMemoryRequirementsInfo,
+ &vkMemoryRequirements);
+
+ m_dedicated = vkMemoryDedicatedRequirements.requiresDedicatedAllocation;
+
+ m_size = vkMemoryRequirements.memoryRequirements.size;
+ m_alignment = vkMemoryRequirements.memoryRequirements.alignment;
const VulkanMemoryTypeList &memoryTypeList =
m_device.getPhysicalDevice().getMemoryTypeList();
for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
{
uint32_t memoryTypeIndex = memoryTypeList[mtIdx];
- if ((1 << memoryTypeIndex) & vkMemoryRequirements.memoryTypeBits)
+ if ((1 << memoryTypeIndex)
+ & vkMemoryRequirements.memoryRequirements.memoryTypeBits)
{
m_memoryTypeList.add(memoryTypeList[mtIdx]);
}
@@ -1591,16 +1721,36 @@ VulkanImage::VulkanImage(
vkCreateImage(m_device, &vkImageCreateInfo, NULL, &m_vkImage);
VulkanImageCreateInfo = vkImageCreateInfo;
- VkMemoryRequirements vkMemoryRequirements = {};
- vkGetImageMemoryRequirements(m_device, m_vkImage, &vkMemoryRequirements);
- m_size = vkMemoryRequirements.size;
- m_alignment = vkMemoryRequirements.alignment;
+
+ VkMemoryDedicatedRequirements vkMemoryDedicatedRequirements = {};
+ vkMemoryDedicatedRequirements.sType =
+ VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS;
+ vkMemoryDedicatedRequirements.pNext = NULL;
+
+ VkMemoryRequirements2 vkMemoryRequirements = {};
+ vkMemoryRequirements.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2;
+ vkMemoryRequirements.pNext = &vkMemoryDedicatedRequirements;
+
+ VkImageMemoryRequirementsInfo2 vkMemoryRequirementsInfo = {};
+
+ vkMemoryRequirementsInfo.sType =
+ VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2;
+ vkMemoryRequirementsInfo.image = m_vkImage;
+ vkMemoryRequirementsInfo.pNext = NULL;
+
+ vkGetImageMemoryRequirements2(m_device, &vkMemoryRequirementsInfo,
+ &vkMemoryRequirements);
+ m_size = vkMemoryRequirements.memoryRequirements.size;
+ m_alignment = vkMemoryRequirements.memoryRequirements.alignment;
+ m_dedicated = vkMemoryDedicatedRequirements.requiresDedicatedAllocation;
+
const VulkanMemoryTypeList &memoryTypeList =
m_device.getPhysicalDevice().getMemoryTypeList();
for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++)
{
uint32_t memoryTypeIndex = memoryTypeList[mtIdx];
- if ((1 << memoryTypeIndex) & vkMemoryRequirements.memoryTypeBits)
+ if ((1 << memoryTypeIndex)
+ & vkMemoryRequirements.memoryRequirements.memoryTypeBits)
{
m_memoryTypeList.add(memoryTypeList[mtIdx]);
}
@@ -1629,6 +1779,8 @@ uint64_t VulkanImage::getSize() const { return m_size; }
uint64_t VulkanImage::getAlignment() const { return m_alignment; }
+bool VulkanImage::isDedicated() const { return m_dedicated; }
+
const VulkanMemoryTypeList &VulkanImage::getMemoryTypeList() const
{
return m_memoryTypeList;
@@ -1645,14 +1797,14 @@ VulkanImage2D::VulkanImage2D(const VulkanImage2D &image2D): VulkanImage(image2D)
VulkanImage2D::VulkanImage2D(
const VulkanDevice &device, VulkanFormat format, uint32_t width,
- uint32_t height, uint32_t numMipLevels,
+ uint32_t height, VulkanImageTiling imageTiling, uint32_t numMipLevels,
VulkanExternalMemoryHandleType externalMemoryHandleType,
VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage,
VulkanSharingMode sharingMode)
: VulkanImage(device, VULKAN_IMAGE_TYPE_2D, format,
VulkanExtent3D(width, height, 1), numMipLevels, 1,
- externalMemoryHandleType, imageCreateFlag,
- VULKAN_IMAGE_TILING_OPTIMAL, imageUsage, sharingMode)
+ externalMemoryHandleType, imageCreateFlag, imageTiling,
+ imageUsage, sharingMode)
{}
VulkanImage2D::~VulkanImage2D() {}
@@ -1839,7 +1991,8 @@ VulkanDeviceMemory::VulkanDeviceMemory(
const VulkanDevice &device, const VulkanImage &image,
const VulkanMemoryType &memoryType,
VulkanExternalMemoryHandleType externalMemoryHandleType, const void *name)
- : m_device(device), m_size(image.getSize()), m_isDedicated(true)
+ : m_device(device), m_size(image.getSize()),
+ m_isDedicated(image.isDedicated())
{
#if defined(_WIN32) || defined(_WIN64)
WindowsSecurityAttributes winSecurityAttributes;
@@ -1872,20 +2025,95 @@ VulkanDeviceMemory::VulkanDeviceMemory(
VkMemoryDedicatedAllocateInfo vkMemoryDedicatedAllocateInfo = {};
vkMemoryDedicatedAllocateInfo.sType =
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
- vkMemoryDedicatedAllocateInfo.pNext =
- externalMemoryHandleType ? &vkExportMemoryAllocateInfoKHR : NULL;
+ vkMemoryDedicatedAllocateInfo.pNext = NULL;
vkMemoryDedicatedAllocateInfo.image = image;
vkMemoryDedicatedAllocateInfo.buffer = VK_NULL_HANDLE;
VkMemoryAllocateInfo vkMemoryAllocateInfo = {};
vkMemoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
- vkMemoryAllocateInfo.pNext = &vkMemoryDedicatedAllocateInfo;
vkMemoryAllocateInfo.allocationSize = m_size;
vkMemoryAllocateInfo.memoryTypeIndex = (uint32_t)memoryType;
+ if (m_isDedicated)
+ {
+ vkMemoryAllocateInfo.pNext = &vkMemoryDedicatedAllocateInfo;
+ vkMemoryDedicatedAllocateInfo.pNext =
+ externalMemoryHandleType ? &vkExportMemoryAllocateInfoKHR : NULL;
+ }
+ else
+ {
+ vkMemoryAllocateInfo.pNext =
+ externalMemoryHandleType ? &vkExportMemoryAllocateInfoKHR : NULL;
+ }
+
vkAllocateMemory(m_device, &vkMemoryAllocateInfo, NULL, &m_vkDeviceMemory);
}
+VulkanDeviceMemory::VulkanDeviceMemory(
+ const VulkanDevice &device, const VulkanBuffer &buffer,
+ const VulkanMemoryType &memoryType,
+ VulkanExternalMemoryHandleType externalMemoryHandleType, const void *name)
+ : m_device(device), m_size(buffer.getSize()),
+ m_isDedicated(buffer.isDedicated())
+{
+#if defined(_WIN32) || defined(_WIN64)
+ WindowsSecurityAttributes winSecurityAttributes;
+
+ VkExportMemoryWin32HandleInfoKHR vkExportMemoryWin32HandleInfoKHR = {};
+ vkExportMemoryWin32HandleInfoKHR.sType =
+ VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR;
+ vkExportMemoryWin32HandleInfoKHR.pNext = NULL;
+ vkExportMemoryWin32HandleInfoKHR.pAttributes = &winSecurityAttributes;
+ vkExportMemoryWin32HandleInfoKHR.dwAccess =
+ DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE;
+ vkExportMemoryWin32HandleInfoKHR.name = (LPCWSTR)name;
+
+#endif
+
+ VkExportMemoryAllocateInfoKHR vkExportMemoryAllocateInfoKHR = {};
+ vkExportMemoryAllocateInfoKHR.sType =
+ VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR;
+#if defined(_WIN32) || defined(_WIN64)
+ vkExportMemoryAllocateInfoKHR.pNext = externalMemoryHandleType
+ & VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT
+ ? &vkExportMemoryWin32HandleInfoKHR
+ : NULL;
+#else
+ vkExportMemoryAllocateInfoKHR.pNext = NULL;
+#endif
+ vkExportMemoryAllocateInfoKHR.handleTypes =
+ (VkExternalMemoryHandleTypeFlagsKHR)externalMemoryHandleType;
+
+ VkMemoryDedicatedAllocateInfo vkMemoryDedicatedAllocateInfo = {};
+ vkMemoryDedicatedAllocateInfo.sType =
+ VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
+ vkMemoryDedicatedAllocateInfo.pNext = NULL;
+ vkMemoryDedicatedAllocateInfo.image = VK_NULL_HANDLE;
+ vkMemoryDedicatedAllocateInfo.buffer = buffer;
+
+ VkMemoryAllocateInfo vkMemoryAllocateInfo = {};
+ vkMemoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
+ vkMemoryAllocateInfo.allocationSize = m_size;
+ vkMemoryAllocateInfo.memoryTypeIndex = (uint32_t)memoryType;
+
+ if (m_isDedicated)
+ {
+ vkMemoryAllocateInfo.pNext = &vkMemoryDedicatedAllocateInfo;
+ vkMemoryDedicatedAllocateInfo.pNext =
+ externalMemoryHandleType ? &vkExportMemoryAllocateInfoKHR : NULL;
+ }
+ else
+ {
+ vkMemoryAllocateInfo.pNext =
+ externalMemoryHandleType ? &vkExportMemoryAllocateInfoKHR : NULL;
+ }
+
+
+ VkResult res = vkAllocateMemory(m_device, &vkMemoryAllocateInfo, NULL,
+ &m_vkDeviceMemory);
+ ASSERT_SUCCESS(res, "Failed to allocate device memory");
+}
+
VulkanDeviceMemory::~VulkanDeviceMemory()
{
vkFreeMemory(m_device, m_vkDeviceMemory, NULL);
@@ -1952,11 +2180,21 @@ void VulkanDeviceMemory::unmap() { vkUnmapMemory(m_device, m_vkDeviceMemory); }
void VulkanDeviceMemory::bindBuffer(const VulkanBuffer &buffer, uint64_t offset)
{
+ if (buffer.isDedicated() && !m_isDedicated)
+ {
+ throw std::runtime_error(
+ "Buffer requires dedicated memory. Failed to bind");
+ }
vkBindBufferMemory(m_device, buffer, m_vkDeviceMemory, offset);
}
void VulkanDeviceMemory::bindImage(const VulkanImage &image, uint64_t offset)
{
+ if (image.isDedicated() && !m_isDedicated)
+ {
+ throw std::runtime_error(
+ "Image requires dedicated memory. Failed to bind");
+ }
vkBindImageMemory(m_device, image, m_vkDeviceMemory, offset);
}
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp
index 37925ee4..7fcc70f3 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper.hpp
@@ -21,6 +21,7 @@
#include "vulkan_wrapper_types.hpp"
#include "vulkan_list_map.hpp"
#include "vulkan_api_list.hpp"
+#include <memory>
class VulkanInstance {
friend const VulkanInstance &getVulkanInstance();
@@ -145,6 +146,20 @@ public:
operator VkDevice() const;
};
+class VulkanFence {
+ friend class VulkanQueue;
+
+protected:
+ VkFence fence;
+ VkDevice device;
+
+public:
+ VulkanFence(const VulkanDevice &device);
+ virtual ~VulkanFence();
+ void reset();
+ void wait();
+};
+
class VulkanQueue {
friend class VulkanDevice;
@@ -157,6 +172,8 @@ protected:
public:
const VulkanQueueFamily &getQueueFamily();
+ void submit(const VulkanCommandBuffer &commandBuffer,
+ const std::shared_ptr<VulkanFence> &fence);
void submit(const VulkanSemaphoreList &waitSemaphoreList,
const VulkanCommandBufferList &commandBufferList,
const VulkanSemaphoreList &signalSemaphoreList);
@@ -311,7 +328,11 @@ public:
const VulkanDescriptorSetLayout &descriptorSetLayout);
virtual ~VulkanDescriptorSet();
void update(uint32_t binding, const VulkanBuffer &buffer);
+ void updateArray(uint32_t binding, unsigned numBuffers,
+ const VulkanBufferList &buffers);
void update(uint32_t binding, const VulkanImageView &imageView);
+ void updateArray(uint32_t binding,
+ const VulkanImageViewList &imageViewList);
operator VkDescriptorSet() const;
};
@@ -407,6 +428,7 @@ protected:
VkBuffer m_vkBuffer;
uint64_t m_size;
uint64_t m_alignment;
+ bool m_dedicated;
VulkanMemoryTypeList m_memoryTypeList;
VulkanBuffer(const VulkanBuffer &buffer);
@@ -424,6 +446,7 @@ public:
uint64_t getSize() const;
uint64_t getAlignment() const;
const VulkanMemoryTypeList &getMemoryTypeList() const;
+ bool isDedicated() const;
operator VkBuffer() const;
};
@@ -435,6 +458,7 @@ protected:
const VulkanFormat m_format;
const uint32_t m_numMipLevels;
const uint32_t m_numLayers;
+ bool m_dedicated;
VkImage m_vkImage;
uint64_t m_size;
uint64_t m_alignment;
@@ -461,6 +485,7 @@ public:
uint32_t getNumLayers() const;
uint64_t getSize() const;
uint64_t getAlignment() const;
+ bool isDedicated() const;
const VulkanMemoryTypeList &getMemoryTypeList() const;
VkImageCreateInfo getVkImageCreateInfo() const;
operator VkImage() const;
@@ -470,12 +495,11 @@ class VulkanImage2D : public VulkanImage {
protected:
VkImageView m_vkImageView;
- VulkanImage2D(const VulkanImage2D &image2D);
-
public:
VulkanImage2D(
const VulkanDevice &device, VulkanFormat format, uint32_t width,
- uint32_t height, uint32_t numMipLevels = 1,
+ uint32_t height, VulkanImageTiling imageTiling,
+ uint32_t numMipLevels = 1,
VulkanExternalMemoryHandleType externalMemoryHandleType =
VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE,
@@ -484,6 +508,8 @@ public:
VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE);
virtual ~VulkanImage2D();
virtual VulkanExtent3D getExtent3D(uint32_t mipLevel = 0) const;
+
+ VulkanImage2D(const VulkanImage2D &image2D);
};
class VulkanImageView {
@@ -524,6 +550,11 @@ public:
VulkanExternalMemoryHandleType externalMemoryHandleType =
VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
const void *name = NULL);
+ VulkanDeviceMemory(const VulkanDevice &device, const VulkanBuffer &buffer,
+ const VulkanMemoryType &memoryType,
+ VulkanExternalMemoryHandleType externalMemoryHandleType =
+ VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE,
+ const void *name = NULL);
virtual ~VulkanDeviceMemory();
uint64_t getSize() const;
#ifdef _WIN32
@@ -569,7 +600,6 @@ public:
operator VkSemaphore() const;
};
-
#define VK_FUNC_DECL(name) extern "C" PFN_##name _##name;
VK_FUNC_LIST
#if defined(_WIN32) || defined(_WIN64)
diff --git a/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp b/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp
index 2473a1d7..fcd19373 100644
--- a/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp
+++ b/test_conformance/common/vulkan_wrapper/vulkan_wrapper_types.hpp
@@ -169,7 +169,9 @@ enum VulkanExternalSemaphoreHandleType
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR,
VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT =
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR
- | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR
+ | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR,
+ VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR
};
enum VulkanBufferUsage
diff --git a/test_conformance/commonfns/main.cpp b/test_conformance/commonfns/main.cpp
index 3e4b0b8e..645d3f70 100644
--- a/test_conformance/commonfns/main.cpp
+++ b/test_conformance/commonfns/main.cpp
@@ -1,6 +1,6 @@
//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -18,8 +18,10 @@
#include <string.h>
#include "procs.h"
#include "test_base.h"
+#include "harness/kernelHelpers.h"
std::map<size_t, std::string> BaseFunctionTest::type2name;
+cl_half_rounding_mode BaseFunctionTest::halfRoundingMode = CL_HALF_RTE;
int g_arrVecSizes[kVectorSizeCount + kStrangeVectorSizeCount];
int g_arrStrangeVectorSizes[kStrangeVectorSizeCount] = {3};
@@ -45,17 +47,38 @@ test_definition test_list[] = {
const int test_num = ARRAY_SIZE( test_list );
-int main(int argc, const char *argv[])
+test_status InitCL(cl_device_id device)
{
- initVecSizes();
-
- if (BaseFunctionTest::type2name.empty())
+ if (is_extension_available(device, "cl_khr_fp16"))
{
- BaseFunctionTest::type2name[sizeof(half)] = "half";
- BaseFunctionTest::type2name[sizeof(float)] = "float";
- BaseFunctionTest::type2name[sizeof(double)] = "double";
+ const cl_device_fp_config fpConfigHalf =
+ get_default_rounding_mode(device, CL_DEVICE_HALF_FP_CONFIG);
+ if ((fpConfigHalf & CL_FP_ROUND_TO_NEAREST) != 0)
+ {
+ BaseFunctionTest::halfRoundingMode = CL_HALF_RTE;
+ }
+ else if ((fpConfigHalf & CL_FP_ROUND_TO_ZERO) != 0)
+ {
+ BaseFunctionTest::halfRoundingMode = CL_HALF_RTZ;
+ }
+ else
+ {
+ log_error("Error while acquiring half rounding mode");
+ return TEST_FAIL;
+ }
}
- return runTestHarness(argc, argv, test_num, test_list, false, 0);
+ return TEST_PASS;
}
+int main(int argc, const char *argv[])
+{
+ initVecSizes();
+
+ BaseFunctionTest::type2name[sizeof(half)] = "half";
+ BaseFunctionTest::type2name[sizeof(float)] = "float";
+ BaseFunctionTest::type2name[sizeof(double)] = "double";
+
+ return runTestHarnessWithCheck(argc, argv, test_num, test_list, false, 0,
+ InitCL);
+}
diff --git a/test_conformance/commonfns/test_base.h b/test_conformance/commonfns/test_base.h
index 44291042..be36ed26 100644
--- a/test_conformance/commonfns/test_base.h
+++ b/test_conformance/commonfns/test_base.h
@@ -19,27 +19,23 @@
#include <vector>
#include <map>
#include <memory>
+#include <cmath>
#include <CL/cl_half.h>
#include <CL/cl_ext.h>
-#include "harness/deviceInfo.h"
#include "harness/testHarness.h"
#include "harness/typeWrappers.h"
-
template <typename T>
using VerifyFuncBinary = int (*)(const T *const, const T *const, const T *const,
const int num, const int vs, const int vp);
-
template <typename T>
using VerifyFuncUnary = int (*)(const T *const, const T *const, const int num);
-
using half = cl_half;
-
struct BaseFunctionTest
{
BaseFunctionTest(cl_device_id device, cl_context context,
@@ -61,9 +57,9 @@ struct BaseFunctionTest
bool vecParam;
static std::map<size_t, std::string> type2name;
+ static cl_half_rounding_mode halfRoundingMode;
};
-
struct MinTest : BaseFunctionTest
{
MinTest(cl_device_id device, cl_context context, cl_command_queue queue,
@@ -74,7 +70,6 @@ struct MinTest : BaseFunctionTest
cl_int Run() override;
};
-
struct MaxTest : BaseFunctionTest
{
MaxTest(cl_device_id device, cl_context context, cl_command_queue queue,
@@ -85,7 +80,6 @@ struct MaxTest : BaseFunctionTest
cl_int Run() override;
};
-
struct ClampTest : BaseFunctionTest
{
ClampTest(cl_device_id device, cl_context context, cl_command_queue queue,
@@ -96,7 +90,6 @@ struct ClampTest : BaseFunctionTest
cl_int Run() override;
};
-
struct DegreesTest : BaseFunctionTest
{
DegreesTest(cl_device_id device, cl_context context, cl_command_queue queue,
@@ -107,7 +100,6 @@ struct DegreesTest : BaseFunctionTest
cl_int Run() override;
};
-
struct RadiansTest : BaseFunctionTest
{
RadiansTest(cl_device_id device, cl_context context, cl_command_queue queue,
@@ -118,7 +110,6 @@ struct RadiansTest : BaseFunctionTest
cl_int Run() override;
};
-
struct SignTest : BaseFunctionTest
{
SignTest(cl_device_id device, cl_context context, cl_command_queue queue,
@@ -129,7 +120,6 @@ struct SignTest : BaseFunctionTest
cl_int Run() override;
};
-
struct SmoothstepTest : BaseFunctionTest
{
SmoothstepTest(cl_device_id device, cl_context context,
@@ -141,7 +131,6 @@ struct SmoothstepTest : BaseFunctionTest
cl_int Run() override;
};
-
struct StepTest : BaseFunctionTest
{
StepTest(cl_device_id device, cl_context context, cl_command_queue queue,
@@ -152,7 +141,6 @@ struct StepTest : BaseFunctionTest
cl_int Run() override;
};
-
struct MixTest : BaseFunctionTest
{
MixTest(cl_device_id device, cl_context context, cl_command_queue queue,
@@ -163,19 +151,71 @@ struct MixTest : BaseFunctionTest
cl_int Run() override;
};
+template <typename T> float UlpFn(const T &val, const double &r)
+{
+ if (std::is_same<T, half>::value)
+ {
+ return Ulp_Error_Half(val, r);
+ }
+ else if (std::is_same<T, float>::value)
+ {
+ return Ulp_Error(val, r);
+ }
+ else if (std::is_same<T, double>::value)
+ {
+ return Ulp_Error_Double(val, r);
+ }
+ else
+ {
+ log_error("UlpFn: unsupported data type\n");
+ }
+
+ return -1.f; // wrong val
+}
+
+template <typename T> inline double conv_to_dbl(const T &val)
+{
+ if (std::is_same<T, half>::value)
+ return (double)cl_half_to_float(val);
+ else
+ return (double)val;
+}
-template <typename... Args>
-std::string string_format(const std::string &format, Args... args)
+template <typename T> inline double conv_to_flt(const T &val)
{
- int sformat = std::snprintf(nullptr, 0, format.c_str(), args...) + 1;
- if (sformat <= 0)
- throw std::runtime_error("string_format: string processing error.");
- auto format_size = static_cast<size_t>(sformat);
- std::unique_ptr<char[]> buffer(new char[format_size]);
- std::snprintf(buffer.get(), format_size, format.c_str(), args...);
- return std::string(buffer.get(), buffer.get() + format_size - 1);
+ if (std::is_same<T, half>::value)
+ return (float)cl_half_to_float(val);
+ else
+ return (float)val;
}
+template <typename T> inline half conv_to_half(const T &val)
+{
+ if (std::is_floating_point<T>::value)
+ return cl_half_from_float(val, BaseFunctionTest::halfRoundingMode);
+ return 0;
+}
+
+template <typename T> bool isfinite_fp(const T &v)
+{
+ if (std::is_same<T, half>::value)
+ {
+ // Extract FP16 exponent and mantissa
+ uint16_t h_exp = (((half)v) >> (CL_HALF_MANT_DIG - 1)) & 0x1F;
+ uint16_t h_mant = ((half)v) & 0x3FF;
+
+ // !Inf test
+ return !(h_exp == 0x1F && h_mant == 0);
+ }
+ else
+ {
+#if !defined(_WIN32)
+ return std::isfinite(v);
+#else
+ return isfinite(v);
+#endif
+ }
+}
template <class T>
int MakeAndRunTest(cl_device_id device, cl_context context,
diff --git a/test_conformance/commonfns/test_binary_fn.cpp b/test_conformance/commonfns/test_binary_fn.cpp
index 1eb12f73..a6c75647 100644
--- a/test_conformance/commonfns/test_binary_fn.cpp
+++ b/test_conformance/commonfns/test_binary_fn.cpp
@@ -1,6 +1,6 @@
//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -22,6 +22,7 @@
#include "harness/deviceInfo.h"
#include "harness/typeWrappers.h"
+#include "harness/stringHelpers.h"
#include "procs.h"
#include "test_base.h"
@@ -53,7 +54,6 @@ const char *binary_fn_code_pattern_v3_scalar =
" vstore3(%s(vload3(tid,x), y[tid] ), tid, dst);\n"
"}\n";
-
template <typename T>
int test_binary_fn(cl_device_id device, cl_context context,
cl_command_queue queue, int n_elems,
@@ -105,6 +105,16 @@ int test_binary_fn(cl_device_id device, cl_context context,
input_ptr[1][j] = get_random_double(-0x20000000, 0x20000000, d);
}
}
+ else if (std::is_same<T, half>::value)
+ {
+ const float fval = CL_HALF_MAX;
+ pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
+ for (int j = 0; j < num_elements; j++)
+ {
+ input_ptr[0][j] = conv_to_half(get_random_float(-fval, fval, d));
+ input_ptr[1][j] = conv_to_half(get_random_float(-fval, fval, d));
+ }
+ }
for (i = 0; i < 2; i++)
{
@@ -125,22 +135,22 @@ int test_binary_fn(cl_device_id device, cl_context context,
{
std::string str = binary_fn_code_pattern_v3;
kernelSource =
- string_format(str, pragma_str.c_str(), tname.c_str(),
- tname.c_str(), tname.c_str(), fnName.c_str());
+ str_sprintf(str, pragma_str.c_str(), tname.c_str(),
+ tname.c_str(), tname.c_str(), fnName.c_str());
}
else
{
std::string str = binary_fn_code_pattern_v3_scalar;
kernelSource =
- string_format(str, pragma_str.c_str(), tname.c_str(),
- tname.c_str(), tname.c_str(), fnName.c_str());
+ str_sprintf(str, pragma_str.c_str(), tname.c_str(),
+ tname.c_str(), tname.c_str(), fnName.c_str());
}
}
else
{
// do regular
std::string str = binary_fn_code_pattern;
- kernelSource = string_format(
+ kernelSource = str_sprintf(
str, pragma_str.c_str(), tname.c_str(), vecSizeNames[i],
tname.c_str(), vecSecParam ? vecSizeNames[i] : "",
tname.c_str(), vecSizeNames[i], fnName.c_str());
@@ -203,13 +213,20 @@ int max_verify(const T* const x, const T* const y, const T* const out,
{
int k = i * vecSize + j;
int l = (k * vecParam + i * (1 - vecParam));
- T v = (x[k] < y[l]) ? y[l] : x[k];
+ T v = (conv_to_dbl(x[k]) < conv_to_dbl(y[l])) ? y[l] : x[k];
if (v != out[k])
{
- log_error(
- "x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is "
- "vector %d, element %d, for vector size %d)\n",
- k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize);
+ if (std::is_same<T, half>::value)
+ log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. "
+ "(index %d is "
+ "vector %d, element %d, for vector size %d)\n",
+ k, conv_to_flt(x[k]), l, conv_to_flt(y[l]), k,
+ conv_to_flt(out[k]), v, k, i, j, vecSize);
+ else
+ log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. "
+ "(index %d is "
+ "vector %d, element %d, for vector size %d)\n",
+ k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize);
return -1;
}
}
@@ -227,13 +244,20 @@ int min_verify(const T* const x, const T* const y, const T* const out,
{
int k = i * vecSize + j;
int l = (k * vecParam + i * (1 - vecParam));
- T v = (x[k] > y[l]) ? y[l] : x[k];
+ T v = (conv_to_dbl(x[k]) > conv_to_dbl(y[l])) ? y[l] : x[k];
if (v != out[k])
{
- log_error(
- "x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. (index %d is "
- "vector %d, element %d, for vector size %d)\n",
- k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize);
+ if (std::is_same<T, half>::value)
+ log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. "
+ "(index %d is "
+ "vector %d, element %d, for vector size %d)\n",
+ k, conv_to_flt(x[k]), l, conv_to_flt(y[l]), k,
+ conv_to_flt(out[k]), v, k, i, j, vecSize);
+ else
+ log_error("x[%d]=%g y[%d]=%g out[%d]=%g, expected %g. "
+ "(index %d is "
+ "vector %d, element %d, for vector size %d)\n",
+ k, x[k], l, y[l], k, out[k], v, k, i, j, vecSize);
return -1;
}
}
@@ -246,6 +270,13 @@ int min_verify(const T* const x, const T* const y, const T* const out,
cl_int MaxTest::Run()
{
cl_int error = CL_SUCCESS;
+ if (is_extension_available(device, "cl_khr_fp16"))
+ {
+ error = test_binary_fn<cl_half>(device, context, queue, num_elems,
+ fnName.c_str(), vecParam,
+ max_verify<cl_half>);
+ test_error(error, "MaxTest::Run<cl_half> failed");
+ }
error = test_binary_fn<float>(device, context, queue, num_elems,
fnName.c_str(), vecParam, max_verify<float>);
@@ -265,6 +296,13 @@ cl_int MaxTest::Run()
cl_int MinTest::Run()
{
cl_int error = CL_SUCCESS;
+ if (is_extension_available(device, "cl_khr_fp16"))
+ {
+ error = test_binary_fn<cl_half>(device, context, queue, num_elems,
+ fnName.c_str(), vecParam,
+ min_verify<cl_half>);
+ test_error(error, "MinTest::Run<cl_half> failed");
+ }
error = test_binary_fn<float>(device, context, queue, num_elems,
fnName.c_str(), vecParam, min_verify<float>);
diff --git a/test_conformance/commonfns/test_clamp.cpp b/test_conformance/commonfns/test_clamp.cpp
index 0e96fb60..1bf40677 100644
--- a/test_conformance/commonfns/test_clamp.cpp
+++ b/test_conformance/commonfns/test_clamp.cpp
@@ -26,12 +26,10 @@
#include "procs.h"
#include "test_base.h"
-
#ifndef M_PI
#define M_PI 3.14159265358979323846264338327950288
#endif
-
#define CLAMP_KERNEL(type) \
const char *clamp_##type##_kernel_code = EMIT_PRAGMA_DIRECTIVE \
"__kernel void test_clamp(__global " #type " *x, __global " #type \
@@ -64,6 +62,14 @@
"vload3(tid,maxval)), tid, dst);\n" \
"}\n";
+#define EMIT_PRAGMA_DIRECTIVE "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"
+CLAMP_KERNEL(half)
+CLAMP_KERNEL_V(half, 2)
+CLAMP_KERNEL_V(half, 4)
+CLAMP_KERNEL_V(half, 8)
+CLAMP_KERNEL_V(half, 16)
+CLAMP_KERNEL_V3(half, 3)
+#undef EMIT_PRAGMA_DIRECTIVE
#define EMIT_PRAGMA_DIRECTIVE " "
CLAMP_KERNEL(float)
@@ -83,6 +89,10 @@ CLAMP_KERNEL_V(double, 16)
CLAMP_KERNEL_V3(double, 3)
#undef EMIT_PRAGMA_DIRECTIVE
+const char *clamp_half_codes[] = {
+ clamp_half_kernel_code, clamp_half2_kernel_code, clamp_half4_kernel_code,
+ clamp_half8_kernel_code, clamp_half16_kernel_code, clamp_half3_kernel_code
+};
const char *clamp_float_codes[] = {
clamp_float_kernel_code, clamp_float2_kernel_code,
clamp_float4_kernel_code, clamp_float8_kernel_code,
@@ -96,21 +106,42 @@ const char *clamp_double_codes[] = {
namespace {
-
template <typename T>
int verify_clamp(const T *const x, const T *const minval, const T *const maxval,
const T *const outptr, int n)
{
- T t;
- for (int i = 0; i < n; i++)
+ if (std::is_same<T, half>::value)
+ {
+ float t;
+ for (int i = 0; i < n; i++)
+ {
+ t = std::min(
+ std::max(cl_half_to_float(x[i]), cl_half_to_float(minval[i])),
+ cl_half_to_float(maxval[i]));
+ if (t != cl_half_to_float(outptr[i]))
+ {
+ log_error(
+ "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n",
+ i, cl_half_to_float(x[i]), cl_half_to_float(minval[i]),
+ cl_half_to_float(maxval[i]), t,
+ cl_half_to_float(outptr[i]));
+ return -1;
+ }
+ }
+ }
+ else
{
- t = std::min(std::max(x[i], minval[i]), maxval[i]);
- if (t != outptr[i])
+ T t;
+ for (int i = 0; i < n; i++)
{
- log_error(
- "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n", i,
- x[i], minval[i], maxval[i], t, outptr[i]);
- return -1;
+ t = std::min(std::max(x[i], minval[i]), maxval[i]);
+ if (t != outptr[i])
+ {
+ log_error(
+ "%d) verification error: clamp( %a, %a, %a) = *%a vs. %a\n",
+ i, x[i], minval[i], maxval[i], t, outptr[i]);
+ return -1;
+ }
}
}
@@ -118,7 +149,6 @@ int verify_clamp(const T *const x, const T *const minval, const T *const maxval,
}
}
-
template <typename T>
int test_clamp_fn(cl_device_id device, cl_context context,
cl_command_queue queue, int n_elems)
@@ -169,6 +199,17 @@ int test_clamp_fn(cl_device_id device, cl_context context,
input_ptr[2][j] = get_random_double(input_ptr[1][j], 0x20000000, d);
}
}
+ else if (std::is_same<T, half>::value)
+ {
+ const float fval = CL_HALF_MAX;
+ for (j = 0; j < num_elements; j++)
+ {
+ input_ptr[0][j] = conv_to_half(get_random_float(-fval, fval, d));
+ input_ptr[1][j] = conv_to_half(get_random_float(-fval, fval, d));
+ input_ptr[2][j] = conv_to_half(
+ get_random_float(conv_to_flt(input_ptr[1][j]), fval, d));
+ }
+ }
for (i = 0; i < 3; i++)
{
@@ -194,9 +235,16 @@ int test_clamp_fn(cl_device_id device, cl_context context,
"test_clamp");
test_error(err, "Unable to create kernel");
}
+ else if (std::is_same<T, half>::value)
+ {
+ err = create_single_kernel_helper(
+ context, &programs[i], &kernels[i], 1, &clamp_half_codes[i],
+ "test_clamp");
+ test_error(err, "Unable to create kernel");
+ }
- log_info("Just made a program for float, i=%d, size=%d, in slot %d\n",
- i, g_arrVecSizes[i], i);
+ log_info("Just made a program for %s, i=%d, size=%d, in slot %d\n",
+ tname.c_str(), i, g_arrVecSizes[i], i);
fflush(stdout);
for (j = 0; j < 4; j++)
@@ -239,10 +287,14 @@ int test_clamp_fn(cl_device_id device, cl_context context,
return err;
}
-
cl_int ClampTest::Run()
{
cl_int error = CL_SUCCESS;
+ if (is_extension_available(device, "cl_khr_fp16"))
+ {
+ error = test_clamp_fn<cl_half>(device, context, queue, num_elems);
+ test_error(error, "ClampTest::Run<cl_half> failed");
+ }
error = test_clamp_fn<float>(device, context, queue, num_elems);
test_error(error, "ClampTest::Run<float> failed");
@@ -256,7 +308,6 @@ cl_int ClampTest::Run()
return error;
}
-
int test_clamp(cl_device_id device, cl_context context, cl_command_queue queue,
int n_elems)
{
diff --git a/test_conformance/commonfns/test_mix.cpp b/test_conformance/commonfns/test_mix.cpp
index 92c10100..2a06e43d 100644
--- a/test_conformance/commonfns/test_mix.cpp
+++ b/test_conformance/commonfns/test_mix.cpp
@@ -18,6 +18,8 @@
#include <sys/types.h>
#include <sys/stat.h>
+#include "harness/stringHelpers.h"
+
#include "procs.h"
#include "test_base.h"
@@ -52,33 +54,42 @@ const char *mix_fn_code_pattern_v3_scalar =
" vstore3(mix(vload3(tid, x), vload3(tid, y), a[tid]), tid, dst);\n"
"}\n";
-
#define MAX_ERR 1e-3
namespace {
-
template <typename T>
int verify_mix(const T *const inptrX, const T *const inptrY,
const T *const inptrA, const T *const outptr, const int n,
const int veclen, const bool vecParam)
{
- T r;
- float delta = 0.0f;
+ double r, o;
+ float delta = 0.f, max_delta = 0.f;
int i;
if (vecParam)
{
for (i = 0; i < n * veclen; i++)
{
- r = inptrX[i] + ((inptrY[i] - inptrX[i]) * inptrA[i]);
- delta = fabs(double(r - outptr[i])) / r;
- if (delta > MAX_ERR)
+ r = conv_to_dbl(inptrX[i])
+ + ((conv_to_dbl(inptrY[i]) - conv_to_dbl(inptrX[i]))
+ * conv_to_dbl(inptrA[i]));
+
+ o = conv_to_dbl(outptr[i]);
+ delta = fabs(double(r - o)) / r;
+ if (!std::is_same<T, half>::value)
+ {
+ if (delta > MAX_ERR)
+ {
+ log_error("%d) verification error: mix(%a, %a, %a) = *%a "
+ "vs. %a\n",
+ i, inptrX[i], inptrY[i], inptrA[i], r, outptr[i]);
+ return -1;
+ }
+ }
+ else
{
- log_error(
- "%d) verification error: mix(%a, %a, %a) = *%a vs. %a\n", i,
- inptrX[i], inptrY[i], inptrA[i], r, outptr[i]);
- return -1;
+ max_delta = std::max(max_delta, delta);
}
}
}
@@ -90,25 +101,40 @@ int verify_mix(const T *const inptrX, const T *const inptrY,
int vi = i * veclen;
for (int j = 0; j < veclen; ++j, ++vi)
{
- r = inptrX[vi] + ((inptrY[vi] - inptrX[vi]) * inptrA[i]);
- delta = fabs(double(r - outptr[vi])) / r;
- if (delta > MAX_ERR)
+ r = conv_to_dbl(inptrX[vi])
+ + ((conv_to_dbl(inptrY[vi]) - conv_to_dbl(inptrX[vi]))
+ * conv_to_dbl(inptrA[i]));
+ delta = fabs(double(r - conv_to_dbl(outptr[vi]))) / r;
+ if (!std::is_same<T, half>::value)
{
- log_error("{%d, element %d}) verification error: mix(%a, "
- "%a, %a) = *%a vs. %a\n",
- ii, j, inptrX[vi], inptrY[vi], inptrA[i], r,
- outptr[vi]);
- return -1;
+ if (delta > MAX_ERR)
+ {
+ log_error(
+ "{%d, element %d}) verification error: mix(%a, "
+ "%a, %a) = *%a vs. %a\n",
+ ii, j, inptrX[vi], inptrY[vi], inptrA[i], r,
+ outptr[vi]);
+ return -1;
+ }
+ }
+ else
+ {
+ max_delta = std::max(max_delta, delta);
}
}
}
}
+ // due to the fact that accuracy of mix for cl_khr_fp16 is implementation
+ // defined this test only reports maximum error without testing maximum
+ // error threshold
+ if (std::is_same<T, half>::value)
+ log_error("mix half verification result, max delta: %a\n", max_delta);
+
return 0;
}
} // namespace
-
template <typename T>
int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue,
int n_elems, bool vecParam)
@@ -120,7 +146,7 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue,
std::vector<clKernelWrapper> kernels;
int err, i;
- MTdataHolder d = MTdataHolder(gRandomSeed);
+ MTdataHolder d(gRandomSeed);
assert(BaseFunctionTest::type2name.find(sizeof(T))
!= BaseFunctionTest::type2name.end());
@@ -142,19 +168,32 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue,
test_error(err, "clCreateBuffer failed");
}
- for (i = 0; i < num_elements; i++)
- {
- input_ptr[0][i] = (T)genrand_real1(d);
- input_ptr[1][i] = (T)genrand_real1(d);
- input_ptr[2][i] = (T)genrand_real1(d);
- }
-
std::string pragma_str;
if (std::is_same<T, double>::value)
{
pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
}
+ if (std::is_same<T, half>::value)
+ {
+ pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
+ for (i = 0; i < num_elements; i++)
+ {
+ input_ptr[0][i] = conv_to_half((float)genrand_real1(d));
+ input_ptr[1][i] = conv_to_half((float)genrand_real1(d));
+ input_ptr[2][i] = conv_to_half((float)genrand_real1(d));
+ }
+ }
+ else
+ {
+ for (i = 0; i < num_elements; i++)
+ {
+ input_ptr[0][i] = (T)genrand_real1(d);
+ input_ptr[1][i] = (T)genrand_real1(d);
+ input_ptr[2][i] = (T)genrand_real1(d);
+ }
+ }
+
for (i = 0; i < 3; i++)
{
err = clEnqueueWriteBuffer(queue, streams[i], CL_TRUE, 0,
@@ -164,7 +203,6 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue,
}
char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
-
for (i = 0; i < kTotalVecCount; i++)
{
std::string kernelSource;
@@ -174,15 +212,15 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue,
{
std::string str = mix_fn_code_pattern_v3;
kernelSource =
- string_format(str, pragma_str.c_str(), tname.c_str(),
- tname.c_str(), tname.c_str(), tname.c_str());
+ str_sprintf(str, pragma_str.c_str(), tname.c_str(),
+ tname.c_str(), tname.c_str(), tname.c_str());
}
else
{
std::string str = mix_fn_code_pattern_v3_scalar;
kernelSource =
- string_format(str, pragma_str.c_str(), tname.c_str(),
- tname.c_str(), tname.c_str(), tname.c_str());
+ str_sprintf(str, pragma_str.c_str(), tname.c_str(),
+ tname.c_str(), tname.c_str(), tname.c_str());
}
}
else
@@ -190,10 +228,10 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue,
// regular path
std::string str = mix_fn_code_pattern;
kernelSource =
- string_format(str, pragma_str.c_str(), tname.c_str(),
- vecSizeNames[i], tname.c_str(), vecSizeNames[i],
- tname.c_str(), vecParam ? vecSizeNames[i] : "",
- tname.c_str(), vecSizeNames[i]);
+ str_sprintf(str, pragma_str.c_str(), tname.c_str(),
+ vecSizeNames[i], tname.c_str(), vecSizeNames[i],
+ tname.c_str(), vecParam ? vecSizeNames[i] : "",
+ tname.c_str(), vecSizeNames[i]);
}
const char *programPtr = kernelSource.c_str();
err =
@@ -242,10 +280,14 @@ int test_mix_fn(cl_device_id device, cl_context context, cl_command_queue queue,
return err;
}
-
cl_int MixTest::Run()
{
cl_int error = CL_SUCCESS;
+ if (is_extension_available(device, "cl_khr_fp16"))
+ {
+ error = test_mix_fn<half>(device, context, queue, num_elems, vecParam);
+ test_error(error, "MixTest::Run<cl_half> failed");
+ }
error = test_mix_fn<float>(device, context, queue, num_elems, vecParam);
test_error(error, "MixTest::Run<float> failed");
@@ -260,7 +302,6 @@ cl_int MixTest::Run()
return error;
}
-
int test_mix(cl_device_id device, cl_context context, cl_command_queue queue,
int n_elems)
{
@@ -268,7 +309,6 @@ int test_mix(cl_device_id device, cl_context context, cl_command_queue queue,
true);
}
-
int test_mixf(cl_device_id device, cl_context context, cl_command_queue queue,
int n_elems)
{
diff --git a/test_conformance/commonfns/test_smoothstep.cpp b/test_conformance/commonfns/test_smoothstep.cpp
index 31948d3f..5afc2d0f 100644
--- a/test_conformance/commonfns/test_smoothstep.cpp
+++ b/test_conformance/commonfns/test_smoothstep.cpp
@@ -18,10 +18,11 @@
#include <sys/types.h>
#include <sys/stat.h>
+#include "harness/stringHelpers.h"
+
#include "procs.h"
#include "test_base.h"
-
const char *smoothstep_fn_code_pattern =
"%s\n" /* optional pragma */
"__kernel void test_fn(__global %s%s *e0, __global %s%s *e1, __global %s%s "
@@ -53,38 +54,43 @@ const char *smoothstep_fn_code_pattern_v3_scalar =
" vstore3(smoothstep(e0[tid], e1[tid], vload3(tid,x)), tid, dst);\n"
"}\n";
-
#define MAX_ERR (1e-5f)
namespace {
-
template <typename T>
int verify_smoothstep(const T *const edge0, const T *const edge1,
const T *const x, const T *const outptr, const int n,
const int veclen, const bool vecParam)
{
- T r, t;
- float delta = 0;
+ double r, t;
+ float delta = 0, max_delta = 0;
if (vecParam)
{
for (int i = 0; i < n * veclen; i++)
{
- t = (x[i] - edge0[i]) / (edge1[i] - edge0[i]);
- if (t < 0.0f)
- t = 0.0f;
- else if (t > 1.0f)
- t = 1.0f;
- r = t * t * (3.0f - 2.0f * t);
- delta = (float)fabs(r - outptr[i]);
- if (delta > MAX_ERR)
+ t = (conv_to_dbl(x[i]) - conv_to_dbl(edge0[i]))
+ / (conv_to_dbl(edge1[i]) - conv_to_dbl(edge0[i]));
+ if (t < 0.0)
+ t = 0.0;
+ else if (t > 1.0)
+ t = 1.0;
+ r = t * t * (3.0 - 2.0 * t);
+ delta = (float)fabs(r - conv_to_dbl(outptr[i]));
+ if (!std::is_same<T, half>::value)
{
- log_error("%d) verification error: smoothstep(%a, %a, %a) = "
- "*%a vs. %a\n",
- i, x[i], edge0[i], edge1[i], r, outptr[i]);
- return -1;
+ if (delta > MAX_ERR)
+ {
+ log_error(
+ "%d) verification error: smoothstep(%a, %a, %a) = "
+ "*%a vs. %a\n",
+ i, x[i], edge0[i], edge1[i], r, outptr[i]);
+ return -1;
+ }
}
+ else
+ max_delta = std::max(max_delta, delta);
}
}
else
@@ -95,32 +101,48 @@ int verify_smoothstep(const T *const edge0, const T *const edge1,
int vi = i * veclen;
for (int j = 0; j < veclen; ++j, ++vi)
{
- t = (x[vi] - edge0[i]) / (edge1[i] - edge0[i]);
- if (t < 0.0f)
- t = 0.0f;
- else if (t > 1.0f)
- t = 1.0f;
- r = t * t * (3.0f - 2.0f * t);
- delta = (float)fabs(r - outptr[vi]);
- if (delta > MAX_ERR)
+ t = (conv_to_dbl(x[vi]) - conv_to_dbl(edge0[i]))
+ / (conv_to_dbl(edge1[i]) - conv_to_dbl(edge0[i]));
+ if (t < 0.0)
+ t = 0.0;
+ else if (t > 1.0)
+ t = 1.0;
+ r = t * t * (3.0 - 2.0 * t);
+ delta = (float)fabs(r - conv_to_dbl(outptr[vi]));
+
+ if (!std::is_same<T, half>::value)
{
- log_error("{%d, element %d}) verification error: "
- "smoothstep(%a, %a, %a) = *%a vs. %a\n",
- ii, j, x[vi], edge0[i], edge1[i], r, outptr[vi]);
- return -1;
+ if (delta > MAX_ERR)
+ {
+ log_error("{%d, element %d}) verification error: "
+ "smoothstep(%a, %a, %a) = *%a vs. %a\n",
+ ii, j, x[vi], edge0[i], edge1[i], r,
+ outptr[vi]);
+ return -1;
+ }
}
+ else
+ max_delta = std::max(max_delta, delta);
}
}
}
+
+ // due to the fact that accuracy of smoothstep for cl_khr_fp16 is
+ // implementation defined this test only reports maximum error without
+ // testing maximum error threshold
+ if (std::is_same<T, half>::value)
+ log_error("smoothstep half verification result, max delta: %a\n",
+ max_delta);
+
return 0;
}
}
-
template <typename T>
int test_smoothstep_fn(cl_device_id device, cl_context context,
- cl_command_queue queue, int n_elems, bool vecParam)
+ cl_command_queue queue, const int n_elems,
+ const bool vecParam)
{
clMemWrapper streams[4];
std::vector<T> input_ptr[3], output_ptr;
@@ -170,6 +192,17 @@ int test_smoothstep_fn(cl_device_id device, cl_context context,
input_ptr[2][i] = get_random_double(-0x20000000, 0x20000000, d);
}
}
+ else if (std::is_same<T, half>::value)
+ {
+ pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
+ for (i = 0; i < num_elements; i++)
+ {
+ input_ptr[0][i] = conv_to_half(get_random_float(-65503, 65503, d));
+ input_ptr[1][i] = conv_to_half(
+ get_random_float(conv_to_flt(input_ptr[0][i]), 65503, d));
+ input_ptr[2][i] = conv_to_half(get_random_float(-65503, 65503, d));
+ }
+ }
for (i = 0; i < 3; i++)
{
@@ -179,7 +212,7 @@ int test_smoothstep_fn(cl_device_id device, cl_context context,
test_error(err, "Unable to write input buffer");
}
- char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
+ const char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
for (i = 0; i < kTotalVecCount; i++)
{
@@ -190,15 +223,15 @@ int test_smoothstep_fn(cl_device_id device, cl_context context,
{
std::string str = smoothstep_fn_code_pattern_v3;
kernelSource =
- string_format(str, pragma_str.c_str(), tname.c_str(),
- tname.c_str(), tname.c_str(), tname.c_str());
+ str_sprintf(str, pragma_str.c_str(), tname.c_str(),
+ tname.c_str(), tname.c_str(), tname.c_str());
}
else
{
std::string str = smoothstep_fn_code_pattern_v3_scalar;
kernelSource =
- string_format(str, pragma_str.c_str(), tname.c_str(),
- tname.c_str(), tname.c_str(), tname.c_str());
+ str_sprintf(str, pragma_str.c_str(), tname.c_str(),
+ tname.c_str(), tname.c_str(), tname.c_str());
}
}
else
@@ -206,11 +239,12 @@ int test_smoothstep_fn(cl_device_id device, cl_context context,
// regular path
std::string str = smoothstep_fn_code_pattern;
kernelSource =
- string_format(str, pragma_str.c_str(), tname.c_str(),
- vecParam ? vecSizeNames[i] : "", tname.c_str(),
- vecParam ? vecSizeNames[i] : "", tname.c_str(),
- vecSizeNames[i], tname.c_str(), vecSizeNames[i]);
+ str_sprintf(str, pragma_str.c_str(), tname.c_str(),
+ vecParam ? vecSizeNames[i] : "", tname.c_str(),
+ vecParam ? vecSizeNames[i] : "", tname.c_str(),
+ vecSizeNames[i], tname.c_str(), vecSizeNames[i]);
}
+
const char *programPtr = kernelSource.c_str();
err =
create_single_kernel_helper(context, &programs[i], &kernels[i], 1,
@@ -259,10 +293,15 @@ int test_smoothstep_fn(cl_device_id device, cl_context context,
return err;
}
-
cl_int SmoothstepTest::Run()
{
cl_int error = CL_SUCCESS;
+ if (is_extension_available(device, "cl_khr_fp16"))
+ {
+ error = test_smoothstep_fn<half>(device, context, queue, num_elems,
+ vecParam);
+ test_error(error, "SmoothstepTest::Run<cl_half> failed");
+ }
error =
test_smoothstep_fn<float>(device, context, queue, num_elems, vecParam);
@@ -278,7 +317,6 @@ cl_int SmoothstepTest::Run()
return error;
}
-
int test_smoothstep(cl_device_id device, cl_context context,
cl_command_queue queue, int n_elems)
{
@@ -286,7 +324,6 @@ int test_smoothstep(cl_device_id device, cl_context context,
"smoothstep", true);
}
-
int test_smoothstepf(cl_device_id device, cl_context context,
cl_command_queue queue, int n_elems)
{
diff --git a/test_conformance/commonfns/test_step.cpp b/test_conformance/commonfns/test_step.cpp
index dc91766e..1cfa96ea 100644
--- a/test_conformance/commonfns/test_step.cpp
+++ b/test_conformance/commonfns/test_step.cpp
@@ -18,10 +18,11 @@
#include <sys/types.h>
#include <sys/stat.h>
+#include "harness/stringHelpers.h"
+
#include "procs.h"
#include "test_base.h"
-
const char *step_fn_code_pattern = "%s\n" /* optional pragma */
"__kernel void test_fn(__global %s%s *edge, "
"__global %s%s *x, __global %s%s *dst)\n"
@@ -48,7 +49,6 @@ const char *step_fn_code_pattern_v3_scalar =
" vstore3(step(edge[tid], vload3(tid,x)), tid, dst);\n"
"}\n";
-
namespace {
template <typename T>
@@ -62,8 +62,8 @@ int verify_step(const T *const inptrA, const T *const inptrB,
{
for (int i = 0; i < n * veclen; i++)
{
- r = (inptrB[i] < inptrA[i]) ? 0.0 : 1.0;
- if (r != outptr[i]) return -1;
+ r = (conv_to_dbl(inptrB[i]) < conv_to_dbl(inptrA[i])) ? 0.0 : 1.0;
+ if (r != conv_to_dbl(outptr[i])) return -1;
}
}
else
@@ -73,24 +73,31 @@ int verify_step(const T *const inptrA, const T *const inptrB,
int ii = i / veclen;
for (int j = 0; j < veclen && i < n; ++j, ++i)
{
- r = (inptrB[i] < inptrA[ii]) ? 0.0f : 1.0f;
- if (r != outptr[i])
+ r = (conv_to_dbl(inptrB[i]) < conv_to_dbl(inptrA[ii])) ? 0.0f
+ : 1.0f;
+ if (r != conv_to_dbl(outptr[i]))
{
- log_error("Failure @ {%d, element %d}: step(%a,%a) -> *%a "
- "vs %a\n",
- ii, j, inptrA[ii], inptrB[i], r, outptr[i]);
+ if (std::is_same<T, half>::value)
+ log_error(
+ "Failure @ {%d, element %d}: step(%a,%a) -> *%a "
+ "vs %a\n",
+ ii, j, conv_to_flt(inptrA[ii]),
+ conv_to_flt(inptrB[i]), r, conv_to_flt(outptr[i]));
+ else
+ log_error(
+ "Failure @ {%d, element %d}: step(%a,%a) -> *%a "
+ "vs %a\n",
+ ii, j, inptrA[ii], inptrB[i], r, outptr[i]);
return -1;
}
}
}
}
-
return 0;
}
}
-
template <typename T>
int test_step_fn(cl_device_id device, cl_context context,
cl_command_queue queue, int n_elems, bool vecParam)
@@ -140,6 +147,16 @@ int test_step_fn(cl_device_id device, cl_context context,
input_ptr[1][i] = get_random_double(-0x40000000, 0x40000000, d);
}
}
+ else if (std::is_same<T, half>::value)
+ {
+ const float fval = CL_HALF_MAX;
+ pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
+ for (i = 0; i < num_elements; i++)
+ {
+ input_ptr[0][i] = conv_to_half(get_random_float(-fval, fval, d));
+ input_ptr[1][i] = conv_to_half(get_random_float(-fval, fval, d));
+ }
+ }
for (i = 0; i < 2; i++)
{
@@ -160,15 +177,15 @@ int test_step_fn(cl_device_id device, cl_context context,
{
std::string str = step_fn_code_pattern_v3;
kernelSource =
- string_format(str, pragma_str.c_str(), tname.c_str(),
- tname.c_str(), tname.c_str());
+ str_sprintf(str, pragma_str.c_str(), tname.c_str(),
+ tname.c_str(), tname.c_str());
}
else
{
std::string str = step_fn_code_pattern_v3_scalar;
kernelSource =
- string_format(str, pragma_str.c_str(), tname.c_str(),
- tname.c_str(), tname.c_str());
+ str_sprintf(str, pragma_str.c_str(), tname.c_str(),
+ tname.c_str(), tname.c_str());
}
}
else
@@ -176,9 +193,9 @@ int test_step_fn(cl_device_id device, cl_context context,
// regular path
std::string str = step_fn_code_pattern;
kernelSource =
- string_format(str, pragma_str.c_str(), tname.c_str(),
- vecParam ? vecSizeNames[i] : "", tname.c_str(),
- vecSizeNames[i], tname.c_str(), vecSizeNames[i]);
+ str_sprintf(str, pragma_str.c_str(), tname.c_str(),
+ vecParam ? vecSizeNames[i] : "", tname.c_str(),
+ vecSizeNames[i], tname.c_str(), vecSizeNames[i]);
}
const char *programPtr = kernelSource.c_str();
err =
@@ -229,10 +246,14 @@ int test_step_fn(cl_device_id device, cl_context context,
return err;
}
-
cl_int StepTest::Run()
{
cl_int error = CL_SUCCESS;
+ if (is_extension_available(device, "cl_khr_fp16"))
+ {
+ error = test_step_fn<half>(device, context, queue, num_elems, vecParam);
+ test_error(error, "StepTest::Run<cl_half> failed");
+ }
error = test_step_fn<float>(device, context, queue, num_elems, vecParam);
test_error(error, "StepTest::Run<float> failed");
@@ -247,7 +268,6 @@ cl_int StepTest::Run()
return error;
}
-
int test_step(cl_device_id device, cl_context context, cl_command_queue queue,
int n_elems)
{
@@ -255,7 +275,6 @@ int test_step(cl_device_id device, cl_context context, cl_command_queue queue,
true);
}
-
int test_stepf(cl_device_id device, cl_context context, cl_command_queue queue,
int n_elems)
{
diff --git a/test_conformance/commonfns/test_unary_fn.cpp b/test_conformance/commonfns/test_unary_fn.cpp
index fed4389d..91b5c215 100644
--- a/test_conformance/commonfns/test_unary_fn.cpp
+++ b/test_conformance/commonfns/test_unary_fn.cpp
@@ -21,6 +21,7 @@
#include <vector>
#include "harness/deviceInfo.h"
+#include "harness/stringHelpers.h"
#include "harness/typeWrappers.h"
#include "procs.h"
@@ -30,7 +31,6 @@
#define M_PI 3.14159265358979323846264338327950288
#endif
-
// clang-format off
const char *unary_fn_code_pattern =
"%s\n" /* optional pragma */
@@ -51,23 +51,10 @@ const char *unary_fn_code_pattern_v3 =
"}\n";
// clang-format on
-
#define MAX_ERR 2.0f
namespace {
-
-template <typename T> float UlpFn(const T &val, const double &r)
-{
- if (std::is_same<T, double>::value)
- return Ulp_Error_Double(val, r);
- else if (std::is_same<T, float>::value)
- return Ulp_Error(val, r);
- else if (std::is_same<T, half>::value)
- return Ulp_Error(val, r);
-}
-
-
template <typename T>
int verify_degrees(const T *const inptr, const T *const outptr, int n)
{
@@ -77,7 +64,11 @@ int verify_degrees(const T *const inptr, const T *const outptr, int n)
for (int i = 0, j = 0; i < n; i++, j++)
{
- r = (180.0 / M_PI) * inptr[i];
+ r = (180.0 / M_PI) * conv_to_dbl(inptr[i]);
+
+ if (std::is_same<T, half>::value)
+ if (!isfinite_fp(conv_to_half(r)) && !isfinite_fp(outptr[i]))
+ continue;
error = UlpFn(outptr[i], r);
@@ -88,21 +79,32 @@ int verify_degrees(const T *const inptr, const T *const outptr, int n)
max_val = r;
if (fabsf(error) > MAX_ERR)
{
- log_error("%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n",
- i, inptr[i], r, outptr[i], r, outptr[i], error);
+ if (std::is_same<T, half>::value)
+ log_error(
+ "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i,
+ conv_to_flt(inptr[i]), r, conv_to_flt(outptr[i]), r,
+ conv_to_flt(outptr[i]), error);
+ else
+ log_error(
+ "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i,
+ inptr[i], r, outptr[i], r, outptr[i], error);
return 1;
}
}
}
- log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n",
- max_error, max_index, max_val, outptr[max_index], max_val,
- outptr[max_index]);
+ if (std::is_same<T, half>::value)
+ log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n",
+ max_error, max_index, max_val, conv_to_flt(outptr[max_index]),
+ max_val, conv_to_flt(outptr[max_index]));
+ else
+ log_info("degrees: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n",
+ max_error, max_index, max_val, outptr[max_index], max_val,
+ outptr[max_index]);
return 0;
}
-
template <typename T>
int verify_radians(const T *const inptr, const T *const outptr, int n)
{
@@ -112,8 +114,14 @@ int verify_radians(const T *const inptr, const T *const outptr, int n)
for (int i = 0, j = 0; i < n; i++, j++)
{
- r = (M_PI / 180.0) * inptr[i];
- error = Ulp_Error(outptr[i], r);
+ r = (M_PI / 180.0) * conv_to_dbl(inptr[i]);
+
+ if (std::is_same<T, half>::value)
+ if (!isfinite_fp(conv_to_half(r)) && !isfinite_fp(outptr[i]))
+ continue;
+
+ error = UlpFn(outptr[i], r);
+
if (fabsf(error) > max_error)
{
max_error = error;
@@ -121,41 +129,51 @@ int verify_radians(const T *const inptr, const T *const outptr, int n)
max_val = r;
if (fabsf(error) > MAX_ERR)
{
- log_error("%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n",
- i, inptr[i], r, outptr[i], r, outptr[i], error);
+ if (std::is_same<T, half>::value)
+ log_error(
+ "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i,
+ conv_to_flt(inptr[i]), r, conv_to_flt(outptr[i]), r,
+ conv_to_flt(outptr[i]), error);
+ else
+ log_error(
+ "%d) Error @ %a: *%a vs %a (*%g vs %g) ulps: %f\n", i,
+ inptr[i], r, outptr[i], r, outptr[i], error);
return 1;
}
}
}
- log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n",
- max_error, max_index, max_val, outptr[max_index], max_val,
- outptr[max_index]);
+ if (std::is_same<T, half>::value)
+ log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n",
+ max_error, max_index, max_val, conv_to_flt(outptr[max_index]),
+ max_val, conv_to_flt(outptr[max_index]));
+ else
+ log_info("radians: Max error %f ulps at %d: *%a vs %a (*%g vs %g)\n",
+ max_error, max_index, max_val, outptr[max_index], max_val,
+ outptr[max_index]);
return 0;
}
-
template <typename T>
int verify_sign(const T *const inptr, const T *const outptr, int n)
{
- T r = 0;
+ double r = 0;
for (int i = 0; i < n; i++)
{
- if (inptr[i] > 0.0f)
+ if (conv_to_dbl(inptr[i]) > 0.0f)
r = 1.0;
- else if (inptr[i] < 0.0f)
+ else if (conv_to_dbl(inptr[i]) < 0.0f)
r = -1.0;
else
r = 0.0;
- if (r != outptr[i]) return -1;
+ if (r != conv_to_dbl(outptr[i])) return -1;
}
return 0;
}
}
-
template <typename T>
int test_unary_fn(cl_device_id device, cl_context context,
cl_command_queue queue, int n_elems,
@@ -207,33 +225,38 @@ int test_unary_fn(cl_device_id device, cl_context context,
get_random_double(-100000.0 * M_PI, 100000.0 * M_PI, d);
}
}
+ else if (std::is_same<T, half>::value)
+ {
+ pragma_str = "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n";
+ for (int j = 0; j < num_elements; j++)
+ {
+ input_ptr[j] = conv_to_half(get_random_float(
+ (float)(-10000.f * M_PI), (float)(10000.f * M_PI), d));
+ }
+ }
err = clEnqueueWriteBuffer(queue, streams[0], true, 0,
sizeof(T) * num_elements, &input_ptr.front(), 0,
NULL, NULL);
- if (err != CL_SUCCESS)
- {
- log_error("clEnqueueWriteBuffer failed\n");
- return -1;
- }
+ test_error(err, "clEnqueueWriteBuffer failed\n");
for (i = 0; i < kTotalVecCount; i++)
{
std::string kernelSource;
- char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
+ const char vecSizeNames[][3] = { "", "2", "4", "8", "16", "3" };
if (i >= kVectorSizeCount)
{
std::string str = unary_fn_code_pattern_v3;
- kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(),
- tname.c_str(), fnName.c_str());
+ kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(),
+ tname.c_str(), fnName.c_str());
}
else
{
std::string str = unary_fn_code_pattern;
- kernelSource = string_format(str, pragma_str.c_str(), tname.c_str(),
- vecSizeNames[i], tname.c_str(),
- vecSizeNames[i], fnName.c_str());
+ kernelSource = str_sprintf(str, pragma_str.c_str(), tname.c_str(),
+ vecSizeNames[i], tname.c_str(),
+ vecSizeNames[i], fnName.c_str());
}
/* Create kernels */
@@ -290,11 +313,18 @@ int test_unary_fn(cl_device_id device, cl_context context,
return err;
}
-
cl_int DegreesTest::Run()
{
- cl_int error = test_unary_fn<float>(device, context, queue, num_elems,
- fnName.c_str(), verify_degrees<float>);
+ cl_int error = CL_SUCCESS;
+ if (is_extension_available(device, "cl_khr_fp16"))
+ {
+ error = test_unary_fn<half>(device, context, queue, num_elems,
+ fnName.c_str(), verify_degrees<half>);
+ test_error(error, "DegreesTest::Run<cl_half> failed");
+ }
+
+ error = test_unary_fn<float>(device, context, queue, num_elems,
+ fnName.c_str(), verify_degrees<float>);
test_error(error, "DegreesTest::Run<float> failed");
if (is_extension_available(device, "cl_khr_fp64"))
@@ -307,11 +337,18 @@ cl_int DegreesTest::Run()
return error;
}
-
cl_int RadiansTest::Run()
{
- cl_int error = test_unary_fn<float>(device, context, queue, num_elems,
- fnName.c_str(), verify_radians<float>);
+ cl_int error = CL_SUCCESS;
+ if (is_extension_available(device, "cl_khr_fp16"))
+ {
+ error = test_unary_fn<half>(device, context, queue, num_elems,
+ fnName.c_str(), verify_radians<half>);
+ test_error(error, "RadiansTest::Run<cl_half> failed");
+ }
+
+ error = test_unary_fn<float>(device, context, queue, num_elems,
+ fnName.c_str(), verify_radians<float>);
test_error(error, "RadiansTest::Run<float> failed");
if (is_extension_available(device, "cl_khr_fp64"))
@@ -324,11 +361,18 @@ cl_int RadiansTest::Run()
return error;
}
-
cl_int SignTest::Run()
{
- cl_int error = test_unary_fn<float>(device, context, queue, num_elems,
- fnName.c_str(), verify_sign<float>);
+ cl_int error = CL_SUCCESS;
+ if (is_extension_available(device, "cl_khr_fp16"))
+ {
+ error = test_unary_fn<half>(device, context, queue, num_elems,
+ fnName.c_str(), verify_sign<half>);
+ test_error(error, "SignTest::Run<cl_half> failed");
+ }
+
+ error = test_unary_fn<float>(device, context, queue, num_elems,
+ fnName.c_str(), verify_sign<float>);
test_error(error, "SignTest::Run<float> failed");
if (is_extension_available(device, "cl_khr_fp64"))
@@ -341,7 +385,6 @@ cl_int SignTest::Run()
return error;
}
-
int test_degrees(cl_device_id device, cl_context context,
cl_command_queue queue, int n_elems)
{
@@ -349,7 +392,6 @@ int test_degrees(cl_device_id device, cl_context context,
"degrees");
}
-
int test_radians(cl_device_id device, cl_context context,
cl_command_queue queue, int n_elems)
{
@@ -357,7 +399,6 @@ int test_radians(cl_device_id device, cl_context context,
"radians");
}
-
int test_sign(cl_device_id device, cl_context context, cl_command_queue queue,
int n_elems)
{
diff --git a/test_conformance/compiler/test_compile.cpp b/test_conformance/compiler/test_compile.cpp
index f3ee4312..3af8125a 100644
--- a/test_conformance/compiler/test_compile.cpp
+++ b/test_conformance/compiler/test_compile.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -16,148 +16,147 @@
#include "testBase.h"
#if defined(_WIN32)
#include <time.h>
-#elif defined(__linux__) || defined(__APPLE__)
+#elif defined(__linux__) || defined(__APPLE__)
#include <sys/time.h>
#include <unistd.h>
#endif
#include "harness/conversions.h"
#define MAX_LINE_SIZE_IN_PROGRAM 1024
-#define MAX_LOG_SIZE_IN_PROGRAM 2048
+#define MAX_LOG_SIZE_IN_PROGRAM 2048
const char *sample_kernel_start =
-"__kernel void sample_test(__global float *src, __global int *dst)\n"
-"{\n"
-" float temp;\n"
-" int tid = get_global_id(0);\n";
+ "__kernel void sample_test(__global float *src, __global int *dst)\n"
+ "{\n"
+ " float temp = 0.0f;\n"
+ " int tid = get_global_id(0);\n";
const char *sample_kernel_end = "}\n";
-const char *sample_kernel_lines[] = {
-"dst[tid] = src[tid];\n",
-"dst[tid] = src[tid] * 3.f;\n",
-"temp = src[tid] / 4.f;\n",
-"dst[tid] = dot(temp,src[tid]);\n",
-"dst[tid] = dst[tid] + temp;\n" };
+const char *sample_kernel_lines[] = { "dst[tid] = src[tid];\n",
+ "dst[tid] = src[tid] * 3.f;\n",
+ "temp = src[tid] / 4.f;\n",
+ "dst[tid] = dot(temp,src[tid]);\n",
+ "dst[tid] = dst[tid] + temp;\n" };
/* I compile and link therefore I am. Robert Ioffe */
-/* The following kernels are used in testing Improved Compilation and Linking feature */
-
-const char *simple_kernel =
-"__kernel void\n"
-"CopyBuffer(\n"
-" __global float* src,\n"
-" __global float* dst )\n"
-"{\n"
-" int id = (int)get_global_id(0);\n"
-" dst[id] = src[id];\n"
-"}\n";
+/* The following kernels are used in testing Improved Compilation and Linking
+ * feature */
+
+const char *simple_kernel = "__kernel void\n"
+ "CopyBuffer(\n"
+ " __global float* src,\n"
+ " __global float* dst )\n"
+ "{\n"
+ " int id = (int)get_global_id(0);\n"
+ " dst[id] = src[id];\n"
+ "}\n";
const char *simple_kernel_with_defines =
-"__kernel void\n"
-"CopyBuffer(\n"
-" __global float* src,\n"
-" __global float* dst )\n"
-"{\n"
-" int id = (int)get_global_id(0);\n"
-" float temp = src[id] - 42;\n"
-" dst[id] = FIRST + temp + SECOND;\n"
-"}\n";
-
-const char *simple_kernel_template =
-"__kernel void\n"
-"CopyBuffer%d(\n"
-" __global float* src,\n"
-" __global float* dst )\n"
-"{\n"
-" int id = (int)get_global_id(0);\n"
-" dst[id] = src[id];\n"
-"}\n";
-
-const char *composite_kernel_start =
-"__kernel void\n"
-"CompositeKernel(\n"
-" __global float* src,\n"
-" __global float* dst )\n"
-"{\n";
+ "__kernel void\n"
+ "CopyBuffer(\n"
+ " __global float* src,\n"
+ " __global float* dst )\n"
+ "{\n"
+ " int id = (int)get_global_id(0);\n"
+ " float temp = src[id] - 42;\n"
+ " dst[id] = FIRST + temp + SECOND;\n"
+ "}\n";
+
+const char *simple_kernel_template = "__kernel void\n"
+ "CopyBuffer%d(\n"
+ " __global float* src,\n"
+ " __global float* dst )\n"
+ "{\n"
+ " int id = (int)get_global_id(0);\n"
+ " dst[id] = src[id];\n"
+ "}\n";
+
+const char *composite_kernel_start = "__kernel void\n"
+ "CompositeKernel(\n"
+ " __global float* src,\n"
+ " __global float* dst )\n"
+ "{\n";
const char *composite_kernel_end = "}\n";
-const char *composite_kernel_template =
-" CopyBuffer%d(src, dst);\n";
-
-const char *composite_kernel_extern_template =
-"extern __kernel void\n"
-"CopyBuffer%d(\n"
-" __global float* src,\n"
-" __global float* dst );\n";
-
-const char *another_simple_kernel =
-"extern __kernel void\n"
-"CopyBuffer(\n"
-" __global float* src,\n"
-" __global float* dst );\n"
-"__kernel void\n"
-"AnotherCopyBuffer(\n"
-" __global float* src,\n"
-" __global float* dst )\n"
-"{\n"
-" CopyBuffer(src, dst);\n"
-"}\n";
-
-const char* simple_header =
-"extern __kernel void\n"
-"CopyBuffer(\n"
-" __global float* src,\n"
-" __global float* dst );\n";
-
-const char* simple_header_name = "simple_header.h";
-
-const char* another_simple_kernel_with_header =
-"#include \"simple_header.h\"\n"
-"__kernel void\n"
-"AnotherCopyBuffer(\n"
-" __global float* src,\n"
-" __global float* dst )\n"
-"{\n"
-" CopyBuffer(src, dst);\n"
-"}\n";
-
-const char* header_name_templates[4] = { "simple_header%d.h",
- "foo/simple_header%d.h",
- "foo/bar/simple_header%d.h",
- "foo/bar/baz/simple_header%d.h"};
-
-const char* include_header_name_templates[4] = { "#include \"simple_header%d.h\"\n",
- "#include \"foo/simple_header%d.h\"\n",
- "#include \"foo/bar/simple_header%d.h\"\n",
- "#include \"foo/bar/baz/simple_header%d.h\"\n"};
-
-const char* compile_extern_var = "extern constant float foo;\n";
-const char* compile_extern_struct = "extern constant struct bar bart;\n";
-const char* compile_extern_function = "extern int baz(int, int);\n";
-
-const char* compile_static_var = "static constant float foo = 2.78;\n";
-const char* compile_static_struct = "static constant struct bar {float x, y, z, r; int color; } foo = {3.14159};\n";
-const char* compile_static_function = "static int foo(int x, int y) { return x*x + y*y; }\n";
-
-const char* compile_regular_var = "constant float foo = 4.0f;\n";
-const char* compile_regular_struct = "constant struct bar {float x, y, z, r; int color; } foo = {0.f, 0.f, 0.f, 0.f, 0};\n";
-const char* compile_regular_function = "int foo(int x, int y) { return x*x + y*y; }\n";
-
-const char* link_static_var_access = // use with compile_static_var
-"extern constant float foo;\n"
-"float access_foo() { return foo; }\n";
-
-const char* link_static_struct_access = // use with compile_static_struct
-"extern constant struct bar{float x, y, z, r; int color; } foo;\n"
-"struct bar access_foo() {return foo; }\n";
-
-const char* link_static_function_access = // use with compile_static_function
-"extern int foo(int, int);\n"
-"int access_foo() { int blah = foo(3, 4); return blah + 5; }\n";
-
-int test_large_single_compile(cl_context context, cl_device_id deviceID, unsigned int numLines)
+const char *composite_kernel_template = " CopyBuffer%d(src, dst);\n";
+
+const char *composite_kernel_extern_template = "extern __kernel void\n"
+ "CopyBuffer%d(\n"
+ " __global float* src,\n"
+ " __global float* dst );\n";
+
+const char *another_simple_kernel = "extern __kernel void\n"
+ "CopyBuffer(\n"
+ " __global float* src,\n"
+ " __global float* dst );\n"
+ "__kernel void\n"
+ "AnotherCopyBuffer(\n"
+ " __global float* src,\n"
+ " __global float* dst )\n"
+ "{\n"
+ " CopyBuffer(src, dst);\n"
+ "}\n";
+
+const char *simple_header = "extern __kernel void\n"
+ "CopyBuffer(\n"
+ " __global float* src,\n"
+ " __global float* dst );\n";
+
+const char *simple_header_name = "simple_header.h";
+
+const char *another_simple_kernel_with_header = "#include \"simple_header.h\"\n"
+ "__kernel void\n"
+ "AnotherCopyBuffer(\n"
+ " __global float* src,\n"
+ " __global float* dst )\n"
+ "{\n"
+ " CopyBuffer(src, dst);\n"
+ "}\n";
+
+const char *header_name_templates[4] = { "simple_header%d.h",
+ "foo/simple_header%d.h",
+ "foo/bar/simple_header%d.h",
+ "foo/bar/baz/simple_header%d.h" };
+
+const char *include_header_name_templates[4] = {
+ "#include \"simple_header%d.h\"\n", "#include \"foo/simple_header%d.h\"\n",
+ "#include \"foo/bar/simple_header%d.h\"\n",
+ "#include \"foo/bar/baz/simple_header%d.h\"\n"
+};
+
+const char *compile_extern_var = "extern constant float foo;\n";
+const char *compile_extern_struct = "extern constant struct bar bart;\n";
+const char *compile_extern_function = "extern int baz(int, int);\n";
+
+const char *compile_static_var = "static constant float foo = 2.78;\n";
+const char *compile_static_struct = "static constant struct bar {float x, y, "
+ "z, r; int color; } foo = {3.14159};\n";
+const char *compile_static_function =
+ "static int foo(int x, int y) { return x*x + y*y; }\n";
+
+const char *compile_regular_var = "constant float foo = 4.0f;\n";
+const char *compile_regular_struct =
+ "constant struct bar {float x, y, z, r; int color; } foo = {0.f, 0.f, 0.f, "
+ "0.f, 0};\n";
+const char *compile_regular_function =
+ "int foo(int x, int y) { return x*x + y*y; }\n";
+
+const char *link_static_var_access = // use with compile_static_var
+ "extern constant float foo;\n"
+ "float access_foo() { return foo; }\n";
+
+const char *link_static_struct_access = // use with compile_static_struct
+ "extern constant struct bar{float x, y, z, r; int color; } foo;\n"
+ "struct bar access_foo() {return foo; }\n";
+
+const char *link_static_function_access = // use with compile_static_function
+ "extern int foo(int, int);\n"
+ "int access_foo() { int blah = foo(3, 4); return blah + 5; }\n";
+
+int test_large_single_compile(cl_context context, cl_device_id deviceID,
+ unsigned int numLines)
{
int error;
cl_program program;
@@ -166,96 +165,113 @@ int test_large_single_compile(cl_context context, cl_device_id deviceID, unsigne
MTdata d;
/* First, allocate the array for our line pointers */
- lines = (const char **)malloc( numLines * sizeof( const char * ) );
- if (lines == NULL) {
- log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", numLines, __FILE__, __LINE__);
+ lines = (const char **)malloc(numLines * sizeof(const char *));
+ if (lines == NULL)
+ {
+ log_error(
+ "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n",
+ numLines, __FILE__, __LINE__);
return -1;
}
/* First and last lines are easy */
- lines[ 0 ] = sample_kernel_start;
- lines[ numLines - 1 ] = sample_kernel_end;
+ lines[0] = sample_kernel_start;
+ lines[numLines - 1] = sample_kernel_end;
- numChoices = sizeof( sample_kernel_lines ) / sizeof( sample_kernel_lines[ 0 ] );
+ numChoices = sizeof(sample_kernel_lines) / sizeof(sample_kernel_lines[0]);
/* Fill the rest with random lines to hopefully prevent much optimization */
- d = init_genrand( gRandomSeed );
- for( i = 1; i < numLines - 1; i++ )
+ d = init_genrand(gRandomSeed);
+ for (i = 1; i < numLines - 1; i++)
{
- lines[ i ] = sample_kernel_lines[ genrand_int32(d) % numChoices ];
+ lines[i] = sample_kernel_lines[genrand_int32(d) % numChoices];
}
- free_mtdata(d); d = NULL;
+ free_mtdata(d);
+ d = NULL;
/* Try to create a program with these lines */
- error = create_single_kernel_helper_create_program(context, &program, numLines, lines);
- if( program == NULL || error != CL_SUCCESS )
- {
- log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)", numLines, IGetErrorString( error ), __FILE__, __LINE__ );
- free( lines );
+ error = create_single_kernel_helper_create_program(context, &program,
+ numLines, lines);
+ if (program == NULL || error != CL_SUCCESS)
+ {
+ log_error("ERROR: Unable to create long test program with %d lines! "
+ "(%s in %s:%d)",
+ numLines, IGetErrorString(error), __FILE__, __LINE__);
+ free(lines);
if (program != NULL)
{
- error = clReleaseProgram( program );
- test_error( error, "Unable to release a program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release a program object");
}
return -1;
}
/* Build it */
- error = clBuildProgram( program, 1, &deviceID, NULL, NULL, NULL );
- test_error( error, "Unable to build a long program" );
+ error = clBuildProgram(program, 1, &deviceID, NULL, NULL, NULL);
+ test_error(error, "Unable to build a long program");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release a program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release a program object");
- free( lines );
+ free(lines);
return 0;
}
-int test_large_compile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_large_compile(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- unsigned int toTest[] = { 64, 128, 256, 512, 1024, 2048, 4096, 0 }; //8192, 16384, 32768, 0 };
+ unsigned int toTest[] = {
+ 64, 128, 256, 512, 1024, 2048, 4096, 0
+ }; // 8192, 16384, 32768, 0 };
unsigned int i;
- log_info( "Testing large compiles...this might take awhile...\n" );
+ log_info("Testing large compiles...this might take awhile...\n");
- for( i = 0; toTest[ i ] != 0; i++ )
+ for (i = 0; toTest[i] != 0; i++)
{
- log_info( " %d...\n", toTest[ i ] );
+ log_info(" %d...\n", toTest[i]);
#if defined(_WIN32)
clock_t start = clock();
-#elif defined(__linux__) || defined(__APPLE__)
- timeval time1, time2;
- gettimeofday(&time1, NULL);
+#elif defined(__linux__) || defined(__APPLE__)
+ timeval time1, time2;
+ gettimeofday(&time1, NULL);
#endif
- if( test_large_single_compile( context, deviceID, toTest[ i ] ) != 0 )
+ if (test_large_single_compile(context, deviceID, toTest[i]) != 0)
{
- log_error( "ERROR: long program test failed for %d lines! (in %s:%d)\n", toTest[ i ], __FILE__, __LINE__);
+ log_error(
+ "ERROR: long program test failed for %d lines! (in %s:%d)\n",
+ toTest[i], __FILE__, __LINE__);
return -1;
}
#if defined(_WIN32)
clock_t end = clock();
- log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] );
-#elif defined(__linux__) || defined(__APPLE__)
- gettimeofday(&time2, NULL);
- log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] );
+ log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false,
+ "clock() time in secs", "%d lines", toTest[i]);
+#elif defined(__linux__) || defined(__APPLE__)
+ gettimeofday(&time2, NULL);
+ log_perf((float)(float)(time2.tv_sec - time1.tv_sec)
+ + 1.0e-6 * (time2.tv_usec - time1.tv_usec),
+ false, "wall time in secs", "%d lines", toTest[i]);
#endif
}
return 0;
}
-static int verifyCopyBuffer(cl_context context, cl_command_queue queue, cl_kernel kernel);
+static int verifyCopyBuffer(cl_context context, cl_command_queue queue,
+ cl_kernel kernel);
#if defined(__APPLE__) || defined(__linux)
#define _strdup strdup
#endif
-int test_large_multi_file_library(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines)
+int test_large_multi_file_library(cl_context context, cl_device_id deviceID,
+ cl_command_queue queue, unsigned int numLines)
{
int error;
cl_program program;
@@ -264,164 +280,194 @@ int test_large_multi_file_library(cl_context context, cl_device_id deviceID, cl_
unsigned int i;
char buffer[MAX_LINE_SIZE_IN_PROGRAM];
- simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program));
- if (simple_kernels == NULL) {
- log_error( "ERROR: Unable to allocate kernels array with %d kernels! (in %s:%d)\n", numLines, __FILE__, __LINE__);
+ simple_kernels = (cl_program *)malloc(numLines * sizeof(cl_program));
+ if (simple_kernels == NULL)
+ {
+ log_error("ERROR: Unable to allocate kernels array with %d kernels! "
+ "(in %s:%d)\n",
+ numLines, __FILE__, __LINE__);
return -1;
}
/* First, allocate the array for our line pointers */
- lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) );
- if (lines == NULL) {
+ lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *));
+ if (lines == NULL)
+ {
free(simple_kernels);
- log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n",
+ (2 * numLines + 2), __FILE__, __LINE__);
return -1;
}
- for( i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
sprintf(buffer, composite_kernel_extern_template, i);
lines[i] = _strdup(buffer);
}
/* First and last lines are easy */
- lines[ numLines ] = composite_kernel_start;
- lines[ 2* numLines + 1] = composite_kernel_end;
+ lines[numLines] = composite_kernel_start;
+ lines[2 * numLines + 1] = composite_kernel_end;
/* Fill the rest with templated kernels */
- for( i = numLines + 1; i < 2* numLines + 1; i++ )
+ for (i = numLines + 1; i < 2 * numLines + 1; i++)
{
sprintf(buffer, composite_kernel_template, i - numLines - 1);
- lines[ i ] = _strdup(buffer);
+ lines[i] = _strdup(buffer);
}
/* Try to create a program with these lines */
- error = create_single_kernel_helper_create_program(context, &program, 2 * numLines + 2, lines);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program,
+ 2 * numLines + 2, lines);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ );
- free( simple_kernels );
- for( i = 0; i < numLines; i++)
+ log_error("ERROR: Unable to create long test program with %d lines! "
+ "(%s) (in %s:%d)\n",
+ numLines, IGetErrorString(error), __FILE__, __LINE__);
+ free(simple_kernels);
+ for (i = 0; i < numLines; i++)
{
- free( (void*)lines[i] );
- free( (void*)lines[i+numLines+1] );
+ free((void *)lines[i]);
+ free((void *)lines[i + numLines + 1]);
}
- free( lines );
+ free(lines);
if (program != NULL)
{
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
}
return -1;
}
/* Compile it */
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple program");
/* Create and compile templated kernels */
- for( i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
sprintf(buffer, simple_kernel_template, i);
- const char* kernel_source = _strdup(buffer);
- simple_kernels[i] = clCreateProgramWithSource( context, 1, &kernel_source, NULL, &error );
- if( simple_kernels[i] == NULL || error != CL_SUCCESS )
+ const char *kernel_source = _strdup(buffer);
+ simple_kernels[i] =
+ clCreateProgramWithSource(context, 1, &kernel_source, NULL, &error);
+ if (simple_kernels[i] == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create long test program with %d "
+ "lines! (%s) (in %s:%d)\n",
+ numLines, IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
/* Compile it */
- error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL,
+ NULL, NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
- free((void*)kernel_source);
+ free((void *)kernel_source);
}
/* Create library out of compiled templated kernels */
- cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", numLines, simple_kernels, NULL, NULL, &error);
- test_error( error, "Unable to create a multi-line library" );
+ cl_program my_newly_minted_library =
+ clLinkProgram(context, 1, &deviceID, "-create-library", numLines,
+ simple_kernels, NULL, NULL, &error);
+ test_error(error, "Unable to create a multi-line library");
- /* Link the program that calls the kernels and the library that contains them */
+ /* Link the program that calls the kernels and the library that contains
+ * them */
cl_program programs[2] = { program, my_newly_minted_library };
- cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, programs, NULL, NULL, &error);
- test_error( error, "Unable to link a program with a library" );
+ cl_program my_newly_linked_program = clLinkProgram(
+ context, 1, &deviceID, NULL, 2, programs, NULL, NULL, &error);
+ test_error(error, "Unable to link a program with a library");
// Create the composite kernel
- cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error);
- test_error( error, "Unable to create a composite kernel" );
+ cl_kernel kernel =
+ clCreateKernel(my_newly_linked_program, "CompositeKernel", &error);
+ test_error(error, "Unable to create a composite kernel");
// Run the composite kernel and verify the results
error = verifyCopyBuffer(context, queue, kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- for( i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
- free( (void*)lines[i] );
- free( (void*)lines[i+numLines+1] );
+ free((void *)lines[i]);
+ free((void *)lines[i + numLines + 1]);
}
- free( lines );
+ free(lines);
- for(i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
- error = clReleaseProgram( simple_kernels[i] );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(simple_kernels[i]);
+ test_error(error, "Unable to release program object");
}
- free( simple_kernels );
+ free(simple_kernels);
- error = clReleaseKernel( kernel );
- test_error( error, "Unable to release kernel object" );
+ error = clReleaseKernel(kernel);
+ test_error(error, "Unable to release kernel object");
- error = clReleaseProgram( my_newly_minted_library );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_minted_library);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_multi_file_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_multi_file_libraries(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- unsigned int toTest[] = { 2, 4, 8, 16, 32, 64, 128, 256, 0 }; // 512, 1024, 2048, 4096, 8192, 16384, 32768, 0 };
+ unsigned int toTest[] = {
+ 2, 4, 8, 16, 32, 64, 128, 256, 0
+ }; // 512, 1024, 2048, 4096, 8192, 16384, 32768, 0 };
unsigned int i;
- log_info( "Testing multi-file libraries ...this might take awhile...\n" );
+ log_info("Testing multi-file libraries ...this might take awhile...\n");
- for( i = 0; toTest[ i ] != 0; i++ )
+ for (i = 0; toTest[i] != 0; i++)
{
- log_info( " %d...\n", toTest[ i ] );
+ log_info(" %d...\n", toTest[i]);
#if defined(_WIN32)
clock_t start = clock();
-#elif defined(__linux__) || defined(__APPLE__)
- timeval time1, time2;
- gettimeofday(&time1, NULL);
+#elif defined(__linux__) || defined(__APPLE__)
+ timeval time1, time2;
+ gettimeofday(&time1, NULL);
#endif
- if( test_large_multi_file_library( context, deviceID, queue, toTest[ i ] ) != 0 )
+ if (test_large_multi_file_library(context, deviceID, queue, toTest[i])
+ != 0)
{
- log_error( "ERROR: multi-file library program test failed for %d lines! (in %s:%d)\n\n", toTest[ i ], __FILE__, __LINE__ );
+ log_error("ERROR: multi-file library program test failed for %d "
+ "lines! (in %s:%d)\n\n",
+ toTest[i], __FILE__, __LINE__);
return -1;
}
#if defined(_WIN32)
clock_t end = clock();
- log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] );
-#elif defined(__linux__) || defined(__APPLE__)
- gettimeofday(&time2, NULL);
- log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] );
+ log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false,
+ "clock() time in secs", "%d lines", toTest[i]);
+#elif defined(__linux__) || defined(__APPLE__)
+ gettimeofday(&time2, NULL);
+ log_perf((float)(float)(time2.tv_sec - time1.tv_sec)
+ + 1.0e-6 * (time2.tv_usec - time1.tv_usec),
+ false, "wall time in secs", "%d lines", toTest[i]);
#endif
}
return 0;
}
-int test_large_multiple_embedded_headers(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines)
+int test_large_multiple_embedded_headers(cl_context context,
+ cl_device_id deviceID,
+ cl_command_queue queue,
+ unsigned int numLines)
{
int error;
cl_program program;
@@ -432,29 +478,41 @@ int test_large_multiple_embedded_headers(cl_context context, cl_device_id device
unsigned int i;
char buffer[MAX_LINE_SIZE_IN_PROGRAM];
- simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program));
- if (simple_kernels == NULL) {
- log_error( "ERROR: Unable to allocate simple_kernels array with %d lines! (in %s:%d)\n", numLines, __FILE__, __LINE__ );
+ simple_kernels = (cl_program *)malloc(numLines * sizeof(cl_program));
+ if (simple_kernels == NULL)
+ {
+ log_error("ERROR: Unable to allocate simple_kernels array with %d "
+ "lines! (in %s:%d)\n",
+ numLines, __FILE__, __LINE__);
return -1;
}
- headers = (cl_program*)malloc(numLines*sizeof(cl_program));
- if (headers == NULL) {
- log_error( "ERROR: Unable to allocate headers array with %d lines! (in %s:%d)\n", numLines, __FILE__, __LINE__ );
+ headers = (cl_program *)malloc(numLines * sizeof(cl_program));
+ if (headers == NULL)
+ {
+ log_error("ERROR: Unable to allocate headers array with %d lines! (in "
+ "%s:%d)\n",
+ numLines, __FILE__, __LINE__);
return -1;
}
/* First, allocate the array for our line pointers */
- header_names = (const char**)malloc( numLines*sizeof( const char * ) );
- if (header_names == NULL) {
- log_error( "ERROR: Unable to allocate header_names array with %d lines! (in %s:%d)\n", numLines, __FILE__, __LINE__ );
+ header_names = (const char **)malloc(numLines * sizeof(const char *));
+ if (header_names == NULL)
+ {
+ log_error("ERROR: Unable to allocate header_names array with %d lines! "
+ "(in %s:%d)\n",
+ numLines, __FILE__, __LINE__);
return -1;
}
- lines = (const char **)malloc( (2*numLines + 2)*sizeof( const char * ) );
- if (lines == NULL) {
- log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__ );
+ lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *));
+ if (lines == NULL)
+ {
+ log_error(
+ "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n",
+ (2 * numLines + 2), __FILE__, __LINE__);
return -1;
}
- for( i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
sprintf(buffer, include_header_name_templates[i % 4], i);
lines[i] = _strdup(buffer);
@@ -462,154 +520,178 @@ int test_large_multiple_embedded_headers(cl_context context, cl_device_id device
header_names[i] = _strdup(buffer);
sprintf(buffer, composite_kernel_extern_template, i);
- const char* line = _strdup(buffer);
- error = create_single_kernel_helper_create_program(context, &headers[i], 1, &line);
- if( headers[i] == NULL || error != CL_SUCCESS )
+ const char *line = buffer;
+ error = create_single_kernel_helper_create_program(context, &headers[i],
+ 1, &line);
+ if (headers[i] == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__);
+ log_error("ERROR: Unable to create a simple header program! (%s in "
+ "%s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
}
/* First and last lines are easy */
- lines[ numLines ] = composite_kernel_start;
- lines[ 2* numLines + 1 ] = composite_kernel_end;
+ lines[numLines] = composite_kernel_start;
+ lines[2 * numLines + 1] = composite_kernel_end;
/* Fill the rest with templated kernels */
- for( i = numLines + 1; i < 2* numLines + 1; i++ )
+ for (i = numLines + 1; i < 2 * numLines + 1; i++)
{
sprintf(buffer, composite_kernel_template, i - numLines - 1);
- lines[ i ] = _strdup(buffer);
+ lines[i] = _strdup(buffer);
}
/* Try to create a program with these lines */
- error = create_single_kernel_helper_create_program(context, &program, 2 * numLines + 2, lines);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program,
+ 2 * numLines + 2, lines);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create long test program with %d lines! "
+ "(%s) (in %s:%d)\n",
+ numLines, IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
/* Compile it */
- error = clCompileProgram(program, 1, &deviceID, NULL, numLines, headers, header_names, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, numLines, headers,
+ header_names, NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
/* Create and compile templated kernels */
- for( i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
sprintf(buffer, simple_kernel_template, i);
- const char* kernel_source = _strdup(buffer);
- error = create_single_kernel_helper_create_program(context, &simple_kernels[i], 1, &kernel_source);
- if( simple_kernels[i] == NULL || error != CL_SUCCESS )
+ const char *kernel_source = _strdup(buffer);
+ error = create_single_kernel_helper_create_program(
+ context, &simple_kernels[i], 1, &kernel_source);
+ if (simple_kernels[i] == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create long test program with %d "
+ "lines! (%s) (in %s:%d)\n",
+ numLines, IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
/* Compile it */
- error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL,
+ NULL, NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
- free((void*)kernel_source);
+ free((void *)kernel_source);
}
/* Create library out of compiled templated kernels */
- cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", numLines, simple_kernels, NULL, NULL, &error);
- test_error( error, "Unable to create a multi-line library" );
+ cl_program my_newly_minted_library =
+ clLinkProgram(context, 1, &deviceID, "-create-library", numLines,
+ simple_kernels, NULL, NULL, &error);
+ test_error(error, "Unable to create a multi-line library");
- /* Link the program that calls the kernels and the library that contains them */
+ /* Link the program that calls the kernels and the library that contains
+ * them */
cl_program programs[2] = { program, my_newly_minted_library };
- cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, programs, NULL, NULL, &error);
- test_error( error, "Unable to link a program with a library" );
+ cl_program my_newly_linked_program = clLinkProgram(
+ context, 1, &deviceID, NULL, 2, programs, NULL, NULL, &error);
+ test_error(error, "Unable to link a program with a library");
// Create the composite kernel
- cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error);
- test_error( error, "Unable to create a composite kernel" );
+ cl_kernel kernel =
+ clCreateKernel(my_newly_linked_program, "CompositeKernel", &error);
+ test_error(error, "Unable to create a composite kernel");
// Run the composite kernel and verify the results
error = verifyCopyBuffer(context, queue, kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- for( i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
- free( (void*)lines[i] );
- free( (void*)header_names[i] );
+ free((void *)lines[i]);
+ free((void *)header_names[i]);
}
- for( i = numLines + 1; i < 2* numLines + 1; i++ )
+ for (i = numLines + 1; i < 2 * numLines + 1; i++)
{
- free( (void*)lines[i] );
+ free((void *)lines[i]);
}
- free( lines );
- free( header_names );
+ free(lines);
+ free(header_names);
- for(i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
- error = clReleaseProgram( simple_kernels[i] );
- test_error( error, "Unable to release program object" );
- error = clReleaseProgram( headers[i] );
- test_error( error, "Unable to release header program object" );
+ error = clReleaseProgram(simple_kernels[i]);
+ test_error(error, "Unable to release program object");
+ error = clReleaseProgram(headers[i]);
+ test_error(error, "Unable to release header program object");
}
- free( simple_kernels );
- free( headers );
+ free(simple_kernels);
+ free(headers);
- error = clReleaseKernel( kernel );
- test_error( error, "Unable to release kernel object" );
+ error = clReleaseKernel(kernel);
+ test_error(error, "Unable to release kernel object");
- error = clReleaseProgram( my_newly_minted_library );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_minted_library);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_multiple_embedded_headers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_multiple_embedded_headers(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- unsigned int toTest[] = { 2, 4, 8, 16, 32, 64, 128, 256, 0 }; // 512, 1024, 2048, 4096, 8192, 16384, 32768, 0 };
+ unsigned int toTest[] = {
+ 2, 4, 8, 16, 32, 64, 128, 256, 0
+ }; // 512, 1024, 2048, 4096, 8192, 16384, 32768, 0 };
unsigned int i;
- log_info( "Testing multiple embedded headers ...this might take awhile...\n" );
+ log_info(
+ "Testing multiple embedded headers ...this might take awhile...\n");
- for( i = 0; toTest[ i ] != 0; i++ )
+ for (i = 0; toTest[i] != 0; i++)
{
- log_info( " %d...\n", toTest[ i ] );
+ log_info(" %d...\n", toTest[i]);
#if defined(_WIN32)
clock_t start = clock();
-#elif defined(__linux__) || defined(__APPLE__)
- timeval time1, time2;
- gettimeofday(&time1, NULL);
+#elif defined(__linux__) || defined(__APPLE__)
+ timeval time1, time2;
+ gettimeofday(&time1, NULL);
#endif
- if( test_large_multiple_embedded_headers( context, deviceID, queue, toTest[ i ] ) != 0 )
+ if (test_large_multiple_embedded_headers(context, deviceID, queue,
+ toTest[i])
+ != 0)
{
- log_error( "ERROR: multiple embedded headers program test failed for %d lines! (in %s:%d)\n", toTest[ i ], __FILE__, __LINE__ );
+ log_error("ERROR: multiple embedded headers program test failed "
+ "for %d lines! (in %s:%d)\n",
+ toTest[i], __FILE__, __LINE__);
return -1;
}
#if defined(_WIN32)
clock_t end = clock();
- log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] );
-#elif defined(__linux__) || defined(__APPLE__)
- gettimeofday(&time2, NULL);
- log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] );
+ log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false,
+ "clock() time in secs", "%d lines", toTest[i]);
+#elif defined(__linux__) || defined(__APPLE__)
+ gettimeofday(&time2, NULL);
+ log_perf((float)(float)(time2.tv_sec - time1.tv_sec)
+ + 1.0e-6 * (time2.tv_usec - time1.tv_usec),
+ false, "wall time in secs", "%d lines", toTest[i]);
#endif
}
return 0;
}
-double logbase(double a, double base)
-{
- return log(a) / log(base);
-}
+double logbase(double a, double base) { return log(a) / log(base); }
-int test_large_multiple_libraries(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines)
+int test_large_multiple_libraries(cl_context context, cl_device_id deviceID,
+ cl_command_queue queue, unsigned int numLines)
{
int error;
cl_program *simple_kernels;
@@ -617,164 +699,202 @@ int test_large_multiple_libraries(cl_context context, cl_device_id deviceID, cl_
unsigned int i;
char buffer[MAX_LINE_SIZE_IN_PROGRAM];
/* I want to create (log2(N)+1)/2 libraries */
- unsigned int level = (unsigned int)(logbase(numLines, 2.0) + 1.000001)/2;
+ unsigned int level = (unsigned int)(logbase(numLines, 2.0) + 1.000001) / 2;
unsigned int numLibraries = (unsigned int)pow(2.0, level - 1.0);
- unsigned int numFilesInLib = numLines/numLibraries;
- cl_program *my_program_and_libraries = (cl_program*)malloc((1+numLibraries)*sizeof(cl_program));
- if (my_program_and_libraries == NULL) {
- log_error( "ERROR: Unable to allocate program array with %d programs! (in %s:%d)\n", (1+numLibraries), __FILE__, __LINE__);
+ unsigned int numFilesInLib = numLines / numLibraries;
+ cl_program *my_program_and_libraries =
+ (cl_program *)malloc((1 + numLibraries) * sizeof(cl_program));
+ if (my_program_and_libraries == NULL)
+ {
+ log_error("ERROR: Unable to allocate program array with %d programs! "
+ "(in %s:%d)\n",
+ (1 + numLibraries), __FILE__, __LINE__);
return -1;
}
- log_info("level - %d, numLibraries - %d, numFilesInLib - %d\n", level, numLibraries, numFilesInLib);
+ log_info("level - %d, numLibraries - %d, numFilesInLib - %d\n", level,
+ numLibraries, numFilesInLib);
- simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program));
- if (simple_kernels == NULL) {
- log_error( "ERROR: Unable to allocate kernels array with %d kernels! (in %s:%d)\n", numLines, __FILE__, __LINE__);
+ simple_kernels = (cl_program *)malloc(numLines * sizeof(cl_program));
+ if (simple_kernels == NULL)
+ {
+ log_error("ERROR: Unable to allocate kernels array with %d kernels! "
+ "(in %s:%d)\n",
+ numLines, __FILE__, __LINE__);
return -1;
}
/* First, allocate the array for our line pointers */
- lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) );
- if (lines == NULL) {
- log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__);
+ lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *));
+ if (lines == NULL)
+ {
+ log_error(
+ "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n",
+ (2 * numLines + 2), __FILE__, __LINE__);
return -1;
}
- for(i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
sprintf(buffer, composite_kernel_extern_template, i);
lines[i] = _strdup(buffer);
}
/* First and last lines are easy */
- lines[ numLines ] = composite_kernel_start;
- lines[ 2*numLines + 1] = composite_kernel_end;
+ lines[numLines] = composite_kernel_start;
+ lines[2 * numLines + 1] = composite_kernel_end;
/* Fill the rest with templated kernels */
- for(i = numLines + 1; i < 2*numLines + 1; i++ )
+ for (i = numLines + 1; i < 2 * numLines + 1; i++)
{
sprintf(buffer, composite_kernel_template, i - numLines - 1);
- lines[ i ] = _strdup(buffer);
+ lines[i] = _strdup(buffer);
}
/* Try to create a program with these lines */
- error = create_single_kernel_helper_create_program(context, &my_program_and_libraries[0], 2 * numLines + 2, lines);
- if( my_program_and_libraries[0] == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(
+ context, &my_program_and_libraries[0], 2 * numLines + 2, lines);
+ if (my_program_and_libraries[0] == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create long test program with %d lines! "
+ "(%s in %s:%d)\n",
+ numLines, IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
/* Compile it */
- error = clCompileProgram(my_program_and_libraries[0], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(my_program_and_libraries[0], 1, &deviceID, NULL, 0,
+ NULL, NULL, NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
/* Create and compile templated kernels */
- for(i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
sprintf(buffer, simple_kernel_template, i);
- const char* kernel_source = _strdup(buffer);
- error = create_single_kernel_helper_create_program(context, &simple_kernels[i], 1, &kernel_source);
- if( simple_kernels[i] == NULL || error != CL_SUCCESS )
+ const char *kernel_source = _strdup(buffer);
+ error = create_single_kernel_helper_create_program(
+ context, &simple_kernels[i], 1, &kernel_source);
+ if (simple_kernels[i] == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create long test program with %d "
+ "lines! (%s in %s:%d)\n",
+ numLines, IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
/* Compile it */
- error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL,
+ NULL, NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
- free((void*)kernel_source);
+ free((void *)kernel_source);
}
/* Create library out of compiled templated kernels */
- for(i = 0; i < numLibraries; i++) {
- my_program_and_libraries[i+1] = clLinkProgram(context, 1, &deviceID, "-create-library", numFilesInLib, simple_kernels+i*numFilesInLib, NULL, NULL, &error);
- test_error( error, "Unable to create a multi-line library" );
+ for (i = 0; i < numLibraries; i++)
+ {
+ my_program_and_libraries[i + 1] = clLinkProgram(
+ context, 1, &deviceID, "-create-library", numFilesInLib,
+ simple_kernels + i * numFilesInLib, NULL, NULL, &error);
+ test_error(error, "Unable to create a multi-line library");
}
- /* Link the program that calls the kernels and the library that contains them */
- cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, numLibraries+1, my_program_and_libraries, NULL, NULL, &error);
- test_error( error, "Unable to link a program with a library" );
+ /* Link the program that calls the kernels and the library that contains
+ * them */
+ cl_program my_newly_linked_program =
+ clLinkProgram(context, 1, &deviceID, NULL, numLibraries + 1,
+ my_program_and_libraries, NULL, NULL, &error);
+ test_error(error, "Unable to link a program with a library");
// Create the composite kernel
- cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error);
- test_error( error, "Unable to create a composite kernel" );
+ cl_kernel kernel =
+ clCreateKernel(my_newly_linked_program, "CompositeKernel", &error);
+ test_error(error, "Unable to create a composite kernel");
// Run the composite kernel and verify the results
error = verifyCopyBuffer(context, queue, kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
/* All done! */
- for(i = 0; i <= numLibraries; i++) {
- error = clReleaseProgram( my_program_and_libraries[i] );
- test_error( error, "Unable to release program object" );
+ for (i = 0; i <= numLibraries; i++)
+ {
+ error = clReleaseProgram(my_program_and_libraries[i]);
+ test_error(error, "Unable to release program object");
}
- free( my_program_and_libraries );
- for(i = 0; i < numLines; i++)
+ free(my_program_and_libraries);
+ for (i = 0; i < numLines; i++)
{
- free( (void*)lines[i] );
+ free((void *)lines[i]);
}
- for(i = numLines + 1; i < 2*numLines + 1; i++ )
+ for (i = numLines + 1; i < 2 * numLines + 1; i++)
{
- free( (void*)lines[i] );
+ free((void *)lines[i]);
}
- free( lines );
+ free(lines);
- for(i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
- error = clReleaseProgram( simple_kernels[i] );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(simple_kernels[i]);
+ test_error(error, "Unable to release program object");
}
- free( simple_kernels );
+ free(simple_kernels);
- error = clReleaseKernel( kernel );
- test_error( error, "Unable to release kernel object" );
+ error = clReleaseKernel(kernel);
+ test_error(error, "Unable to release kernel object");
- error = clReleaseProgram( my_newly_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_multiple_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_multiple_libraries(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- unsigned int toTest[] = { 2, 8, 32, 128, 256, 0 }; // 512, 2048, 8192, 32768, 0 };
+ unsigned int toTest[] = {
+ 2, 8, 32, 128, 256, 0
+ }; // 512, 2048, 8192, 32768, 0 };
unsigned int i;
- log_info( "Testing multiple libraries ...this might take awhile...\n" );
+ log_info("Testing multiple libraries ...this might take awhile...\n");
- for( i = 0; toTest[ i ] != 0; i++ )
+ for (i = 0; toTest[i] != 0; i++)
{
- log_info( " %d...\n", toTest[ i ] );
+ log_info(" %d...\n", toTest[i]);
#if defined(_WIN32)
clock_t start = clock();
-#elif defined(__linux__) || defined(__APPLE__)
- timeval time1, time2;
- gettimeofday(&time1, NULL);
+#elif defined(__linux__) || defined(__APPLE__)
+ timeval time1, time2;
+ gettimeofday(&time1, NULL);
#endif
- if( test_large_multiple_libraries( context, deviceID, queue, toTest[ i ] ) != 0 )
+ if (test_large_multiple_libraries(context, deviceID, queue, toTest[i])
+ != 0)
{
- log_error( "ERROR: multiple library program test failed for %d lines! (in %s:%d)\n\n", toTest[ i ], __FILE__, __LINE__ );
+ log_error("ERROR: multiple library program test failed for %d "
+ "lines! (in %s:%d)\n\n",
+ toTest[i], __FILE__, __LINE__);
return -1;
}
#if defined(_WIN32)
clock_t end = clock();
- log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] );
-#elif defined(__linux__) || defined(__APPLE__)
- gettimeofday(&time2, NULL);
- log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] );
+ log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false,
+ "clock() time in secs", "%d lines", toTest[i]);
+#elif defined(__linux__) || defined(__APPLE__)
+ gettimeofday(&time2, NULL);
+ log_perf((float)(float)(time2.tv_sec - time1.tv_sec)
+ + 1.0e-6 * (time2.tv_usec - time1.tv_usec),
+ false, "wall time in secs", "%d lines", toTest[i]);
#endif
}
return 0;
}
-int test_large_multiple_files_multiple_libraries(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines)
+int test_large_multiple_files_multiple_libraries(cl_context context,
+ cl_device_id deviceID,
+ cl_command_queue queue,
+ unsigned int numLines)
{
int error;
cl_program *simple_kernels;
@@ -782,915 +902,1173 @@ int test_large_multiple_files_multiple_libraries(cl_context context, cl_device_i
unsigned int i;
char buffer[MAX_LINE_SIZE_IN_PROGRAM];
/* I want to create (log2(N)+1)/4 libraries */
- unsigned int level = (unsigned int)(logbase(numLines, 2.0) + 1.000001)/2;
+ unsigned int level = (unsigned int)(logbase(numLines, 2.0) + 1.000001) / 2;
unsigned int numLibraries = (unsigned int)pow(2.0, level - 2.0);
- unsigned int numFilesInLib = numLines/(2*numLibraries);
- cl_program *my_programs_and_libraries = (cl_program*)malloc((1+numLibraries+numLibraries*numFilesInLib)*sizeof(cl_program));
- if (my_programs_and_libraries == NULL) {
- log_error( "ERROR: Unable to allocate program array with %d programs! (in %s:%d)\n", (1+numLibraries+numLibraries*numFilesInLib), __FILE__, __LINE__ );
+ unsigned int numFilesInLib = numLines / (2 * numLibraries);
+ cl_program *my_programs_and_libraries = (cl_program *)malloc(
+ (1 + numLibraries + numLibraries * numFilesInLib) * sizeof(cl_program));
+ if (my_programs_and_libraries == NULL)
+ {
+ log_error("ERROR: Unable to allocate program array with %d programs! "
+ "(in %s:%d)\n",
+ (1 + numLibraries + numLibraries * numFilesInLib), __FILE__,
+ __LINE__);
return -1;
}
- log_info("level - %d, numLibraries - %d, numFilesInLib - %d\n", level, numLibraries, numFilesInLib);
+ log_info("level - %d, numLibraries - %d, numFilesInLib - %d\n", level,
+ numLibraries, numFilesInLib);
- simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program));
- if (simple_kernels == NULL) {
- log_error( "ERROR: Unable to allocate kernels array with %d kernels! (in %s:%d)\n", numLines, __FILE__, __LINE__ );
+ simple_kernels = (cl_program *)malloc(numLines * sizeof(cl_program));
+ if (simple_kernels == NULL)
+ {
+ log_error("ERROR: Unable to allocate kernels array with %d kernels! "
+ "(in %s:%d)\n",
+ numLines, __FILE__, __LINE__);
return -1;
}
/* First, allocate the array for our line pointers */
- lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) );
- if (lines == NULL) {
- log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__ );
+ lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *));
+ if (lines == NULL)
+ {
+ log_error(
+ "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n",
+ (2 * numLines + 2), __FILE__, __LINE__);
return -1;
}
- for(i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
sprintf(buffer, composite_kernel_extern_template, i);
lines[i] = _strdup(buffer);
}
/* First and last lines are easy */
- lines[ numLines ] = composite_kernel_start;
- lines[ 2*numLines + 1] = composite_kernel_end;
+ lines[numLines] = composite_kernel_start;
+ lines[2 * numLines + 1] = composite_kernel_end;
/* Fill the rest with templated kernels */
- for(i = numLines + 1; i < 2*numLines + 1; i++ )
+ for (i = numLines + 1; i < 2 * numLines + 1; i++)
{
sprintf(buffer, composite_kernel_template, i - numLines - 1);
- lines[ i ] = _strdup(buffer);
+ lines[i] = _strdup(buffer);
}
/* Try to create a program with these lines */
- error = create_single_kernel_helper_create_program(context, &my_programs_and_libraries[0], 2 * numLines + 2, lines);
- if( my_programs_and_libraries[0] == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(
+ context, &my_programs_and_libraries[0], 2 * numLines + 2, lines);
+ if (my_programs_and_libraries[0] == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create long test program with %d lines! "
+ "(%s in %s:%d)\n",
+ numLines, IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
/* Compile it */
- error = clCompileProgram(my_programs_and_libraries[0], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(my_programs_and_libraries[0], 1, &deviceID, NULL,
+ 0, NULL, NULL, NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
/* Create and compile templated kernels */
- for(i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
sprintf(buffer, simple_kernel_template, i);
- const char* kernel_source = _strdup(buffer);
- error = create_single_kernel_helper_create_program(context, &simple_kernels[i], 1, &kernel_source);
- if( simple_kernels[i] == NULL || error != CL_SUCCESS )
+ const char *kernel_source = _strdup(buffer);
+ error = create_single_kernel_helper_create_program(
+ context, &simple_kernels[i], 1, &kernel_source);
+ if (simple_kernels[i] == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create long test program with %d "
+ "lines! (%s in %s:%d)\n",
+ numLines, IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
/* Compile it */
- error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL,
+ NULL, NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
- free((void*)kernel_source);
+ free((void *)kernel_source);
}
/* Copy already compiled kernels */
- for( i = 0; i < numLibraries*numFilesInLib; i++) {
- my_programs_and_libraries[i+1] = simple_kernels[i];
+ for (i = 0; i < numLibraries * numFilesInLib; i++)
+ {
+ my_programs_and_libraries[i + 1] = simple_kernels[i];
}
/* Create library out of compiled templated kernels */
- for( i = 0; i < numLibraries; i++) {
- my_programs_and_libraries[i+1+numLibraries*numFilesInLib] = clLinkProgram(context, 1, &deviceID, "-create-library", numFilesInLib, simple_kernels+(i*numFilesInLib+numLibraries*numFilesInLib), NULL, NULL, &error);
- test_error( error, "Unable to create a multi-line library" );
+ for (i = 0; i < numLibraries; i++)
+ {
+ my_programs_and_libraries[i + 1 + numLibraries * numFilesInLib] =
+ clLinkProgram(
+ context, 1, &deviceID, "-create-library", numFilesInLib,
+ simple_kernels
+ + (i * numFilesInLib + numLibraries * numFilesInLib),
+ NULL, NULL, &error);
+ test_error(error, "Unable to create a multi-line library");
}
- /* Link the program that calls the kernels and the library that contains them */
- cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, numLibraries+1+numLibraries*numFilesInLib, my_programs_and_libraries, NULL, NULL, &error);
- test_error( error, "Unable to link a program with a library" );
+ /* Link the program that calls the kernels and the library that contains
+ * them */
+ cl_program my_newly_linked_program =
+ clLinkProgram(context, 1, &deviceID, NULL,
+ numLibraries + 1 + numLibraries * numFilesInLib,
+ my_programs_and_libraries, NULL, NULL, &error);
+ test_error(error, "Unable to link a program with a library");
// Create the composite kernel
- cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error);
- test_error( error, "Unable to create a composite kernel" );
+ cl_kernel kernel =
+ clCreateKernel(my_newly_linked_program, "CompositeKernel", &error);
+ test_error(error, "Unable to create a composite kernel");
// Run the composite kernel and verify the results
error = verifyCopyBuffer(context, queue, kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
/* All done! */
- for(i = 0; i < numLibraries+1+numLibraries*numFilesInLib; i++) {
- error = clReleaseProgram( my_programs_and_libraries[i] );
- test_error( error, "Unable to release program object" );
+ for (i = 0; i < numLibraries + 1 + numLibraries * numFilesInLib; i++)
+ {
+ error = clReleaseProgram(my_programs_and_libraries[i]);
+ test_error(error, "Unable to release program object");
}
- free( my_programs_and_libraries );
+ free(my_programs_and_libraries);
- for(i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
- free( (void*)lines[i] );
+ free((void *)lines[i]);
}
- for(i = numLines + 1; i < 2*numLines + 1; i++ )
+ for (i = numLines + 1; i < 2 * numLines + 1; i++)
{
- free( (void*)lines[i] );
+ free((void *)lines[i]);
}
- free( lines );
+ free(lines);
- for(i = numLibraries*numFilesInLib; i < numLines; i++)
+ for (i = numLibraries * numFilesInLib; i < numLines; i++)
{
- error = clReleaseProgram( simple_kernels[i] );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(simple_kernels[i]);
+ test_error(error, "Unable to release program object");
}
- free( simple_kernels );
+ free(simple_kernels);
- error = clReleaseKernel( kernel );
- test_error( error, "Unable to release kernel object" );
+ error = clReleaseKernel(kernel);
+ test_error(error, "Unable to release kernel object");
- error = clReleaseProgram( my_newly_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_multiple_files_multiple_libraries(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_multiple_files_multiple_libraries(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
- unsigned int toTest[] = { 8, 32, 128, 256, 0 }; // 512, 2048, 8192, 32768, 0 };
+ unsigned int toTest[] = { 8, 32, 128, 256,
+ 0 }; // 512, 2048, 8192, 32768, 0 };
unsigned int i;
- log_info( "Testing multiple files and multiple libraries ...this might take awhile...\n" );
+ log_info("Testing multiple files and multiple libraries ...this might take "
+ "awhile...\n");
- for( i = 0; toTest[ i ] != 0; i++ )
+ for (i = 0; toTest[i] != 0; i++)
{
- log_info( " %d...\n", toTest[ i ] );
+ log_info(" %d...\n", toTest[i]);
#if defined(_WIN32)
clock_t start = clock();
-#elif defined(__linux__) || defined(__APPLE__)
- timeval time1, time2;
- gettimeofday(&time1, NULL);
+#elif defined(__linux__) || defined(__APPLE__)
+ timeval time1, time2;
+ gettimeofday(&time1, NULL);
#endif
- if( test_large_multiple_files_multiple_libraries( context, deviceID, queue, toTest[ i ] ) != 0 )
+ if (test_large_multiple_files_multiple_libraries(context, deviceID,
+ queue, toTest[i])
+ != 0)
{
- log_error( "ERROR: multiple files, multiple libraries program test failed for %d lines! (in %s:%d)\n\n", toTest[ i ], __FILE__, __LINE__ );
+ log_error("ERROR: multiple files, multiple libraries program test "
+ "failed for %d lines! (in %s:%d)\n\n",
+ toTest[i], __FILE__, __LINE__);
return -1;
}
#if defined(_WIN32)
clock_t end = clock();
- log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] );
-#elif defined(__linux__) || defined(__APPLE__)
- gettimeofday(&time2, NULL);
- log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] );
+ log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false,
+ "clock() time in secs", "%d lines", toTest[i]);
+#elif defined(__linux__) || defined(__APPLE__)
+ gettimeofday(&time2, NULL);
+ log_perf((float)(float)(time2.tv_sec - time1.tv_sec)
+ + 1.0e-6 * (time2.tv_usec - time1.tv_usec),
+ false, "wall time in secs", "%d lines", toTest[i]);
#endif
}
return 0;
}
-int test_large_multiple_files(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines)
+int test_large_multiple_files(cl_context context, cl_device_id deviceID,
+ cl_command_queue queue, unsigned int numLines)
{
int error;
const char **lines;
unsigned int i;
char buffer[MAX_LINE_SIZE_IN_PROGRAM];
- cl_program *my_programs = (cl_program*)malloc((1+numLines)*sizeof(cl_program));
+ cl_program *my_programs =
+ (cl_program *)malloc((1 + numLines) * sizeof(cl_program));
- if (my_programs == NULL) {
- log_error( "ERROR: Unable to allocate my_programs array with %d programs! (in %s:%d)\n", (1+numLines), __FILE__, __LINE__);
+ if (my_programs == NULL)
+ {
+ log_error("ERROR: Unable to allocate my_programs array with %d "
+ "programs! (in %s:%d)\n",
+ (1 + numLines), __FILE__, __LINE__);
return -1;
}
/* First, allocate the array for our line pointers */
- lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) );
- if (lines == NULL) {
- log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__);
+ lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *));
+ if (lines == NULL)
+ {
+ log_error(
+ "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n",
+ (2 * numLines + 2), __FILE__, __LINE__);
return -1;
}
- for(i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
sprintf(buffer, composite_kernel_extern_template, i);
lines[i] = _strdup(buffer);
}
/* First and last lines are easy */
- lines[ numLines ] = composite_kernel_start;
- lines[ 2* numLines + 1] = composite_kernel_end;
+ lines[numLines] = composite_kernel_start;
+ lines[2 * numLines + 1] = composite_kernel_end;
/* Fill the rest with templated kernels */
- for(i = numLines + 1; i < 2*numLines + 1; i++ )
+ for (i = numLines + 1; i < 2 * numLines + 1; i++)
{
sprintf(buffer, composite_kernel_template, i - numLines - 1);
- lines[ i ] = _strdup(buffer);
+ lines[i] = _strdup(buffer);
}
/* Try to create a program with these lines */
- error = create_single_kernel_helper_create_program(context, &my_programs[0], 2 * numLines + 2, lines);
- if( my_programs[0] == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &my_programs[0],
+ 2 * numLines + 2, lines);
+ if (my_programs[0] == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create long test program with %d lines! "
+ "(%s in %s:%d)\n",
+ numLines, IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
/* Compile it */
- error = clCompileProgram(my_programs[0], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(my_programs[0], 1, &deviceID, NULL, 0, NULL, NULL,
+ NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
/* Create and compile templated kernels */
- for( i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
sprintf(buffer, simple_kernel_template, i);
- const char* kernel_source = _strdup(buffer);
- error = create_single_kernel_helper_create_program(context, &my_programs[i + 1], 1, &kernel_source);
- if( my_programs[i+1] == NULL || error != CL_SUCCESS )
+ const char *kernel_source = _strdup(buffer);
+ error = create_single_kernel_helper_create_program(
+ context, &my_programs[i + 1], 1, &kernel_source);
+ if (my_programs[i + 1] == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create long test program with %d "
+ "lines! (%s in %s:%d)\n",
+ numLines, IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
/* Compile it */
- error = clCompileProgram(my_programs[i+1], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(my_programs[i + 1], 1, &deviceID, NULL, 0,
+ NULL, NULL, NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
- free((void*)kernel_source);
+ free((void *)kernel_source);
}
- /* Link the program that calls the kernels and the library that contains them */
- cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1+numLines, my_programs, NULL, NULL, &error);
- test_error( error, "Unable to link a program with a library" );
+ /* Link the program that calls the kernels and the library that contains
+ * them */
+ cl_program my_newly_linked_program =
+ clLinkProgram(context, 1, &deviceID, NULL, 1 + numLines, my_programs,
+ NULL, NULL, &error);
+ test_error(error, "Unable to link a program with a library");
// Create the composite kernel
- cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error);
- test_error( error, "Unable to create a composite kernel" );
+ cl_kernel kernel =
+ clCreateKernel(my_newly_linked_program, "CompositeKernel", &error);
+ test_error(error, "Unable to create a composite kernel");
// Run the composite kernel and verify the results
error = verifyCopyBuffer(context, queue, kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
/* All done! */
- for(i = 0; i < 1+numLines; i++) {
- error = clReleaseProgram( my_programs[i] );
- test_error( error, "Unable to release program object" );
+ for (i = 0; i < 1 + numLines; i++)
+ {
+ error = clReleaseProgram(my_programs[i]);
+ test_error(error, "Unable to release program object");
}
- free( my_programs );
- for(i = 0; i < numLines; i++)
+ free(my_programs);
+ for (i = 0; i < numLines; i++)
{
- free( (void*)lines[i] );
+ free((void *)lines[i]);
}
- for(i = numLines + 1; i < 2*numLines + 1; i++ )
+ for (i = numLines + 1; i < 2 * numLines + 1; i++)
{
- free( (void*)lines[i] );
+ free((void *)lines[i]);
}
- free( lines );
+ free(lines);
- error = clReleaseKernel( kernel );
- test_error( error, "Unable to release kernel object" );
+ error = clReleaseKernel(kernel);
+ test_error(error, "Unable to release kernel object");
- error = clReleaseProgram( my_newly_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_multiple_files(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_multiple_files(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
- unsigned int toTest[] = { 8, 32, 128, 256, 0 }; // 512, 2048, 8192, 32768, 0 };
+ unsigned int toTest[] = { 8, 32, 128, 256,
+ 0 }; // 512, 2048, 8192, 32768, 0 };
unsigned int i;
- log_info( "Testing multiple files compilation and linking into a single executable ...this might take awhile...\n" );
+ log_info("Testing multiple files compilation and linking into a single "
+ "executable ...this might take awhile...\n");
- for( i = 0; toTest[ i ] != 0; i++ )
+ for (i = 0; toTest[i] != 0; i++)
{
- log_info( " %d...\n", toTest[ i ] );
+ log_info(" %d...\n", toTest[i]);
#if defined(_WIN32)
clock_t start = clock();
-#elif defined(__linux__) || defined(__APPLE__)
- timeval time1, time2;
- gettimeofday(&time1, NULL);
+#elif defined(__linux__) || defined(__APPLE__)
+ timeval time1, time2;
+ gettimeofday(&time1, NULL);
#endif
- if( test_large_multiple_files( context, deviceID, queue, toTest[ i ] ) != 0 )
+ if (test_large_multiple_files(context, deviceID, queue, toTest[i]) != 0)
{
- log_error( "ERROR: multiple files program test failed for %d lines! (in %s:%d)\n\n", toTest[ i ], __FILE__, __LINE__ );
+ log_error("ERROR: multiple files program test failed for %d lines! "
+ "(in %s:%d)\n\n",
+ toTest[i], __FILE__, __LINE__);
return -1;
}
#if defined(_WIN32)
clock_t end = clock();
- log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] );
-#elif defined(__linux__) || defined(__APPLE__)
- gettimeofday(&time2, NULL);
- log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] );
+ log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false,
+ "clock() time in secs", "%d lines", toTest[i]);
+#elif defined(__linux__) || defined(__APPLE__)
+ gettimeofday(&time2, NULL);
+ log_perf((float)(float)(time2.tv_sec - time1.tv_sec)
+ + 1.0e-6 * (time2.tv_usec - time1.tv_usec),
+ false, "wall time in secs", "%d lines", toTest[i]);
#endif
}
return 0;
}
-int test_simple_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_simple_compile_only(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_program program;
log_info("Testing a simple compilation only...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_kernel);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple program");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_simple_static_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_simple_static_compile_only(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_program program;
log_info("Testing a simple static compilations only...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &compile_static_var);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &compile_static_var);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple static variable test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create a simple static variable test "
+ "program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
log_info("Compiling a static variable...\n");
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple static variable program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple static variable program");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = create_single_kernel_helper_create_program(context, &program, 1, &compile_static_struct);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &compile_static_struct);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple static struct test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create a simple static struct test "
+ "program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
log_info("Compiling a static struct...\n");
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple static variable program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple static variable program");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = create_single_kernel_helper_create_program(context, &program, 1, &compile_static_function);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(
+ context, &program, 1, &compile_static_function);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple static function test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create a simple static function test "
+ "program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
log_info("Compiling a static function...\n");
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple static function program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple static function program");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_simple_extern_compile_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_simple_extern_compile_only(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_program program;
log_info("Testing a simple extern compilations only...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_header);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_header);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple extern kernel test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create a simple extern kernel test "
+ "program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
log_info("Compiling an extern kernel...\n");
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple extern kernel program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple extern kernel program");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = create_single_kernel_helper_create_program(context, &program, 1, &compile_extern_var);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &compile_extern_var);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple extern variable test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create a simple extern variable test "
+ "program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
log_info("Compiling an extern variable...\n");
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple extern variable program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple extern variable program");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = create_single_kernel_helper_create_program(context, &program, 1, &compile_extern_struct);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &compile_extern_struct);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple extern struct test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create a simple extern struct test "
+ "program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
log_info("Compiling an extern struct...\n");
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple extern variable program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple extern variable program");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = create_single_kernel_helper_create_program(context, &program, 1, &compile_extern_function);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(
+ context, &program, 1, &compile_extern_function);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple extern function test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create a simple extern function test "
+ "program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
log_info("Compiling an extern function...\n");
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple extern function program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple extern function program");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
return 0;
}
-struct simple_user_data {
- const char* m_message;
- cl_event m_event;
+struct simple_user_data
+{
+ const char *m_message;
+ cl_event m_event;
};
-const char* once_upon_a_midnight_dreary = "Once upon a midnight dreary!";
+const char *once_upon_a_midnight_dreary = "Once upon a midnight dreary!";
-static void CL_CALLBACK simple_compile_callback(cl_program program, void* user_data)
+static void CL_CALLBACK simple_compile_callback(cl_program program,
+ void *user_data)
{
- simple_user_data* simple_compile_user_data = (simple_user_data*)user_data;
- log_info("in the simple_compile_callback: program %p just completed compiling with '%s'\n", program, simple_compile_user_data->m_message);
- if (strcmp(once_upon_a_midnight_dreary, simple_compile_user_data->m_message) != 0)
+ simple_user_data *simple_compile_user_data = (simple_user_data *)user_data;
+ log_info("in the simple_compile_callback: program %p just completed "
+ "compiling with '%s'\n",
+ program, simple_compile_user_data->m_message);
+ if (strcmp(once_upon_a_midnight_dreary, simple_compile_user_data->m_message)
+ != 0)
{
- log_error("ERROR: in the simple_compile_callback: Expected '%s' and got %s (in %s:%d)!\n", once_upon_a_midnight_dreary, simple_compile_user_data->m_message, __FILE__, __LINE__);
+ log_error("ERROR: in the simple_compile_callback: Expected '%s' and "
+ "got %s (in %s:%d)!\n",
+ once_upon_a_midnight_dreary,
+ simple_compile_user_data->m_message, __FILE__, __LINE__);
}
int error;
- log_info("in the simple_compile_callback: program %p just completed compiling with '%p'\n", program, simple_compile_user_data->m_event);
+ log_info("in the simple_compile_callback: program %p just completed "
+ "compiling with '%p'\n",
+ program, simple_compile_user_data->m_event);
- error = clSetUserEventStatus(simple_compile_user_data->m_event, CL_COMPLETE);
+ error =
+ clSetUserEventStatus(simple_compile_user_data->m_event, CL_COMPLETE);
if (error != CL_SUCCESS)
{
- log_error( "ERROR: in the simple_compile_callback: Unable to set user event status to CL_COMPLETE! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: in the simple_compile_callback: Unable to set user "
+ "event status to CL_COMPLETE! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
exit(-1);
}
- log_info("in the simple_compile_callback: Successfully signaled compile_program_completion_event!\n");
+ log_info("in the simple_compile_callback: Successfully signaled "
+ "compile_program_completion_event!\n");
}
-int test_simple_compile_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_simple_compile_with_callback(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_program program;
cl_event compile_program_completion_event;
log_info("Testing a simple compilation with callback...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_kernel);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
compile_program_completion_event = clCreateUserEvent(context, &error);
- test_error( error, "Unable to create a user event");
+ test_error(error, "Unable to create a user event");
- simple_user_data simple_compile_user_data = {once_upon_a_midnight_dreary, compile_program_completion_event};
+ simple_user_data simple_compile_user_data = {
+ once_upon_a_midnight_dreary, compile_program_completion_event
+ };
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, simple_compile_callback, (void*)&simple_compile_user_data);
- test_error( error, "Unable to compile a simple program with a callback" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL,
+ simple_compile_callback,
+ (void *)&simple_compile_user_data);
+ test_error(error, "Unable to compile a simple program with a callback");
error = clWaitForEvents(1, &compile_program_completion_event);
- test_error( error, "clWaitForEvents failed when waiting on compile_program_completion_event");
+ test_error(error,
+ "clWaitForEvents failed when waiting on "
+ "compile_program_completion_event");
/* All done! */
error = clReleaseEvent(compile_program_completion_event);
- test_error( error, "Unable to release event object" );
+ test_error(error, "Unable to release event object");
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_simple_embedded_header_compile(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_simple_embedded_header_compile(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
int error;
cl_program program, header;
log_info("Testing a simple embedded header compile only...\n");
- program = clCreateProgramWithSource(context, 1, &another_simple_kernel_with_header, NULL, &error);
- if( program == NULL || error != CL_SUCCESS )
+ program = clCreateProgramWithSource(
+ context, 1, &another_simple_kernel_with_header, NULL, &error);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- header = clCreateProgramWithSource(context, 1, &simple_header, NULL, &error);
- if( header == NULL || error != CL_SUCCESS )
+ header =
+ clCreateProgramWithSource(context, 1, &simple_header, NULL, &error);
+ if (header == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple header program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, &simple_header_name, NULL, NULL);
- test_error( error, "Unable to compile a simple program with embedded header" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header,
+ &simple_header_name, NULL, NULL);
+ test_error(error,
+ "Unable to compile a simple program with embedded header");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( header );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(header);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_simple_link_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_simple_link_only(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_program program;
log_info("Testing a simple linking only...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_kernel);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple program");
- cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error);
- test_error( error, "Unable to link a simple program" );
+ cl_program my_newly_linked_program = clLinkProgram(
+ context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error);
+ test_error(error, "Unable to link a simple program");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_two_file_regular_variable_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_two_file_regular_variable_access(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
int error;
cl_program program, second_program, my_newly_linked_program;
- const char* sources[2] = {simple_kernel, compile_regular_var}; // here we want to avoid linking error due to lack of kernels
- log_info("Compiling and linking two program objects, where one tries to access regular variable from another...\n");
- error = create_single_kernel_helper_create_program(context, &program, 2, sources);
- if( program == NULL || error != CL_SUCCESS )
+ const char *sources[2] = {
+ simple_kernel, compile_regular_var
+ }; // here we want to avoid linking error due to lack of kernels
+ log_info("Compiling and linking two program objects, where one tries to "
+ "access regular variable from another...\n");
+ error = create_single_kernel_helper_create_program(context, &program, 2,
+ sources);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a test program with regular variable! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create a test program with regular "
+ "variable! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program with regular function" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error,
+ "Unable to compile a simple program with regular function");
- error = create_single_kernel_helper_create_program(context, &second_program, 1, &link_static_var_access);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(
+ context, &second_program, 1, &link_static_var_access);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a test program that tries to access a regular variable! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create a test program that tries to access "
+ "a regular variable! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a program that tries to access a regular variable" );
+ error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL,
+ NULL, NULL);
+ test_error(
+ error,
+ "Unable to compile a program that tries to access a regular variable");
cl_program two_programs[2] = { program, second_program };
- my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, two_programs, NULL, NULL, &error);
- test_error( error, "clLinkProgram: Expected a different error code while linking a program that tries to access a regular variable" );
+ my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2,
+ two_programs, NULL, NULL, &error);
+ test_error(error,
+ "clLinkProgram: Expected a different error code while linking a "
+ "program that tries to access a regular variable");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( second_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(second_program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_two_file_regular_struct_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_two_file_regular_struct_access(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
int error;
cl_program program, second_program, my_newly_linked_program;
- const char* sources[2] = {simple_kernel, compile_regular_struct}; // here we want to avoid linking error due to lack of kernels
- log_info("Compiling and linking two program objects, where one tries to access regular struct from another...\n");
- error = create_single_kernel_helper_create_program(context, &program, 2, sources);
- if( program == NULL || error != CL_SUCCESS )
+ const char *sources[2] = {
+ simple_kernel, compile_regular_struct
+ }; // here we want to avoid linking error due to lack of kernels
+ log_info("Compiling and linking two program objects, where one tries to "
+ "access regular struct from another...\n");
+ error = create_single_kernel_helper_create_program(context, &program, 2,
+ sources);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a test program with regular struct! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create a test program with regular struct! "
+ "(%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program with regular struct" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple program with regular struct");
- error = create_single_kernel_helper_create_program(context, &second_program, 1, &link_static_struct_access);
- if( second_program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(
+ context, &second_program, 1, &link_static_struct_access);
+ if (second_program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a test program that tries to access a regular struct! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create a test program that tries to access "
+ "a regular struct! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a program that tries to access a regular struct" );
+ error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL,
+ NULL, NULL);
+ test_error(
+ error,
+ "Unable to compile a program that tries to access a regular struct");
cl_program two_programs[2] = { program, second_program };
- my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, two_programs, NULL, NULL, &error);
- test_error( error, "clLinkProgram: Expected a different error code while linking a program that tries to access a regular struct" );
+ my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2,
+ two_programs, NULL, NULL, &error);
+ test_error(error,
+ "clLinkProgram: Expected a different error code while linking a "
+ "program that tries to access a regular struct");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( second_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(second_program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_two_file_regular_function_access(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_two_file_regular_function_access(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
int error;
cl_program program, second_program, my_newly_linked_program;
- const char* sources[2] = {simple_kernel, compile_regular_function}; // here we want to avoid linking error due to lack of kernels
- log_info("Compiling and linking two program objects, where one tries to access regular function from another...\n");
- error = create_single_kernel_helper_create_program(context, &program, 2, sources);
- if( program == NULL || error != CL_SUCCESS )
+ const char *sources[2] = {
+ simple_kernel, compile_regular_function
+ }; // here we want to avoid linking error due to lack of kernels
+ log_info("Compiling and linking two program objects, where one tries to "
+ "access regular function from another...\n");
+ error = create_single_kernel_helper_create_program(context, &program, 2,
+ sources);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a test program with regular function! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create a test program with regular "
+ "function! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program with regular function" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error,
+ "Unable to compile a simple program with regular function");
- error = create_single_kernel_helper_create_program(context, &second_program, 1, &link_static_function_access);
- if( second_program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(
+ context, &second_program, 1, &link_static_function_access);
+ if (second_program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a test program that tries to access a regular function! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create a test program that tries to access "
+ "a regular function! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a program that tries to access a regular function" );
+ error = clCompileProgram(second_program, 1, &deviceID, NULL, 0, NULL, NULL,
+ NULL, NULL);
+ test_error(
+ error,
+ "Unable to compile a program that tries to access a regular function");
cl_program two_programs[2] = { program, second_program };
- my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, two_programs, NULL, NULL, &error);
- test_error( error, "clLinkProgram: Expected a different error code while linking a program that tries to access a regular function" );
+ my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2,
+ two_programs, NULL, NULL, &error);
+ test_error(error,
+ "clLinkProgram: Expected a different error code while linking a "
+ "program that tries to access a regular function");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( second_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(second_program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_simple_embedded_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_simple_embedded_header_link(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_program program, header, simple_program;
log_info("Testing a simple embedded header link...\n");
- program = clCreateProgramWithSource(context, 1, &another_simple_kernel_with_header, NULL, &error);
- if( program == NULL || error != CL_SUCCESS )
+ program = clCreateProgramWithSource(
+ context, 1, &another_simple_kernel_with_header, NULL, &error);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- header = clCreateProgramWithSource(context, 1, &simple_header, NULL, &error);
- if( header == NULL || error != CL_SUCCESS )
+ header =
+ clCreateProgramWithSource(context, 1, &simple_header, NULL, &error);
+ if (header == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple header program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, &simple_header_name, NULL, NULL);
- test_error( error, "Unable to compile a simple program with embedded header" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header,
+ &simple_header_name, NULL, NULL);
+ test_error(error,
+ "Unable to compile a simple program with embedded header");
- error = create_single_kernel_helper_create_program(context, &simple_program, 1, &simple_kernel);
- if( simple_program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &simple_program,
+ 1, &simple_kernel);
+ if (simple_program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL,
+ NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
cl_program two_programs[2] = { program, simple_program };
- cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error);
- test_error( error, "Unable to create an executable from two binaries, one compiled with embedded header" );
+ cl_program fully_linked_program = clLinkProgram(
+ context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error);
+ test_error(error,
+ "Unable to create an executable from two binaries, one compiled "
+ "with embedded header");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( header );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(header);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( simple_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(simple_program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( fully_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(fully_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-const char* when_i_pondered_weak_and_weary = "When I pondered weak and weary!";
+const char *when_i_pondered_weak_and_weary = "When I pondered weak and weary!";
-static void CL_CALLBACK simple_link_callback(cl_program program, void* user_data)
+static void CL_CALLBACK simple_link_callback(cl_program program,
+ void *user_data)
{
- simple_user_data* simple_link_user_data = (simple_user_data*)user_data;
- log_info("in the simple_link_callback: program %p just completed linking with '%s'\n", program, (const char*)simple_link_user_data->m_message);
- if (strcmp(when_i_pondered_weak_and_weary, simple_link_user_data->m_message) != 0)
+ simple_user_data *simple_link_user_data = (simple_user_data *)user_data;
+ log_info("in the simple_link_callback: program %p just completed linking "
+ "with '%s'\n",
+ program, (const char *)simple_link_user_data->m_message);
+ if (strcmp(when_i_pondered_weak_and_weary, simple_link_user_data->m_message)
+ != 0)
{
- log_error("ERROR: in the simple_compile_callback: Expected '%s' and got %s! (in %s:%d)\n", when_i_pondered_weak_and_weary, simple_link_user_data->m_message, __FILE__, __LINE__);
+ log_error("ERROR: in the simple_compile_callback: Expected '%s' and "
+ "got %s! (in %s:%d)\n",
+ when_i_pondered_weak_and_weary,
+ simple_link_user_data->m_message, __FILE__, __LINE__);
}
int error;
- log_info("in the simple_link_callback: program %p just completed linking with '%p'\n", program, simple_link_user_data->m_event);
+ log_info("in the simple_link_callback: program %p just completed linking "
+ "with '%p'\n",
+ program, simple_link_user_data->m_event);
error = clSetUserEventStatus(simple_link_user_data->m_event, CL_COMPLETE);
if (error != CL_SUCCESS)
{
- log_error( "ERROR: simple_link_callback: Unable to set user event status to CL_COMPLETE! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: simple_link_callback: Unable to set user event "
+ "status to CL_COMPLETE! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
exit(-1);
}
- log_info("in the simple_link_callback: Successfully signaled link_program_completion_event event!\n");
+ log_info("in the simple_link_callback: Successfully signaled "
+ "link_program_completion_event event!\n");
}
-int test_simple_link_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_simple_link_with_callback(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_program program;
cl_event link_program_completion_event;
log_info("Testing a simple linking with callback...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_kernel);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple program");
link_program_completion_event = clCreateUserEvent(context, &error);
- test_error( error, "Unable to create a user event");
+ test_error(error, "Unable to create a user event");
- simple_user_data simple_link_user_data = {when_i_pondered_weak_and_weary, link_program_completion_event};
+ simple_user_data simple_link_user_data = { when_i_pondered_weak_and_weary,
+ link_program_completion_event };
- cl_program my_linked_library = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, simple_link_callback, (void*)&simple_link_user_data, &error);
- test_error( error, "Unable to link a simple program" );
+ cl_program my_linked_library = clLinkProgram(
+ context, 1, &deviceID, NULL, 1, &program, simple_link_callback,
+ (void *)&simple_link_user_data, &error);
+ test_error(error, "Unable to link a simple program");
error = clWaitForEvents(1, &link_program_completion_event);
- test_error( error, "clWaitForEvents failed when waiting on link_program_completion_event");
+ test_error(
+ error,
+ "clWaitForEvents failed when waiting on link_program_completion_event");
/* All done! */
error = clReleaseEvent(link_program_completion_event);
- test_error( error, "Unable to release event object" );
+ test_error(error, "Unable to release event object");
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_linked_library );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_linked_library);
+ test_error(error, "Unable to release program object");
return 0;
}
-static void initBuffer(float* & srcBuffer, unsigned int cnDimension)
+static void initBuffer(float *&srcBuffer, unsigned int cnDimension)
{
float num = 0.0f;
- for( unsigned int i = 0; i < cnDimension; i++ )
+ for (unsigned int i = 0; i < cnDimension; i++)
{
- if( ( i % 10 ) == 0 )
+ if ((i % 10) == 0)
{
num = 0.0f;
}
- srcBuffer[ i ] = num;
+ srcBuffer[i] = num;
num = num + 1.0f;
}
}
-static int verifyCopyBuffer(cl_context context, cl_command_queue queue, cl_kernel kernel)
+static int verifyCopyBuffer(cl_context context, cl_command_queue queue,
+ cl_kernel kernel)
{
int error, result = CL_SUCCESS;
const size_t cnDimension = 32;
// Allocate source buffer
- float * srcBuffer = (float*)malloc(cnDimension * sizeof(float));
- float * dstBuffer = (float*)malloc(cnDimension * sizeof(float));
+ float *srcBuffer = (float *)malloc(cnDimension * sizeof(float));
+ float *dstBuffer = (float *)malloc(cnDimension * sizeof(float));
- if (srcBuffer == NULL) {
- log_error( "ERROR: Unable to allocate srcBuffer float array with %lu floats! (in %s:%d)\n", cnDimension, __FILE__, __LINE__);
+ if (srcBuffer == NULL)
+ {
+ log_error("ERROR: Unable to allocate srcBuffer float array with %lu "
+ "floats! (in %s:%d)\n",
+ cnDimension, __FILE__, __LINE__);
return -1;
}
- if (dstBuffer == NULL) {
- log_error( "ERROR: Unable to allocate dstBuffer float array with %lu floats! (in %s:%d)\n", cnDimension, __FILE__, __LINE__);
+ if (dstBuffer == NULL)
+ {
+ log_error("ERROR: Unable to allocate dstBuffer float array with %lu "
+ "floats! (in %s:%d)\n",
+ cnDimension, __FILE__, __LINE__);
return -1;
}
- if( srcBuffer && dstBuffer )
+ if (srcBuffer && dstBuffer)
{
// initialize host memory
- initBuffer(srcBuffer, cnDimension );
+ initBuffer(srcBuffer, cnDimension);
// Allocate device memory
- cl_mem deviceMemSrc = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
- cnDimension * sizeof( cl_float ), srcBuffer, &error);
- test_error( error, "Unable to create a source memory buffer" );
+ cl_mem deviceMemSrc =
+ clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+ cnDimension * sizeof(cl_float), srcBuffer, &error);
+ test_error(error, "Unable to create a source memory buffer");
- cl_mem deviceMemDst = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
- cnDimension * sizeof( cl_float ), 0, &error);
- test_error( error, "Unable to create a destination memory buffer" );
+ cl_mem deviceMemDst =
+ clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+ cnDimension * sizeof(cl_float), 0, &error);
+ test_error(error, "Unable to create a destination memory buffer");
// Set kernel args
// Set parameter 0 to be the source buffer
- error = clSetKernelArg(kernel, 0, sizeof( cl_mem ), ( void * )&deviceMemSrc );
- test_error( error, "Unable to set the first kernel argument" );
+ error =
+ clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&deviceMemSrc);
+ test_error(error, "Unable to set the first kernel argument");
// Set parameter 1 to be the destination buffer
- error = clSetKernelArg(kernel, 1, sizeof( cl_mem ), ( void * )&deviceMemDst );
- test_error( error, "Unable to set the second kernel argument" );
+ error =
+ clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&deviceMemDst);
+ test_error(error, "Unable to set the second kernel argument");
// Execute kernel
- error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
- &cnDimension, 0, 0, NULL, NULL );
- test_error( error, "Unable to enqueue kernel" );
+ error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &cnDimension, 0,
+ 0, NULL, NULL);
+ test_error(error, "Unable to enqueue kernel");
- error = clFlush( queue );
- test_error( error, "Unable to flush the queue" );
+ error = clFlush(queue);
+ test_error(error, "Unable to flush the queue");
// copy results from device back to host
- error = clEnqueueReadBuffer(queue, deviceMemDst, CL_TRUE, 0, cnDimension * sizeof( cl_float ),
- dstBuffer, 0, NULL, NULL );
- test_error( error, "Unable to read the destination buffer" );
+ error = clEnqueueReadBuffer(queue, deviceMemDst, CL_TRUE, 0,
+ cnDimension * sizeof(cl_float), dstBuffer,
+ 0, NULL, NULL);
+ test_error(error, "Unable to read the destination buffer");
- error = clFlush( queue );
- test_error( error, "Unable to flush the queue" );
+ error = clFlush(queue);
+ test_error(error, "Unable to flush the queue");
// Compare the source and destination buffers
- const int* pSrc = (int*)srcBuffer;
- const int* pDst = (int*)dstBuffer;
+ const int *pSrc = (int *)srcBuffer;
+ const int *pDst = (int *)dstBuffer;
int mismatch = 0;
- for( size_t i = 0; i < cnDimension; i++ )
+ for (size_t i = 0; i < cnDimension; i++)
{
- if( pSrc[i] != pDst[i] )
+ if (pSrc[i] != pDst[i])
{
- if( mismatch < 4 )
+ if (mismatch < 4)
{
- log_info("Offset %08lX: Expected %08X, Got %08X\n", i * 4, pSrc[i], pDst[i] );
+ log_info("Offset %08lX: Expected %08X, Got %08X\n", i * 4,
+ pSrc[i], pDst[i]);
}
else
{
@@ -1700,9 +2078,9 @@ static int verifyCopyBuffer(cl_context context, cl_command_queue queue, cl_kerne
}
}
- if( mismatch )
+ if (mismatch)
{
- log_info("*** %d mismatches found, TEST FAILS! ***\n", mismatch );
+ log_info("*** %d mismatches found, TEST FAILS! ***\n", mismatch);
result = -1;
}
else
@@ -1710,806 +2088,989 @@ static int verifyCopyBuffer(cl_context context, cl_command_queue queue, cl_kerne
log_info("Buffers match, test passes.\n");
}
- free( srcBuffer );
+ free(srcBuffer);
srcBuffer = NULL;
- free( dstBuffer );
+ free(dstBuffer);
dstBuffer = NULL;
- if( deviceMemSrc )
+ if (deviceMemSrc)
{
- error = clReleaseMemObject( deviceMemSrc );
- test_error( error, "Unable to release memory object" );
+ error = clReleaseMemObject(deviceMemSrc);
+ test_error(error, "Unable to release memory object");
}
- if( deviceMemDst )
+ if (deviceMemDst)
{
- error = clReleaseMemObject( deviceMemDst );
- test_error( error, "Unable to release memory object" );
+ error = clReleaseMemObject(deviceMemDst);
+ test_error(error, "Unable to release memory object");
}
}
return result;
}
-int test_execute_after_simple_compile_and_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_execute_after_simple_compile_and_link(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
int error;
cl_program program;
log_info("Testing execution after a simple compile and link...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_kernel);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple program");
- cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error);
- test_error( error, "Unable to link a simple program" );
+ cl_program my_newly_linked_program = clLinkProgram(
+ context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error);
+ test_error(error, "Unable to link a simple program");
- cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error);
- test_error( error, "Unable to create a simple kernel" );
+ cl_kernel kernel =
+ clCreateKernel(my_newly_linked_program, "CopyBuffer", &error);
+ test_error(error, "Unable to create a simple kernel");
error = verifyCopyBuffer(context, queue, kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
/* All done! */
- error = clReleaseKernel( kernel );
- test_error( error, "Unable to release kernel object" );
+ error = clReleaseKernel(kernel);
+ test_error(error, "Unable to release kernel object");
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_execute_after_simple_compile_and_link_no_device_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_execute_after_simple_compile_and_link_no_device_info(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements)
{
int error;
cl_program program;
- log_info("Testing execution after a simple compile and link with no device information provided...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel);
- if( program == NULL || error != CL_SUCCESS )
+ log_info("Testing execution after a simple compile and link with no device "
+ "information provided...\n");
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_kernel);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
error = clCompileProgram(program, 0, NULL, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ test_error(error, "Unable to compile a simple program");
- cl_program my_newly_linked_program = clLinkProgram(context, 0, NULL, NULL, 1, &program, NULL, NULL, &error);
- test_error( error, "Unable to link a simple program" );
+ cl_program my_newly_linked_program =
+ clLinkProgram(context, 0, NULL, NULL, 1, &program, NULL, NULL, &error);
+ test_error(error, "Unable to link a simple program");
- cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error);
- test_error( error, "Unable to create a simple kernel" );
+ cl_kernel kernel =
+ clCreateKernel(my_newly_linked_program, "CopyBuffer", &error);
+ test_error(error, "Unable to create a simple kernel");
error = verifyCopyBuffer(context, queue, kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
/* All done! */
- error = clReleaseKernel( kernel );
- test_error( error, "Unable to release kernel object" );
+ error = clReleaseKernel(kernel);
+ test_error(error, "Unable to release kernel object");
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_execute_after_simple_compile_and_link_with_defines(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_execute_after_simple_compile_and_link_with_defines(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements)
{
int error;
cl_program program;
- log_info("Testing execution after a simple compile and link with defines...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel_with_defines, "-DFIRST=5 -DSECOND=37");
- if( program == NULL || error != CL_SUCCESS )
+ log_info(
+ "Testing execution after a simple compile and link with defines...\n");
+ error = create_single_kernel_helper_create_program(
+ context, &program, 1, &simple_kernel_with_defines,
+ "-DFIRST=5 -DSECOND=37");
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, "-DFIRST=5 -DSECOND=37", 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(program, 1, &deviceID, "-DFIRST=5 -DSECOND=37", 0,
+ NULL, NULL, NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
- cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error);
- test_error( error, "Unable to link a simple program" );
+ cl_program my_newly_linked_program = clLinkProgram(
+ context, 1, &deviceID, NULL, 1, &program, NULL, NULL, &error);
+ test_error(error, "Unable to link a simple program");
- cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error);
- test_error( error, "Unable to create a simple kernel" );
+ cl_kernel kernel =
+ clCreateKernel(my_newly_linked_program, "CopyBuffer", &error);
+ test_error(error, "Unable to create a simple kernel");
error = verifyCopyBuffer(context, queue, kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
/* All done! */
- error = clReleaseKernel( kernel );
- test_error( error, "Unable to release kernel object" );
+ error = clReleaseKernel(kernel);
+ test_error(error, "Unable to release kernel object");
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_execute_after_serialize_reload_object(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_execute_after_serialize_reload_object(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
int error;
cl_program program;
- size_t binarySize;
+ size_t binarySize;
unsigned char *binary;
- log_info("Testing execution after serialization and reloading of the object...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel);
- if( program == NULL || error != CL_SUCCESS )
+ log_info("Testing execution after serialization and reloading of the "
+ "object...\n");
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_kernel);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple program");
// Get the size of the resulting binary (only one device)
- error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
- test_error( error, "Unable to get binary size" );
+ error = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
+ sizeof(binarySize), &binarySize, NULL);
+ test_error(error, "Unable to get binary size");
// Sanity check
- if( binarySize == 0 )
+ if (binarySize == 0)
{
- log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ );
+ log_error("ERROR: Binary size of program is zero (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
// Create a buffer and get the actual binary
- binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize);
- if (binary == NULL) {
- log_error( "ERROR: Unable to allocate binary character array with %lu characters! (in %s:%d)\n", binarySize, __FILE__, __LINE__ );
+ binary = (unsigned char *)malloc(sizeof(unsigned char) * binarySize);
+ if (binary == NULL)
+ {
+ log_error("ERROR: Unable to allocate binary character array with %lu "
+ "characters! (in %s:%d)\n",
+ binarySize, __FILE__, __LINE__);
return -1;
}
- unsigned char *buffers[ 1 ] = { binary };
- cl_int loadErrors[ 1 ];
+ unsigned char *buffers[1] = { binary };
+ cl_int loadErrors[1];
// Do another sanity check here first
size_t size;
- error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size );
- test_error( error, "Unable to get expected size of binaries array" );
- if( size != sizeof( buffers ) )
+ error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, 0, NULL, &size);
+ test_error(error, "Unable to get expected size of binaries array");
+ if (size != sizeof(buffers))
{
- log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ );
+ log_error("ERROR: Expected size of binaries array in clGetProgramInfo "
+ "is incorrect (should be %d, got %d) (in %s:%d)\n",
+ (int)sizeof(buffers), (int)size, __FILE__, __LINE__);
free(binary);
return -1;
}
- error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
- test_error( error, "Unable to get program binary" );
+ error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(buffers),
+ &buffers, NULL);
+ test_error(error, "Unable to get program binary");
// use clCreateProgramWithBinary
- cl_program program_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error);
- test_error( error, "Unable to create program with binary" );
+ cl_program program_with_binary = clCreateProgramWithBinary(
+ context, 1, &deviceID, &binarySize, (const unsigned char **)buffers,
+ loadErrors, &error);
+ test_error(error, "Unable to create program with binary");
- cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program_with_binary, NULL, NULL, &error);
- test_error( error, "Unable to link a simple program" );
+ cl_program my_newly_linked_program =
+ clLinkProgram(context, 1, &deviceID, NULL, 1, &program_with_binary,
+ NULL, NULL, &error);
+ test_error(error, "Unable to link a simple program");
- cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error);
- test_error( error, "Unable to create a simple kernel" );
+ cl_kernel kernel =
+ clCreateKernel(my_newly_linked_program, "CopyBuffer", &error);
+ test_error(error, "Unable to create a simple kernel");
error = verifyCopyBuffer(context, queue, kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
/* All done! */
- error = clReleaseKernel( kernel );
- test_error( error, "Unable to release kernel object" );
+ error = clReleaseKernel(kernel);
+ test_error(error, "Unable to release kernel object");
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_linked_program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( program_with_binary );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program_with_binary);
+ test_error(error, "Unable to release program object");
free(binary);
return 0;
}
-int test_execute_after_serialize_reload_library(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_execute_after_serialize_reload_library(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
int error;
cl_program program, another_program;
- size_t binarySize;
+ size_t binarySize;
unsigned char *binary;
- log_info("Testing execution after linking a binary with a simple library...\n");
+ log_info(
+ "Testing execution after linking a binary with a simple library...\n");
// we will test creation of a simple library from one file
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_kernel);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple program");
- cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, NULL, NULL, &error);
- test_error( error, "Unable to create a simple library" );
+ cl_program my_newly_minted_library =
+ clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program,
+ NULL, NULL, &error);
+ test_error(error, "Unable to create a simple library");
// Get the size of the resulting library (only one device)
- error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
- test_error( error, "Unable to get binary size" );
+ error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARY_SIZES,
+ sizeof(binarySize), &binarySize, NULL);
+ test_error(error, "Unable to get binary size");
// Sanity check
- if( binarySize == 0 )
+ if (binarySize == 0)
{
- log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ );
+ log_error("ERROR: Binary size of program is zero (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
// Create a buffer and get the actual binary
- binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize);
- if (binary == NULL) {
- log_error( "ERROR: Unable to allocate binary character array with %lu characters (in %s:%d)!", binarySize, __FILE__, __LINE__);
+ binary = (unsigned char *)malloc(sizeof(unsigned char) * binarySize);
+ if (binary == NULL)
+ {
+ log_error("ERROR: Unable to allocate binary character array with %lu "
+ "characters (in %s:%d)!",
+ binarySize, __FILE__, __LINE__);
return -1;
}
- unsigned char *buffers[ 1 ] = { binary };
- cl_int loadErrors[ 1 ];
+ unsigned char *buffers[1] = { binary };
+ cl_int loadErrors[1];
// Do another sanity check here first
size_t size;
- error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARIES, 0, NULL, &size );
- test_error( error, "Unable to get expected size of binaries array" );
- if( size != sizeof( buffers ) )
- {
- log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ );
+ error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARIES, 0,
+ NULL, &size);
+ test_error(error, "Unable to get expected size of binaries array");
+ if (size != sizeof(buffers))
+ {
+ log_error("ERROR: Expected size of binaries array in clGetProgramInfo "
+ "is incorrect (should be %d, got %d) (in %s:%d)\n",
+ (int)sizeof(buffers), (int)size, __FILE__, __LINE__);
free(binary);
return -1;
}
- error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
- test_error( error, "Unable to get program binary" );
+ error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARIES,
+ sizeof(buffers), &buffers, NULL);
+ test_error(error, "Unable to get program binary");
// use clCreateProgramWithBinary
- cl_program library_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error);
- test_error( error, "Unable to create program with binary" );
+ cl_program library_with_binary = clCreateProgramWithBinary(
+ context, 1, &deviceID, &binarySize, (const unsigned char **)buffers,
+ loadErrors, &error);
+ test_error(error, "Unable to create program with binary");
- error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel);
- if( another_program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(
+ context, &another_program, 1, &another_simple_kernel);
+ if (another_program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL,
+ NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
- cl_program program_and_archive[2] = { another_program, library_with_binary };
- cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error);
- test_error( error, "Unable to create an executable from a binary and a library" );
+ cl_program program_and_archive[2] = { another_program,
+ library_with_binary };
+ cl_program fully_linked_program = clLinkProgram(
+ context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error);
+ test_error(error,
+ "Unable to create an executable from a binary and a library");
- cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error);
- test_error( error, "Unable to create a simple kernel" );
+ cl_kernel kernel =
+ clCreateKernel(fully_linked_program, "CopyBuffer", &error);
+ test_error(error, "Unable to create a simple kernel");
error = verifyCopyBuffer(context, queue, kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
- cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error);
- test_error( error, "Unable to create another simple kernel" );
+ cl_kernel another_kernel =
+ clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error);
+ test_error(error, "Unable to create another simple kernel");
error = verifyCopyBuffer(context, queue, another_kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
/* All done! */
- error = clReleaseKernel( kernel );
- test_error( error, "Unable to release kernel object" );
+ error = clReleaseKernel(kernel);
+ test_error(error, "Unable to release kernel object");
- error = clReleaseKernel( another_kernel );
- test_error( error, "Unable to release another kernel object" );
+ error = clReleaseKernel(another_kernel);
+ test_error(error, "Unable to release another kernel object");
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( another_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(another_program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_minted_library );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_minted_library);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( library_with_binary );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(library_with_binary);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( fully_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(fully_linked_program);
+ test_error(error, "Unable to release program object");
free(binary);
return 0;
}
-static void CL_CALLBACK program_compile_completion_callback(cl_program program, void* user_data)
+static void CL_CALLBACK program_compile_completion_callback(cl_program program,
+ void *user_data)
{
int error;
cl_event compile_program_completion_event = (cl_event)user_data;
- log_info("in the program_compile_completion_callback: program %p just completed compiling with '%p'\n", program, compile_program_completion_event);
+ log_info("in the program_compile_completion_callback: program %p just "
+ "completed compiling with '%p'\n",
+ program, compile_program_completion_event);
error = clSetUserEventStatus(compile_program_completion_event, CL_COMPLETE);
if (error != CL_SUCCESS)
{
- log_error( "ERROR: in the program_compile_completion_callback: Unable to set user event status to CL_COMPLETE! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: in the program_compile_completion_callback: Unable "
+ "to set user event status to CL_COMPLETE! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
exit(-1);
}
- log_info("in the program_compile_completion_callback: Successfully signaled compile_program_completion_event event!\n");
+ log_info("in the program_compile_completion_callback: Successfully "
+ "signaled compile_program_completion_event event!\n");
}
-static void CL_CALLBACK program_link_completion_callback(cl_program program, void* user_data)
+static void CL_CALLBACK program_link_completion_callback(cl_program program,
+ void *user_data)
{
int error;
cl_event link_program_completion_event = (cl_event)user_data;
- log_info("in the program_link_completion_callback: program %p just completed linking with '%p'\n", program, link_program_completion_event);
+ log_info("in the program_link_completion_callback: program %p just "
+ "completed linking with '%p'\n",
+ program, link_program_completion_event);
error = clSetUserEventStatus(link_program_completion_event, CL_COMPLETE);
if (error != CL_SUCCESS)
{
- log_error( "ERROR: in the program_link_completion_callback: Unable to set user event status to CL_COMPLETE! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: in the program_link_completion_callback: Unable to "
+ "set user event status to CL_COMPLETE! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
exit(-1);
}
- log_info("in the program_link_completion_callback: Successfully signaled link_program_completion_event event!\n");
+ log_info("in the program_link_completion_callback: Successfully signaled "
+ "link_program_completion_event event!\n");
}
-int test_execute_after_simple_compile_and_link_with_callbacks(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_execute_after_simple_compile_and_link_with_callbacks(
+ cl_device_id deviceID, cl_context context, cl_command_queue queue,
+ int num_elements)
{
int error;
cl_program program;
cl_event compile_program_completion_event, link_program_completion_event;
- log_info("Testing execution after a simple compile and link with callbacks...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel);
- if( program == NULL || error != CL_SUCCESS )
+ log_info("Testing execution after a simple compile and link with "
+ "callbacks...\n");
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_kernel);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
compile_program_completion_event = clCreateUserEvent(context, &error);
- test_error( error, "Unable to create a user event");
+ test_error(error, "Unable to create a user event");
error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL,
- program_compile_completion_callback, (void*)compile_program_completion_event);
- test_error( error, "Unable to compile a simple program" );
+ program_compile_completion_callback,
+ (void *)compile_program_completion_event);
+ test_error(error, "Unable to compile a simple program");
error = clWaitForEvents(1, &compile_program_completion_event);
- test_error( error, "clWaitForEvents failed when waiting on compile_program_completion_event");
+ test_error(error,
+ "clWaitForEvents failed when waiting on "
+ "compile_program_completion_event");
error = clReleaseEvent(compile_program_completion_event);
- test_error( error, "Unable to release event object" );
+ test_error(error, "Unable to release event object");
link_program_completion_event = clCreateUserEvent(context, &error);
- test_error( error, "Unable to create a user event");
+ test_error(error, "Unable to create a user event");
- cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 1, &program,
- program_link_completion_callback, (void*)link_program_completion_event, &error);
- test_error( error, "Unable to link a simple program" );
+ cl_program my_newly_linked_program =
+ clLinkProgram(context, 1, &deviceID, NULL, 1, &program,
+ program_link_completion_callback,
+ (void *)link_program_completion_event, &error);
+ test_error(error, "Unable to link a simple program");
error = clWaitForEvents(1, &link_program_completion_event);
- test_error( error, "clWaitForEvents failed when waiting on link_program_completion_event");
+ test_error(
+ error,
+ "clWaitForEvents failed when waiting on link_program_completion_event");
error = clReleaseEvent(link_program_completion_event);
- test_error( error, "Unable to release event object" );
+ test_error(error, "Unable to release event object");
- cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CopyBuffer", &error);
- test_error( error, "Unable to create a simple kernel" );
+ cl_kernel kernel =
+ clCreateKernel(my_newly_linked_program, "CopyBuffer", &error);
+ test_error(error, "Unable to create a simple kernel");
error = verifyCopyBuffer(context, queue, kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
/* All done! */
- error = clReleaseKernel( kernel );
- test_error( error, "Unable to release kernel object" );
+ error = clReleaseKernel(kernel);
+ test_error(error, "Unable to release kernel object");
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_simple_library_only(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_simple_library_only(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_program program;
log_info("Testing creation of a simple library...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_kernel);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple program");
- cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, NULL, NULL, &error);
- test_error( error, "Unable to create a simple library" );
+ cl_program my_newly_minted_library =
+ clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program,
+ NULL, NULL, &error);
+ test_error(error, "Unable to create a simple library");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_minted_library );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_minted_library);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_simple_library_with_callback(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_simple_library_with_callback(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_program program;
cl_event link_program_completion_event;
log_info("Testing creation of a simple library with a callback...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_kernel);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple program");
link_program_completion_event = clCreateUserEvent(context, &error);
- test_error( error, "Unable to create a user event");
+ test_error(error, "Unable to create a user event");
- simple_user_data simple_link_user_data = {when_i_pondered_weak_and_weary, link_program_completion_event};
+ simple_user_data simple_link_user_data = { when_i_pondered_weak_and_weary,
+ link_program_completion_event };
- cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program,
- simple_link_callback, (void*)&simple_link_user_data, &error);
- test_error( error, "Unable to create a simple library" );
+ cl_program my_newly_minted_library = clLinkProgram(
+ context, 1, &deviceID, "-create-library", 1, &program,
+ simple_link_callback, (void *)&simple_link_user_data, &error);
+ test_error(error, "Unable to create a simple library");
error = clWaitForEvents(1, &link_program_completion_event);
- test_error( error, "clWaitForEvents failed when waiting on link_program_completion_event");
+ test_error(
+ error,
+ "clWaitForEvents failed when waiting on link_program_completion_event");
/* All done! */
error = clReleaseEvent(link_program_completion_event);
- test_error( error, "Unable to release event object" );
+ test_error(error, "Unable to release event object");
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_minted_library );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_minted_library);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_simple_library_with_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_simple_library_with_link(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_program program, another_program;
log_info("Testing creation and linking with a simple library...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_kernel);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple program");
- cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, NULL, NULL, &error);
- test_error( error, "Unable to create a simple library" );
+ cl_program my_newly_minted_library =
+ clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program,
+ NULL, NULL, &error);
+ test_error(error, "Unable to create a simple library");
- error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel);
- if( another_program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(
+ context, &another_program, 1, &another_simple_kernel);
+ if (another_program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL,
+ NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
- cl_program program_and_archive[2] = { another_program, my_newly_minted_library };
- cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error);
- test_error( error, "Unable to create an executable from a binary and a library" );
+ cl_program program_and_archive[2] = { another_program,
+ my_newly_minted_library };
+ cl_program fully_linked_program = clLinkProgram(
+ context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error);
+ test_error(error,
+ "Unable to create an executable from a binary and a library");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( another_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(another_program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_minted_library );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_minted_library);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( fully_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(fully_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_execute_after_simple_library_with_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_execute_after_simple_library_with_link(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
int error;
cl_program program, another_program;
- log_info("Testing execution after linking a binary with a simple library...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel);
- if( program == NULL || error != CL_SUCCESS )
+ log_info(
+ "Testing execution after linking a binary with a simple library...\n");
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_kernel);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple program");
- cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program, NULL, NULL, &error);
- test_error( error, "Unable to create a simple library" );
+ cl_program my_newly_minted_library =
+ clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program,
+ NULL, NULL, &error);
+ test_error(error, "Unable to create a simple library");
- error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel);
- if( another_program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(
+ context, &another_program, 1, &another_simple_kernel);
+ if (another_program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL,
+ NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
- cl_program program_and_archive[2] = { another_program, my_newly_minted_library };
- cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error);
- test_error( error, "Unable to create an executable from a binary and a library" );
+ cl_program program_and_archive[2] = { another_program,
+ my_newly_minted_library };
+ cl_program fully_linked_program = clLinkProgram(
+ context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error);
+ test_error(error,
+ "Unable to create an executable from a binary and a library");
- cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error);
- test_error( error, "Unable to create a simple kernel" );
+ cl_kernel kernel =
+ clCreateKernel(fully_linked_program, "CopyBuffer", &error);
+ test_error(error, "Unable to create a simple kernel");
error = verifyCopyBuffer(context, queue, kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
- cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error);
- test_error( error, "Unable to create another simple kernel" );
+ cl_kernel another_kernel =
+ clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error);
+ test_error(error, "Unable to create another simple kernel");
error = verifyCopyBuffer(context, queue, another_kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
/* All done! */
- error = clReleaseKernel( kernel );
- test_error( error, "Unable to release kernel object" );
+ error = clReleaseKernel(kernel);
+ test_error(error, "Unable to release kernel object");
- error = clReleaseKernel( another_kernel );
- test_error( error, "Unable to release another kernel object" );
+ error = clReleaseKernel(another_kernel);
+ test_error(error, "Unable to release another kernel object");
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( another_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(another_program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_minted_library );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_minted_library);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( fully_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(fully_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_two_file_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_two_file_link(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_program program, another_program;
log_info("Testing two file compiling and linking...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_kernel);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple program");
- error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel);
- if( another_program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(
+ context, &another_program, 1, &another_simple_kernel);
+ if (another_program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL,
+ NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
cl_program two_programs[2] = { program, another_program };
- cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error);
- test_error( error, "Unable to create an executable from two binaries" );
+ cl_program fully_linked_program = clLinkProgram(
+ context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error);
+ test_error(error, "Unable to create an executable from two binaries");
/* All done! */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( another_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(another_program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( fully_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(fully_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_execute_after_two_file_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_execute_after_two_file_link(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
cl_program program, another_program;
- log_info("Testing two file compiling and linking and execution of two kernels afterwards ...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel);
- if( program == NULL || error != CL_SUCCESS )
+ log_info("Testing two file compiling and linking and execution of two "
+ "kernels afterwards ...\n");
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_kernel);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple program");
- error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel);
- if( another_program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(
+ context, &another_program, 1, &another_simple_kernel);
+ if (another_program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL,
+ NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
cl_program two_programs[2] = { program, another_program };
- cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error);
- test_error( error, "Unable to create an executable from two binaries" );
+ cl_program fully_linked_program = clLinkProgram(
+ context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error);
+ test_error(error, "Unable to create an executable from two binaries");
- cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error);
- test_error( error, "Unable to create a simple kernel" );
+ cl_kernel kernel =
+ clCreateKernel(fully_linked_program, "CopyBuffer", &error);
+ test_error(error, "Unable to create a simple kernel");
error = verifyCopyBuffer(context, queue, kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
- cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error);
- test_error( error, "Unable to create another simple kernel" );
+ cl_kernel another_kernel =
+ clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error);
+ test_error(error, "Unable to create another simple kernel");
error = verifyCopyBuffer(context, queue, another_kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
/* All done! */
- error = clReleaseKernel( kernel );
- test_error( error, "Unable to release kernel object" );
+ error = clReleaseKernel(kernel);
+ test_error(error, "Unable to release kernel object");
- error = clReleaseKernel( another_kernel );
- test_error( error, "Unable to release another kernel object" );
+ error = clReleaseKernel(another_kernel);
+ test_error(error, "Unable to release another kernel object");
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( another_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(another_program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( fully_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(fully_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_execute_after_embedded_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_execute_after_embedded_header_link(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
int error;
cl_program program, header, simple_program;
log_info("Testing execution after embedded header link...\n");
// we will test execution after compiling and linking with embedded headers
- program = clCreateProgramWithSource(context, 1, &another_simple_kernel_with_header, NULL, &error);
- if( program == NULL || error != CL_SUCCESS )
+ program = clCreateProgramWithSource(
+ context, 1, &another_simple_kernel_with_header, NULL, &error);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- header = clCreateProgramWithSource(context, 1, &simple_header, NULL, &error);
- if( header == NULL || error != CL_SUCCESS )
+ header =
+ clCreateProgramWithSource(context, 1, &simple_header, NULL, &error);
+ if (header == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple header program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple header program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header, &simple_header_name, NULL, NULL);
- test_error( error, "Unable to compile a simple program with embedded header" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 1, &header,
+ &simple_header_name, NULL, NULL);
+ test_error(error,
+ "Unable to compile a simple program with embedded header");
- simple_program = clCreateProgramWithSource(context, 1, &simple_kernel, NULL, &error);
- if( simple_program == NULL || error != CL_SUCCESS )
+ simple_program =
+ clCreateProgramWithSource(context, 1, &simple_kernel, NULL, &error);
+ if (simple_program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL,
+ NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
cl_program two_programs[2] = { program, simple_program };
- cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error);
- test_error( error, "Unable to create an executable from two binaries, one compiled with embedded header" );
+ cl_program fully_linked_program = clLinkProgram(
+ context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error);
+ test_error(error,
+ "Unable to create an executable from two binaries, one compiled "
+ "with embedded header");
- cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error);
- test_error( error, "Unable to create a simple kernel" );
+ cl_kernel kernel =
+ clCreateKernel(fully_linked_program, "CopyBuffer", &error);
+ test_error(error, "Unable to create a simple kernel");
error = verifyCopyBuffer(context, queue, kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
- cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error);
- test_error( error, "Unable to create another simple kernel" );
+ cl_kernel another_kernel =
+ clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error);
+ test_error(error, "Unable to create another simple kernel");
error = verifyCopyBuffer(context, queue, another_kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
/* All done! */
- error = clReleaseKernel( kernel );
- test_error( error, "Unable to release kernel object" );
+ error = clReleaseKernel(kernel);
+ test_error(error, "Unable to release kernel object");
- error = clReleaseKernel( another_kernel );
- test_error( error, "Unable to release another kernel object" );
+ error = clReleaseKernel(another_kernel);
+ test_error(error, "Unable to release another kernel object");
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( header );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(header);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( simple_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(simple_program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( fully_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(fully_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
#if defined(__APPLE__) || defined(__linux)
-#define _mkdir(x) mkdir(x,S_IRWXU)
+#define _mkdir(x) mkdir(x, S_IRWXU)
#define _chdir chdir
#define _rmdir rmdir
#define _unlink unlink
@@ -2517,461 +3078,602 @@ int test_execute_after_embedded_header_link(cl_device_id deviceID, cl_context co
#include <direct.h>
#endif
-int test_execute_after_included_header_link(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_execute_after_included_header_link(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
int error;
cl_program program, simple_program;
log_info("Testing execution after included header link...\n");
// we will test execution after compiling and linking with included headers
- program = clCreateProgramWithSource(context, 1, &another_simple_kernel_with_header, NULL, &error);
- if( program == NULL || error != CL_SUCCESS )
+ program = clCreateProgramWithSource(
+ context, 1, &another_simple_kernel_with_header, NULL, &error);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
/* setup */
-#if (defined(__linux__) || defined(__APPLE__)) && (!defined( __ANDROID__ ))
+#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__))
/* Some tests systems doesn't allow one to write in the test directory */
- if (_chdir("/tmp") != 0) {
- log_error( "ERROR: Unable to remove directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ );
+ if (_chdir("/tmp") != 0)
+ {
+ log_error("ERROR: Unable to remove directory foo/bar! (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
#endif
- if (_mkdir("foo") != 0) {
- log_error( "ERROR: Unable to create directory foo! (in %s:%d)\n", __FILE__, __LINE__ );
+ if (_mkdir("foo") != 0)
+ {
+ log_error("ERROR: Unable to create directory foo! (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
- if (_mkdir("foo/bar") != 0) {
- log_error( "ERROR: Unable to create directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ );
+ if (_mkdir("foo/bar") != 0)
+ {
+ log_error("ERROR: Unable to create directory foo/bar! (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
- if (_chdir("foo/bar") != 0) {
- log_error( "ERROR: Unable to change to directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ );
+ if (_chdir("foo/bar") != 0)
+ {
+ log_error("ERROR: Unable to change to directory foo/bar! (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
- FILE* simple_header_file = fopen(simple_header_name, "w");
- if (simple_header_file == NULL) {
- log_error( "ERROR: Unable to create simple header file %s! (in %s:%d)\n", simple_header_name, __FILE__, __LINE__ );
+ FILE *simple_header_file = fopen(simple_header_name, "w");
+ if (simple_header_file == NULL)
+ {
+ log_error("ERROR: Unable to create simple header file %s! (in %s:%d)\n",
+ simple_header_name, __FILE__, __LINE__);
return -1;
}
- if (fprintf(simple_header_file, "%s", simple_header) < 0) {
- log_error( "ERROR: Unable to write to simple header file %s! (in %s:%d)\n", simple_header_name, __FILE__, __LINE__);
+ if (fprintf(simple_header_file, "%s", simple_header) < 0)
+ {
+ log_error(
+ "ERROR: Unable to write to simple header file %s! (in %s:%d)\n",
+ simple_header_name, __FILE__, __LINE__);
return -1;
}
- if (fclose(simple_header_file) != 0) {
- log_error( "ERROR: Unable to close simple header file %s! (in %s:%d)\n", simple_header_name, __FILE__, __LINE__);
+ if (fclose(simple_header_file) != 0)
+ {
+ log_error("ERROR: Unable to close simple header file %s! (in %s:%d)\n",
+ simple_header_name, __FILE__, __LINE__);
return -1;
}
- if (_chdir("../..") != 0) {
- log_error( "ERROR: Unable to change to original working directory! (in %s:%d)\n", __FILE__, __LINE__);
+ if (_chdir("../..") != 0)
+ {
+ log_error("ERROR: Unable to change to original working directory! (in "
+ "%s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
-#if (defined(__linux__) || defined(__APPLE__)) && (!defined( __ANDROID__ ))
- error = clCompileProgram(program, 1, &deviceID, "-I/tmp/foo/bar", 0, NULL, NULL, NULL, NULL);
+#if (defined(__linux__) || defined(__APPLE__)) && (!defined(__ANDROID__))
+ error = clCompileProgram(program, 1, &deviceID, "-I/tmp/foo/bar", 0, NULL,
+ NULL, NULL, NULL);
#else
- error = clCompileProgram(program, 1, &deviceID, "-Ifoo/bar", 0, NULL, NULL, NULL, NULL);
+ error = clCompileProgram(program, 1, &deviceID, "-Ifoo/bar", 0, NULL, NULL,
+ NULL, NULL);
#endif
- test_error( error, "Unable to compile a simple program with included header" );
+ test_error(error,
+ "Unable to compile a simple program with included header");
/* cleanup */
- if (_chdir("foo/bar") != 0) {
- log_error( "ERROR: Unable to change to directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ );
+ if (_chdir("foo/bar") != 0)
+ {
+ log_error("ERROR: Unable to change to directory foo/bar! (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
- if (_unlink(simple_header_name) != 0) {
- log_error( "ERROR: Unable to remove simple header file %s! (in %s:%d)\n", simple_header_name, __FILE__, __LINE__ );
+ if (_unlink(simple_header_name) != 0)
+ {
+ log_error("ERROR: Unable to remove simple header file %s! (in %s:%d)\n",
+ simple_header_name, __FILE__, __LINE__);
return -1;
}
- if (_chdir("../..") != 0) {
- log_error( "ERROR: Unable to change to original working directory! (in %s:%d)\n", __FILE__, __LINE__ );
+ if (_chdir("../..") != 0)
+ {
+ log_error("ERROR: Unable to change to original working directory! (in "
+ "%s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
- if (_rmdir("foo/bar") != 0) {
- log_error( "ERROR: Unable to remove directory foo/bar! (in %s:%d)\n", __FILE__, __LINE__ );
+ if (_rmdir("foo/bar") != 0)
+ {
+ log_error("ERROR: Unable to remove directory foo/bar! (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
- if (_rmdir("foo") != 0) {
- log_error( "ERROR: Unable to remove directory foo! (in %s:%d)\n", __FILE__, __LINE__ );
+ if (_rmdir("foo") != 0)
+ {
+ log_error("ERROR: Unable to remove directory foo! (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
- simple_program = clCreateProgramWithSource(context, 1, &simple_kernel, NULL, &error);
- if( simple_program == NULL || error != CL_SUCCESS )
+ simple_program =
+ clCreateProgramWithSource(context, 1, &simple_kernel, NULL, &error);
+ if (simple_program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(simple_program, 1, &deviceID, NULL, 0, NULL, NULL,
+ NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
cl_program two_programs[2] = { program, simple_program };
- cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error);
- test_error( error, "Unable to create an executable from two binaries, one compiled with embedded header" );
+ cl_program fully_linked_program = clLinkProgram(
+ context, 1, &deviceID, "", 2, two_programs, NULL, NULL, &error);
+ test_error(error,
+ "Unable to create an executable from two binaries, one compiled "
+ "with embedded header");
- cl_kernel kernel = clCreateKernel(fully_linked_program, "CopyBuffer", &error);
- test_error( error, "Unable to create a simple kernel" );
+ cl_kernel kernel =
+ clCreateKernel(fully_linked_program, "CopyBuffer", &error);
+ test_error(error, "Unable to create a simple kernel");
error = verifyCopyBuffer(context, queue, kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
- cl_kernel another_kernel = clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error);
- test_error( error, "Unable to create another simple kernel" );
+ cl_kernel another_kernel =
+ clCreateKernel(fully_linked_program, "AnotherCopyBuffer", &error);
+ test_error(error, "Unable to create another simple kernel");
error = verifyCopyBuffer(context, queue, another_kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
/* All done! */
- error = clReleaseKernel( kernel );
- test_error( error, "Unable to release kernel object" );
+ error = clReleaseKernel(kernel);
+ test_error(error, "Unable to release kernel object");
- error = clReleaseKernel( another_kernel );
- test_error( error, "Unable to release another kernel object" );
+ error = clReleaseKernel(another_kernel);
+ test_error(error, "Unable to release another kernel object");
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( simple_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(simple_program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( fully_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(fully_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_program_binary_type(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_program_binary_type(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
{
int error;
- cl_program program, another_program, program_with_binary, fully_linked_program_with_binary;
+ cl_program program, another_program, program_with_binary,
+ fully_linked_program_with_binary;
cl_program_binary_type program_type = -1;
size_t size;
- size_t binarySize;
+ size_t binarySize;
unsigned char *binary;
log_info("Testing querying of program binary type...\n");
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_kernel);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, NULL,
+ NULL);
+ test_error(error, "Unable to compile a simple program");
- error = clGetProgramBuildInfo (program, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL);
- test_error( error, "Unable to get program binary type" );
+ error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BINARY_TYPE,
+ sizeof(cl_program_binary_type), &program_type,
+ NULL);
+ test_error(error, "Unable to get program binary type");
if (program_type != CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT)
{
- log_error( "ERROR: Expected program type of a just compiled program to be CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT (in %s:%d)\n", __FILE__, __LINE__ );
+ log_error("ERROR: Expected program type of a just compiled program to "
+ "be CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
program_type = -1;
// Get the size of the resulting binary (only one device)
- error = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
- test_error( error, "Unable to get binary size" );
+ error = clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES,
+ sizeof(binarySize), &binarySize, NULL);
+ test_error(error, "Unable to get binary size");
// Sanity check
- if( binarySize == 0 )
+ if (binarySize == 0)
{
- log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ );
+ log_error("ERROR: Binary size of program is zero (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
// Create a buffer and get the actual binary
{
- binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize);
- if (binary == NULL) {
- log_error( "ERROR: Unable to allocate binary character array with %lu characters! (in %s:%d)\n", binarySize, __FILE__, __LINE__ );
+ binary = (unsigned char *)malloc(sizeof(unsigned char) * binarySize);
+ if (binary == NULL)
+ {
+ log_error("ERROR: Unable to allocate binary character array with "
+ "%lu characters! (in %s:%d)\n",
+ binarySize, __FILE__, __LINE__);
return -1;
}
- unsigned char *buffers[ 1 ] = { binary };
- cl_int loadErrors[ 1 ];
+ unsigned char *buffers[1] = { binary };
+ cl_int loadErrors[1];
// Do another sanity check here first
size_t size;
- error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, 0, NULL, &size );
- test_error( error, "Unable to get expected size of binaries array" );
- if( size != sizeof( buffers ) )
+ error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, 0, NULL, &size);
+ test_error(error, "Unable to get expected size of binaries array");
+ if (size != sizeof(buffers))
{
- log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Expected size of binaries array in clGetProgramInfo is "
+ "incorrect (should be %d, got %d) (in %s:%d)\n",
+ (int)sizeof(buffers), (int)size, __FILE__, __LINE__);
free(binary);
return -1;
}
- error = clGetProgramInfo( program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
- test_error( error, "Unable to get program binary" );
+ error = clGetProgramInfo(program, CL_PROGRAM_BINARIES, sizeof(buffers),
+ &buffers, NULL);
+ test_error(error, "Unable to get program binary");
// use clCreateProgramWithBinary
- program_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error);
- test_error( error, "Unable to create program with binary" );
-
- error = clGetProgramBuildInfo (program_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL);
- test_error( error, "Unable to get program binary type" );
+ program_with_binary = clCreateProgramWithBinary(
+ context, 1, &deviceID, &binarySize, (const unsigned char **)buffers,
+ loadErrors, &error);
+ test_error(error, "Unable to create program with binary");
+
+ error = clGetProgramBuildInfo(
+ program_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE,
+ sizeof(cl_program_binary_type), &program_type, NULL);
+ test_error(error, "Unable to get program binary type");
if (program_type != CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT)
{
- log_error( "ERROR: Expected program type of a program created from compiled object to be CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT (in %s:%d)\n", __FILE__, __LINE__ );
+ log_error("ERROR: Expected program type of a program created from "
+ "compiled object to be "
+ "CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
program_type = -1;
free(binary);
}
- cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", 1, &program_with_binary, NULL, NULL, &error);
- test_error( error, "Unable to create a simple library" );
- error = clGetProgramBuildInfo (my_newly_minted_library, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL);
- test_error( error, "Unable to get program binary type" );
+ cl_program my_newly_minted_library =
+ clLinkProgram(context, 1, &deviceID, "-create-library", 1,
+ &program_with_binary, NULL, NULL, &error);
+ test_error(error, "Unable to create a simple library");
+ error = clGetProgramBuildInfo(
+ my_newly_minted_library, deviceID, CL_PROGRAM_BINARY_TYPE,
+ sizeof(cl_program_binary_type), &program_type, NULL);
+ test_error(error, "Unable to get program binary type");
if (program_type != CL_PROGRAM_BINARY_TYPE_LIBRARY)
{
- log_error( "ERROR: Expected program type of a just linked library to be CL_PROGRAM_BINARY_TYPE_LIBRARY (in %s:%d)\n", __FILE__, __LINE__ );
+ log_error("ERROR: Expected program type of a just linked library to be "
+ "CL_PROGRAM_BINARY_TYPE_LIBRARY (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
program_type = -1;
// Get the size of the resulting library (only one device)
- error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
- test_error( error, "Unable to get binary size" );
+ error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARY_SIZES,
+ sizeof(binarySize), &binarySize, NULL);
+ test_error(error, "Unable to get binary size");
// Sanity check
- if( binarySize == 0 )
+ if (binarySize == 0)
{
- log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ );
+ log_error("ERROR: Binary size of program is zero (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
// Create a buffer and get the actual binary
- binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize);
- if (binary == NULL) {
- log_error( "ERROR: Unable to allocate binary character array with %lu characters! (in %s:%d)\n", binarySize, __FILE__, __LINE__);
+ binary = (unsigned char *)malloc(sizeof(unsigned char) * binarySize);
+ if (binary == NULL)
+ {
+ log_error("ERROR: Unable to allocate binary character array with %lu "
+ "characters! (in %s:%d)\n",
+ binarySize, __FILE__, __LINE__);
return -1;
}
- unsigned char *buffers[ 1 ] = { binary };
- cl_int loadErrors[ 1 ];
+ unsigned char *buffers[1] = { binary };
+ cl_int loadErrors[1];
// Do another sanity check here first
- error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARIES, 0, NULL, &size );
- test_error( error, "Unable to get expected size of binaries array" );
- if( size != sizeof( buffers ) )
- {
- log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ );
+ error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARIES, 0,
+ NULL, &size);
+ test_error(error, "Unable to get expected size of binaries array");
+ if (size != sizeof(buffers))
+ {
+ log_error("ERROR: Expected size of binaries array in clGetProgramInfo "
+ "is incorrect (should be %d, got %d) (in %s:%d)\n",
+ (int)sizeof(buffers), (int)size, __FILE__, __LINE__);
free(binary);
return -1;
}
- error = clGetProgramInfo( my_newly_minted_library, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
- test_error( error, "Unable to get program binary" );
+ error = clGetProgramInfo(my_newly_minted_library, CL_PROGRAM_BINARIES,
+ sizeof(buffers), &buffers, NULL);
+ test_error(error, "Unable to get program binary");
// use clCreateProgramWithBinary
- cl_program library_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error);
- test_error( error, "Unable to create program with binary" );
- error = clGetProgramBuildInfo (library_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL);
- test_error( error, "Unable to get program binary type" );
+ cl_program library_with_binary = clCreateProgramWithBinary(
+ context, 1, &deviceID, &binarySize, (const unsigned char **)buffers,
+ loadErrors, &error);
+ test_error(error, "Unable to create program with binary");
+ error = clGetProgramBuildInfo(
+ library_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE,
+ sizeof(cl_program_binary_type), &program_type, NULL);
+ test_error(error, "Unable to get program binary type");
if (program_type != CL_PROGRAM_BINARY_TYPE_LIBRARY)
{
- log_error( "ERROR: Expected program type of a library loaded with binary to be CL_PROGRAM_BINARY_TYPE_LIBRARY (in %s:%d)\n", __FILE__, __LINE__ );
+ log_error("ERROR: Expected program type of a library loaded with "
+ "binary to be CL_PROGRAM_BINARY_TYPE_LIBRARY (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
program_type = -1;
- free(binary);
+ free(binary);
- error = create_single_kernel_helper_create_program(context, &another_program, 1, &another_simple_kernel);
- if( another_program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(
+ context, &another_program, 1, &another_simple_kernel);
+ if (another_program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
- error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(another_program, 1, &deviceID, NULL, 0, NULL, NULL,
+ NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
- cl_program program_and_archive[2] = { another_program, library_with_binary };
- cl_program fully_linked_program = clLinkProgram(context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error);
- test_error( error, "Unable to create an executable from a binary and a library" );
+ cl_program program_and_archive[2] = { another_program,
+ library_with_binary };
+ cl_program fully_linked_program = clLinkProgram(
+ context, 1, &deviceID, "", 2, program_and_archive, NULL, NULL, &error);
+ test_error(error,
+ "Unable to create an executable from a binary and a library");
- error = clGetProgramBuildInfo (fully_linked_program, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL);
- test_error( error, "Unable to get program binary type" );
+ error = clGetProgramBuildInfo(
+ fully_linked_program, deviceID, CL_PROGRAM_BINARY_TYPE,
+ sizeof(cl_program_binary_type), &program_type, NULL);
+ test_error(error, "Unable to get program binary type");
if (program_type != CL_PROGRAM_BINARY_TYPE_EXECUTABLE)
{
- log_error( "ERROR: Expected program type of a newly build executable to be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", __FILE__, __LINE__ );
+ log_error("ERROR: Expected program type of a newly build executable to "
+ "be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
program_type = -1;
// Get the size of the resulting binary (only one device)
- error = clGetProgramInfo( fully_linked_program, CL_PROGRAM_BINARY_SIZES, sizeof( binarySize ), &binarySize, NULL );
- test_error( error, "Unable to get binary size" );
+ error = clGetProgramInfo(fully_linked_program, CL_PROGRAM_BINARY_SIZES,
+ sizeof(binarySize), &binarySize, NULL);
+ test_error(error, "Unable to get binary size");
// Sanity check
- if( binarySize == 0 )
+ if (binarySize == 0)
{
- log_error( "ERROR: Binary size of program is zero (in %s:%d)\n", __FILE__, __LINE__ );
+ log_error("ERROR: Binary size of program is zero (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
// Create a buffer and get the actual binary
{
- binary = (unsigned char*)malloc(sizeof(unsigned char)*binarySize);
- if (binary == NULL) {
- log_error( "ERROR: Unable to allocate binary character array with %lu characters! (in %s:%d)\n", binarySize, __FILE__, __LINE__ );
+ binary = (unsigned char *)malloc(sizeof(unsigned char) * binarySize);
+ if (binary == NULL)
+ {
+ log_error("ERROR: Unable to allocate binary character array with "
+ "%lu characters! (in %s:%d)\n",
+ binarySize, __FILE__, __LINE__);
return -1;
}
- unsigned char *buffers[ 1 ] = { binary };
- cl_int loadErrors[ 1 ];
+ unsigned char *buffers[1] = { binary };
+ cl_int loadErrors[1];
// Do another sanity check here first
size_t size;
- error = clGetProgramInfo( fully_linked_program, CL_PROGRAM_BINARIES, 0, NULL, &size );
- test_error( error, "Unable to get expected size of binaries array" );
- if( size != sizeof( buffers ) )
+ error = clGetProgramInfo(fully_linked_program, CL_PROGRAM_BINARIES, 0,
+ NULL, &size);
+ test_error(error, "Unable to get expected size of binaries array");
+ if (size != sizeof(buffers))
{
- log_error( "ERROR: Expected size of binaries array in clGetProgramInfo is incorrect (should be %d, got %d) (in %s:%d)\n", (int)sizeof( buffers ), (int)size, __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Expected size of binaries array in clGetProgramInfo is "
+ "incorrect (should be %d, got %d) (in %s:%d)\n",
+ (int)sizeof(buffers), (int)size, __FILE__, __LINE__);
free(binary);
return -1;
}
- error = clGetProgramInfo( fully_linked_program, CL_PROGRAM_BINARIES, sizeof( buffers ), &buffers, NULL );
- test_error( error, "Unable to get program binary" );
+ error = clGetProgramInfo(fully_linked_program, CL_PROGRAM_BINARIES,
+ sizeof(buffers), &buffers, NULL);
+ test_error(error, "Unable to get program binary");
// use clCreateProgramWithBinary
- fully_linked_program_with_binary = clCreateProgramWithBinary(context, 1, &deviceID, &binarySize, (const unsigned char**)buffers, loadErrors, &error);
- test_error( error, "Unable to create program with binary" );
-
- error = clGetProgramBuildInfo (fully_linked_program_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL);
- test_error( error, "Unable to get program binary type" );
+ fully_linked_program_with_binary = clCreateProgramWithBinary(
+ context, 1, &deviceID, &binarySize, (const unsigned char **)buffers,
+ loadErrors, &error);
+ test_error(error, "Unable to create program with binary");
+
+ error = clGetProgramBuildInfo(
+ fully_linked_program_with_binary, deviceID, CL_PROGRAM_BINARY_TYPE,
+ sizeof(cl_program_binary_type), &program_type, NULL);
+ test_error(error, "Unable to get program binary type");
if (program_type != CL_PROGRAM_BINARY_TYPE_EXECUTABLE)
{
- log_error( "ERROR: Expected program type of a program created from a fully linked executable binary to be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", __FILE__, __LINE__ );
+ log_error("ERROR: Expected program type of a program created from "
+ "a fully linked executable binary to be "
+ "CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
program_type = -1;
free(binary);
}
- error = clBuildProgram(fully_linked_program_with_binary, 1, &deviceID, NULL, NULL, NULL);
- test_error( error, "Unable to build a simple program" );
+ error = clBuildProgram(fully_linked_program_with_binary, 1, &deviceID, NULL,
+ NULL, NULL);
+ test_error(error, "Unable to build a simple program");
- cl_kernel kernel = clCreateKernel(fully_linked_program_with_binary, "CopyBuffer", &error);
- test_error( error, "Unable to create a simple kernel" );
+ cl_kernel kernel =
+ clCreateKernel(fully_linked_program_with_binary, "CopyBuffer", &error);
+ test_error(error, "Unable to create a simple kernel");
error = verifyCopyBuffer(context, queue, kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
- cl_kernel another_kernel = clCreateKernel(fully_linked_program_with_binary, "AnotherCopyBuffer", &error);
- test_error( error, "Unable to create another simple kernel" );
+ cl_kernel another_kernel = clCreateKernel(fully_linked_program_with_binary,
+ "AnotherCopyBuffer", &error);
+ test_error(error, "Unable to create another simple kernel");
error = verifyCopyBuffer(context, queue, another_kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
/* All done! */
- error = clReleaseKernel( kernel );
- test_error( error, "Unable to release kernel object" );
+ error = clReleaseKernel(kernel);
+ test_error(error, "Unable to release kernel object");
- error = clReleaseKernel( another_kernel );
- test_error( error, "Unable to release another kernel object" );
+ error = clReleaseKernel(another_kernel);
+ test_error(error, "Unable to release another kernel object");
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- /* Oh, one more thing. Steve Jobs and apparently Herb Sutter. The question is "Who is copying whom?" */
- error = create_single_kernel_helper_create_program(context, &program, 1, &simple_kernel);
- if( program == NULL || error != CL_SUCCESS )
+ /* Oh, one more thing. Steve Jobs and apparently Herb Sutter. The question
+ * is "Who is copying whom?" */
+ error = create_single_kernel_helper_create_program(context, &program, 1,
+ &simple_kernel);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create a simple test program! (%s in %s:%d)\n", IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Unable to create a simple test program! (%s in %s:%d)\n",
+ IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
error = clBuildProgram(program, 1, &deviceID, NULL, NULL, NULL);
- test_error( error, "Unable to build a simple program" );
- error = clGetProgramBuildInfo (program, deviceID, CL_PROGRAM_BINARY_TYPE, sizeof(cl_program_binary_type), &program_type, NULL);
- test_error( error, "Unable to get program binary type" );
+ test_error(error, "Unable to build a simple program");
+ error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BINARY_TYPE,
+ sizeof(cl_program_binary_type), &program_type,
+ NULL);
+ test_error(error, "Unable to get program binary type");
if (program_type != CL_PROGRAM_BINARY_TYPE_EXECUTABLE)
{
- log_error( "ERROR: Expected program type of a program created from compiled object to be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n", __FILE__, __LINE__ );
+ log_error(
+ "ERROR: Expected program type of a program created from compiled "
+ "object to be CL_PROGRAM_BINARY_TYPE_EXECUTABLE (in %s:%d)\n",
+ __FILE__, __LINE__);
return -1;
}
program_type = -1;
/* All's well that ends well. William Shakespeare */
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( another_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(another_program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_minted_library );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_minted_library);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( library_with_binary );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(library_with_binary);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( fully_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(fully_linked_program);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( fully_linked_program_with_binary );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(fully_linked_program_with_binary);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( program_with_binary );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program_with_binary);
+ test_error(error, "Unable to release program object");
return 0;
}
-volatile int compileNotificationSent;
+volatile int compileNotificationSent;
-void CL_CALLBACK test_notify_compile_complete( cl_program program, void *userData )
+void CL_CALLBACK test_notify_compile_complete(cl_program program,
+ void *userData)
{
- if( userData == NULL || strcmp( (char *)userData, "compilation" ) != 0 )
+ if (userData == NULL || strcmp((char *)userData, "compilation") != 0)
{
- log_error( "ERROR: User data passed in to compile notify function was not correct! (in %s:%d)\n", __FILE__, __LINE__ );
+ log_error("ERROR: User data passed in to compile notify function was "
+ "not correct! (in %s:%d)\n",
+ __FILE__, __LINE__);
compileNotificationSent = -1;
}
else
compileNotificationSent = 1;
- log_info( "\n <-- program successfully compiled\n" );
+ log_info("\n <-- program successfully compiled\n");
}
-volatile int libraryCreationNotificationSent;
+volatile int libraryCreationNotificationSent;
-void CL_CALLBACK test_notify_create_library_complete( cl_program program, void *userData )
+void CL_CALLBACK test_notify_create_library_complete(cl_program program,
+ void *userData)
{
- if( userData == NULL || strcmp( (char *)userData, "create library" ) != 0 )
+ if (userData == NULL || strcmp((char *)userData, "create library") != 0)
{
- log_error( "ERROR: User data passed in to library creation notify function was not correct! (in %s:%d)\n", __FILE__, __LINE__ );
+ log_error("ERROR: User data passed in to library creation notify "
+ "function was not correct! (in %s:%d)\n",
+ __FILE__, __LINE__);
libraryCreationNotificationSent = -1;
}
else
libraryCreationNotificationSent = 1;
- log_info( "\n <-- library successfully created\n" );
+ log_info("\n <-- library successfully created\n");
}
-volatile int linkNotificationSent;
+volatile int linkNotificationSent;
-void CL_CALLBACK test_notify_link_complete( cl_program program, void *userData )
+void CL_CALLBACK test_notify_link_complete(cl_program program, void *userData)
{
- if( userData == NULL || strcmp( (char *)userData, "linking" ) != 0 )
+ if (userData == NULL || strcmp((char *)userData, "linking") != 0)
{
- log_error( "ERROR: User data passed in to link notify function was not correct! (in %s:%d)\n", __FILE__, __LINE__ );
+ log_error("ERROR: User data passed in to link notify function was not "
+ "correct! (in %s:%d)\n",
+ __FILE__, __LINE__);
linkNotificationSent = -1;
}
else
linkNotificationSent = 1;
- log_info( "\n <-- program successfully linked\n" );
+ log_info("\n <-- program successfully linked\n");
}
-int test_large_compile_and_link_status_options_log(cl_context context, cl_device_id deviceID, cl_command_queue queue, unsigned int numLines)
+int test_large_compile_and_link_status_options_log(cl_context context,
+ cl_device_id deviceID,
+ cl_command_queue queue,
+ unsigned int numLines)
{
int error;
cl_program program;
- cl_program * simple_kernels;
+ cl_program *simple_kernels;
const char **lines;
unsigned int i;
char buffer[MAX_LINE_SIZE_IN_PROGRAM];
@@ -2984,263 +3686,349 @@ int test_large_compile_and_link_status_options_log(cl_context context, cl_device
cl_build_status status;
size_t size_ret;
- compileNotificationSent = libraryCreationNotificationSent = linkNotificationSent = 0;
+ compileNotificationSent = libraryCreationNotificationSent =
+ linkNotificationSent = 0;
- simple_kernels = (cl_program*)malloc(numLines*sizeof(cl_program));
- if (simple_kernels == NULL) {
- log_error( "ERROR: Unable to allocate kernels array with %d kernels! (in %s:%d)\n", numLines, __FILE__, __LINE__);
+ simple_kernels = (cl_program *)malloc(numLines * sizeof(cl_program));
+ if (simple_kernels == NULL)
+ {
+ log_error("ERROR: Unable to allocate kernels array with %d kernels! "
+ "(in %s:%d)\n",
+ numLines, __FILE__, __LINE__);
return -1;
}
/* First, allocate the array for our line pointers */
- lines = (const char **)malloc( (2*numLines + 2) * sizeof( const char * ) );
- if (lines == NULL) {
- log_error( "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n", (2*numLines + 2), __FILE__, __LINE__);
+ lines = (const char **)malloc((2 * numLines + 2) * sizeof(const char *));
+ if (lines == NULL)
+ {
+ log_error(
+ "ERROR: Unable to allocate lines array with %d lines! (in %s:%d)\n",
+ (2 * numLines + 2), __FILE__, __LINE__);
return -1;
}
- for(i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
sprintf(buffer, composite_kernel_extern_template, i);
lines[i] = _strdup(buffer);
}
/* First and last lines are easy */
- lines[ numLines ] = composite_kernel_start;
- lines[ 2*numLines + 1] = composite_kernel_end;
+ lines[numLines] = composite_kernel_start;
+ lines[2 * numLines + 1] = composite_kernel_end;
/* Fill the rest with templated kernels */
- for(i = numLines + 1; i < 2*numLines + 1; i++ )
+ for (i = numLines + 1; i < 2 * numLines + 1; i++)
{
sprintf(buffer, composite_kernel_template, i - numLines - 1);
- lines[ i ] = _strdup(buffer);
+ lines[i] = _strdup(buffer);
}
/* Try to create a program with these lines */
- error = create_single_kernel_helper_create_program(context, &program, 2 * numLines + 2, lines);
- if( program == NULL || error != CL_SUCCESS )
+ error = create_single_kernel_helper_create_program(context, &program,
+ 2 * numLines + 2, lines);
+ if (program == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create long test program with %d lines! (%s) (in %s:%d)\n", numLines, IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create long test program with %d lines! "
+ "(%s) (in %s:%d)\n",
+ numLines, IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
/* Lets check that the compilation status is CL_BUILD_NONE */
- error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Unable to get program compile status" );
+ error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error, "Unable to get program compile status");
if (status != CL_BUILD_NONE)
{
- log_error( "ERROR: Expected compile status to be CL_BUILD_NONE prior to the beginning of the compilation! (status: %d in %s:%d)\n", (int)status, __FILE__, __LINE__ );
+ log_error("ERROR: Expected compile status to be CL_BUILD_NONE prior to "
+ "the beginning of the compilation! (status: %d in %s:%d)\n",
+ (int)status, __FILE__, __LINE__);
return -1;
}
/* Compile it */
- error = clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL, test_notify_compile_complete, (void *)"compilation");
- test_error( error, "Unable to compile a simple program" );
+ error =
+ clCompileProgram(program, 1, &deviceID, NULL, 0, NULL, NULL,
+ test_notify_compile_complete, (void *)"compilation");
+ test_error(error, "Unable to compile a simple program");
- /* Wait for compile to complete (just keep polling, since we're just a test */
- error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Unable to get program compile status" );
+ /* Wait for compile to complete (just keep polling, since we're just a test
+ */
+ error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error, "Unable to get program compile status");
- while( (int)status == CL_BUILD_IN_PROGRESS )
+ while ((int)status == CL_BUILD_IN_PROGRESS)
{
- log_info( "\n -- still waiting for compile... (status is %d)", status );
- sleep( 1 );
- error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Unable to get program compile status" );
+ log_info("\n -- still waiting for compile... (status is %d)", status);
+ sleep(1);
+ error =
+ clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_STATUS,
+ sizeof(status), &status, NULL);
+ test_error(error, "Unable to get program compile status");
}
- if( status != CL_BUILD_SUCCESS )
+ if (status != CL_BUILD_SUCCESS)
{
- log_error( "ERROR: compile failed! (status: %d in %s:%d)\n", (int)status, __FILE__, __LINE__ );
+ log_error("ERROR: compile failed! (status: %d in %s:%d)\n", (int)status,
+ __FILE__, __LINE__);
return -1;
}
- error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret );
- test_error( error, "Device failed to return compile log size" );
+ error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG, 0,
+ NULL, &size_ret);
+ test_error(error, "Device failed to return compile log size");
compile_log = (char *)malloc(size_ret);
- error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_LOG, size_ret, compile_log, NULL );
- if (error != CL_SUCCESS){
- log_error("Device failed to return a compile log (in %s:%d)\n", __FILE__, __LINE__);
+ error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_LOG,
+ size_ret, compile_log, NULL);
+ if (error != CL_SUCCESS)
+ {
+ log_error("Device failed to return a compile log (in %s:%d)\n",
+ __FILE__, __LINE__);
test_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
}
log_info("BUILD LOG: %s\n", compile_log);
free(compile_log);
- error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret );
+ error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_OPTIONS,
+ 0, NULL, &size_ret);
test_error(error, "Device failed to return compile options size");
compile_options = (char *)malloc(size_ret);
- error = clGetProgramBuildInfo( program, deviceID, CL_PROGRAM_BUILD_OPTIONS, size_ret, compile_options, NULL );
- test_error(error, "Device failed to return compile options.\nclGetProgramBuildInfo CL_PROGRAM_BUILD_OPTIONS failed");
+ error = clGetProgramBuildInfo(program, deviceID, CL_PROGRAM_BUILD_OPTIONS,
+ size_ret, compile_options, NULL);
+ test_error(
+ error,
+ "Device failed to return compile options.\nclGetProgramBuildInfo "
+ "CL_PROGRAM_BUILD_OPTIONS failed");
log_info("BUILD OPTIONS: %s\n", compile_options);
free(compile_options);
/* Create and compile templated kernels */
- for( i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
sprintf(buffer, simple_kernel_template, i);
- const char* kernel_source = _strdup(buffer);
- error = create_single_kernel_helper_create_program(context, &simple_kernels[i], 1, &kernel_source);
- if( simple_kernels[i] == NULL || error != CL_SUCCESS )
+ const char *kernel_source = _strdup(buffer);
+ error = create_single_kernel_helper_create_program(
+ context, &simple_kernels[i], 1, &kernel_source);
+ if (simple_kernels[i] == NULL || error != CL_SUCCESS)
{
- log_error( "ERROR: Unable to create long test program with %d lines! (%s in %s:%d)", numLines, IGetErrorString( error ), __FILE__, __LINE__ );
+ log_error("ERROR: Unable to create long test program with %d "
+ "lines! (%s in %s:%d)",
+ numLines, IGetErrorString(error), __FILE__, __LINE__);
return -1;
}
/* Compile it */
- error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL, NULL, NULL, NULL);
- test_error( error, "Unable to compile a simple program" );
+ error = clCompileProgram(simple_kernels[i], 1, &deviceID, NULL, 0, NULL,
+ NULL, NULL, NULL);
+ test_error(error, "Unable to compile a simple program");
- free((void*)kernel_source);
+ free((void *)kernel_source);
}
/* Create library out of compiled templated kernels */
- cl_program my_newly_minted_library = clLinkProgram(context, 1, &deviceID, "-create-library", numLines, simple_kernels, test_notify_create_library_complete, (void *)"create library", &error);
- test_error( error, "Unable to create a multi-line library" );
+ cl_program my_newly_minted_library = clLinkProgram(
+ context, 1, &deviceID, "-create-library", numLines, simple_kernels,
+ test_notify_create_library_complete, (void *)"create library", &error);
+ test_error(error, "Unable to create a multi-line library");
- /* Wait for library creation to complete (just keep polling, since we're just a test */
- error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Unable to get library creation link status" );
+ /* Wait for library creation to complete (just keep polling, since we're
+ * just a test */
+ error = clGetProgramBuildInfo(my_newly_minted_library, deviceID,
+ CL_PROGRAM_BUILD_STATUS, sizeof(status),
+ &status, NULL);
+ test_error(error, "Unable to get library creation link status");
- while( (int)status == CL_BUILD_IN_PROGRESS )
+ while ((int)status == CL_BUILD_IN_PROGRESS)
{
- log_info( "\n -- still waiting for library creation... (status is %d)", status );
- sleep( 1 );
- error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Unable to get library creation link status" );
+ log_info("\n -- still waiting for library creation... (status is %d)",
+ status);
+ sleep(1);
+ error = clGetProgramBuildInfo(my_newly_minted_library, deviceID,
+ CL_PROGRAM_BUILD_STATUS, sizeof(status),
+ &status, NULL);
+ test_error(error, "Unable to get library creation link status");
}
- if( status != CL_BUILD_SUCCESS )
+ if (status != CL_BUILD_SUCCESS)
{
- log_error( "ERROR: library creation failed! (status: %d in %s:%d)\n", (int)status, __FILE__, __LINE__ );
+ log_error("ERROR: library creation failed! (status: %d in %s:%d)\n",
+ (int)status, __FILE__, __LINE__);
return -1;
}
- error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret );
- test_error( error, "Device failed to return a library creation log size" );
+ error = clGetProgramBuildInfo(my_newly_minted_library, deviceID,
+ CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret);
+ test_error(error, "Device failed to return a library creation log size");
library_log = (char *)malloc(size_ret);
- error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_LOG, size_ret, library_log, NULL );
- if (error != CL_SUCCESS) {
- log_error("Device failed to return a library creation log (in %s:%d)\n", __FILE__, __LINE__);
+ error = clGetProgramBuildInfo(my_newly_minted_library, deviceID,
+ CL_PROGRAM_BUILD_LOG, size_ret, library_log,
+ NULL);
+ if (error != CL_SUCCESS)
+ {
+ log_error("Device failed to return a library creation log (in %s:%d)\n",
+ __FILE__, __LINE__);
test_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
}
log_info("CREATE LIBRARY LOG: %s\n", library_log);
free(library_log);
- error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret );
+ error = clGetProgramBuildInfo(my_newly_minted_library, deviceID,
+ CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret);
test_error(error, "Device failed to return library creation options size");
library_options = (char *)malloc(size_ret);
- error = clGetProgramBuildInfo( my_newly_minted_library, deviceID, CL_PROGRAM_BUILD_OPTIONS, size_ret, library_options, NULL );
- test_error(error, "Device failed to return library creation options.\nclGetProgramBuildInfo CL_PROGRAM_BUILD_OPTIONS failed");
+ error = clGetProgramBuildInfo(my_newly_minted_library, deviceID,
+ CL_PROGRAM_BUILD_OPTIONS, size_ret,
+ library_options, NULL);
+ test_error(
+ error,
+ "Device failed to return library creation "
+ "options.\nclGetProgramBuildInfo CL_PROGRAM_BUILD_OPTIONS failed");
log_info("CREATE LIBRARY OPTIONS: %s\n", library_options);
free(library_options);
- /* Link the program that calls the kernels and the library that contains them */
+ /* Link the program that calls the kernels and the library that contains
+ * them */
cl_program programs[2] = { program, my_newly_minted_library };
- cl_program my_newly_linked_program = clLinkProgram(context, 1, &deviceID, NULL, 2, programs, test_notify_link_complete, (void *)"linking", &error);
- test_error( error, "Unable to link a program with a library" );
+ cl_program my_newly_linked_program =
+ clLinkProgram(context, 1, &deviceID, NULL, 2, programs,
+ test_notify_link_complete, (void *)"linking", &error);
+ test_error(error, "Unable to link a program with a library");
- /* Wait for linking to complete (just keep polling, since we're just a test */
- error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Unable to get program link status" );
+ /* Wait for linking to complete (just keep polling, since we're just a test
+ */
+ error = clGetProgramBuildInfo(my_newly_linked_program, deviceID,
+ CL_PROGRAM_BUILD_STATUS, sizeof(status),
+ &status, NULL);
+ test_error(error, "Unable to get program link status");
- while( (int)status == CL_BUILD_IN_PROGRESS )
+ while ((int)status == CL_BUILD_IN_PROGRESS)
{
- log_info( "\n -- still waiting for program linking... (status is %d)", status );
- sleep( 1 );
- error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_STATUS, sizeof( status ), &status, NULL );
- test_error( error, "Unable to get program link status" );
+ log_info("\n -- still waiting for program linking... (status is %d)",
+ status);
+ sleep(1);
+ error = clGetProgramBuildInfo(my_newly_linked_program, deviceID,
+ CL_PROGRAM_BUILD_STATUS, sizeof(status),
+ &status, NULL);
+ test_error(error, "Unable to get program link status");
}
- if( status != CL_BUILD_SUCCESS )
+ if (status != CL_BUILD_SUCCESS)
{
- log_error( "ERROR: program linking failed! (status: %d in %s:%d)\n", (int)status, __FILE__, __LINE__ );
+ log_error("ERROR: program linking failed! (status: %d in %s:%d)\n",
+ (int)status, __FILE__, __LINE__);
return -1;
}
- error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret );
- test_error( error, "Device failed to return a linking log size" );
+ error = clGetProgramBuildInfo(my_newly_linked_program, deviceID,
+ CL_PROGRAM_BUILD_LOG, 0, NULL, &size_ret);
+ test_error(error, "Device failed to return a linking log size");
linking_log = (char *)malloc(size_ret);
- error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_LOG, size_ret, linking_log, NULL );
- if (error != CL_SUCCESS){
- log_error("Device failed to return a linking log (in %s:%d).\n", __FILE__, __LINE__);
+ error = clGetProgramBuildInfo(my_newly_linked_program, deviceID,
+ CL_PROGRAM_BUILD_LOG, size_ret, linking_log,
+ NULL);
+ if (error != CL_SUCCESS)
+ {
+ log_error("Device failed to return a linking log (in %s:%d).\n",
+ __FILE__, __LINE__);
test_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
}
log_info("BUILDING LOG: %s\n", linking_log);
free(linking_log);
- error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret );
+ error = clGetProgramBuildInfo(my_newly_linked_program, deviceID,
+ CL_PROGRAM_BUILD_OPTIONS, 0, NULL, &size_ret);
test_error(error, "Device failed to return linking options size");
linking_options = (char *)malloc(size_ret);
- error = clGetProgramBuildInfo( my_newly_linked_program, deviceID, CL_PROGRAM_BUILD_OPTIONS, size_ret, linking_options, NULL );
- test_error(error, "Device failed to return linking options.\nclGetProgramBuildInfo CL_PROGRAM_BUILD_OPTIONS failed");
+ error = clGetProgramBuildInfo(my_newly_linked_program, deviceID,
+ CL_PROGRAM_BUILD_OPTIONS, size_ret,
+ linking_options, NULL);
+ test_error(
+ error,
+ "Device failed to return linking options.\nclGetProgramBuildInfo "
+ "CL_PROGRAM_BUILD_OPTIONS failed");
log_info("BUILDING OPTIONS: %s\n", linking_options);
free(linking_options);
// Create the composite kernel
- cl_kernel kernel = clCreateKernel(my_newly_linked_program, "CompositeKernel", &error);
- test_error( error, "Unable to create a composite kernel" );
+ cl_kernel kernel =
+ clCreateKernel(my_newly_linked_program, "CompositeKernel", &error);
+ test_error(error, "Unable to create a composite kernel");
// Run the composite kernel and verify the results
error = verifyCopyBuffer(context, queue, kernel);
- if (error != CL_SUCCESS)
- return error;
+ if (error != CL_SUCCESS) return error;
/* All done! */
- error = clReleaseKernel( kernel );
- test_error( error, "Unable to release kernel object" );
+ error = clReleaseKernel(kernel);
+ test_error(error, "Unable to release kernel object");
- error = clReleaseProgram( program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(program);
+ test_error(error, "Unable to release program object");
- for(i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
- free( (void*)lines[i] );
- free( (void*)lines[i+numLines+1] );
+ free((void *)lines[i]);
+ free((void *)lines[i + numLines + 1]);
}
- free( lines );
+ free(lines);
- for(i = 0; i < numLines; i++)
+ for (i = 0; i < numLines; i++)
{
- error = clReleaseProgram( simple_kernels[i] );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(simple_kernels[i]);
+ test_error(error, "Unable to release program object");
}
- free( simple_kernels );
+ free(simple_kernels);
- error = clReleaseProgram( my_newly_minted_library );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_minted_library);
+ test_error(error, "Unable to release program object");
- error = clReleaseProgram( my_newly_linked_program );
- test_error( error, "Unable to release program object" );
+ error = clReleaseProgram(my_newly_linked_program);
+ test_error(error, "Unable to release program object");
return 0;
}
-int test_compile_and_link_status_options_log(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
+int test_compile_and_link_status_options_log(cl_device_id deviceID,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements)
{
- unsigned int toTest[] = { 256, 0 }; //512, 1024, 8192, 16384, 32768, 0 };
+ unsigned int toTest[] = { 256, 0 }; // 512, 1024, 8192, 16384, 32768, 0 };
unsigned int i;
- log_info( "Testing Compile and Link Status, Options and Logging ...this might take awhile...\n" );
+ log_info("Testing Compile and Link Status, Options and Logging ...this "
+ "might take awhile...\n");
- for( i = 0; toTest[ i ] != 0; i++ )
+ for (i = 0; toTest[i] != 0; i++)
{
- log_info( " %d...\n", toTest[ i ] );
+ log_info(" %d...\n", toTest[i]);
#if defined(_WIN32)
clock_t start = clock();
-#elif defined(__linux__) || defined(__APPLE__)
- timeval time1, time2;
- gettimeofday(&time1, NULL);
+#elif defined(__linux__) || defined(__APPLE__)
+ timeval time1, time2;
+ gettimeofday(&time1, NULL);
#endif
- if( test_large_compile_and_link_status_options_log( context, deviceID, queue, toTest[ i ] ) != 0 )
+ if (test_large_compile_and_link_status_options_log(context, deviceID,
+ queue, toTest[i])
+ != 0)
{
- log_error( "ERROR: large program compilation, linking, status, options and logging test failed for %d lines! (in %s:%d)\n", toTest[ i ], __FILE__, __LINE__ );
+ log_error(
+ "ERROR: large program compilation, linking, status, options "
+ "and logging test failed for %d lines! (in %s:%d)\n",
+ toTest[i], __FILE__, __LINE__);
return -1;
}
#if defined(_WIN32)
clock_t end = clock();
- log_perf( (float)( end - start ) / (float)CLOCKS_PER_SEC, false, "clock() time in secs", "%d lines", toTest[i] );
-#elif defined(__linux__) || defined(__APPLE__)
- gettimeofday(&time2, NULL);
- log_perf( (float)(float)(time2.tv_sec - time1.tv_sec) + 1.0e-6 * (time2.tv_usec - time1.tv_usec) , false, "wall time in secs", "%d lines", toTest[i] );
+ log_perf((float)(end - start) / (float)CLOCKS_PER_SEC, false,
+ "clock() time in secs", "%d lines", toTest[i]);
+#elif defined(__linux__) || defined(__APPLE__)
+ gettimeofday(&time2, NULL);
+ log_perf((float)(float)(time2.tv_sec - time1.tv_sec)
+ + 1.0e-6 * (time2.tv_usec - time1.tv_usec),
+ false, "wall time in secs", "%d lines", toTest[i]);
#endif
}
diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
index d53af8dc..89626b79 100644
--- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
+++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp
@@ -76,6 +76,7 @@ const char *known_extensions[] = {
"cl_khr_device_uuid",
"cl_khr_pci_bus_info",
"cl_khr_suggested_local_work_size",
+ "cl_khr_expect_assume",
"cl_khr_spirv_linkonce_odr",
"cl_khr_semaphore",
"cl_khr_external_semaphore",
diff --git a/test_conformance/computeinfo/main.cpp b/test_conformance/computeinfo/main.cpp
index 9cecabea..b1d73af3 100644
--- a/test_conformance/computeinfo/main.cpp
+++ b/test_conformance/computeinfo/main.cpp
@@ -439,8 +439,8 @@ int getPlatformConfigInfo(cl_platform_id platform, config_info* info)
err = clGetPlatformInfo(platform, info->opcode, config_size_set,
&info->config.cl_name_version_single,
&config_size_ret);
+ size_err = config_size_set != config_size_ret;
}
- size_err = config_size_set != config_size_ret;
break;
default:
log_error("Unknown config type: %d\n", info->config_type);
@@ -585,8 +585,8 @@ int getConfigInfo(cl_device_id device, config_info* info)
err = clGetDeviceInfo(device, info->opcode, config_size_set,
&info->config.cl_name_version_single,
&config_size_ret);
+ size_err = config_size_set != config_size_ret;
}
- size_err = config_size_set != config_size_ret;
break;
default:
log_error("Unknown config type: %d\n", info->config_type);
diff --git a/test_conformance/conversions/CMakeLists.txt b/test_conformance/conversions/CMakeLists.txt
index cc019b26..8ed3ba18 100644
--- a/test_conformance/conversions/CMakeLists.txt
+++ b/test_conformance/conversions/CMakeLists.txt
@@ -16,6 +16,6 @@ set_source_files_properties(
COMPILE_FLAGS -march=i686)
endif(NOT CMAKE_CL_64 AND NOT MSVC AND NOT ANDROID)
-set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
+set_gnulike_module_compile_flags("-Wno-unused-but-set-variable -Wno-sign-compare")
include(../CMakeCommon.txt)
diff --git a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp
index dfb32279..1020638a 100644
--- a/test_conformance/conversions/basic_test_conversions.cpp
+++ b/test_conformance/conversions/basic_test_conversions.cpp
@@ -1,6 +1,6 @@
//
// Copyright (c) 2017 The Khronos Group Inc.
-//
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -15,2243 +15,1496 @@
//
#include "harness/testHarness.h"
#include "harness/compat.h"
+#include "harness/ThreadPool.h"
-#include "basic_test_conversions.h"
-#include <limits.h>
-#include <string.h>
+#if defined(__APPLE__)
+#include <sys/sysctl.h>
+#include <mach/mach_time.h>
+#endif
-#include "harness/mt19937.h"
+#if defined(__linux__)
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <linux/sysctl.h>
+#endif
+#if defined(__linux__)
+#include <sys/param.h>
+#include <libgen.h>
+#endif
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
-#include "fplib.h"
+#if defined(__MINGW32__)
+#include <sys/param.h>
#endif
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
-/* Rounding modes and saturation for use with qcom 64 bit to float conversion library */
- bool qcom_sat;
- roundingMode qcom_rm;
+#include <sstream>
+#include <stdarg.h>
+#if !defined(_WIN32)
+#include <libgen.h>
+#include <sys/mman.h>
#endif
+#include <time.h>
-static inline cl_ulong random64( MTdata d );
+#include <algorithm>
-#if defined (_WIN32)
- #include <mmintrin.h>
- #include <emmintrin.h>
+#include <vector>
+#include <type_traits>
+
+#include "basic_test_conversions.h"
+
+#if defined(_WIN32)
+#include <mmintrin.h>
+#include <emmintrin.h>
#else // !_WIN32
-#if defined (__SSE__ )
- #include <xmmintrin.h>
+#if defined(__SSE__)
+#include <xmmintrin.h>
#endif
-#if defined (__SSE2__ )
- #include <emmintrin.h>
+#if defined(__SSE2__)
+#include <emmintrin.h>
#endif
#endif // _WIN32
-const char *gTypeNames[ kTypeCount ] = {
- "uchar", "char",
- "ushort", "short",
- "uint", "int",
- "float", "double",
- "ulong", "long"
- };
-
-const char *gRoundingModeNames[ kRoundingModeCount ] = {
- "",
- "_rte",
- "_rtp",
- "_rtn",
- "_rtz"
- };
-
-const char *gSaturationNames[ 2 ] = { "", "_sat" };
-
-size_t gTypeSizes[ kTypeCount ] = {
- sizeof( cl_uchar ), sizeof( cl_char ),
- sizeof( cl_ushort ), sizeof( cl_short ),
- sizeof( cl_uint ), sizeof( cl_int ),
- sizeof( cl_float ), sizeof( cl_double ),
- sizeof( cl_ulong ), sizeof( cl_long ),
- };
-
-long lrintf_clamped( float f );
-long lrintf_clamped( float f )
-{
- static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) };
+cl_context gContext = NULL;
+cl_command_queue gQueue = NULL;
+int gStartTestNumber = -1;
+int gEndTestNumber = 0;
+#if defined(__APPLE__)
+int gTimeResults = 1;
+#else
+int gTimeResults = 0;
+#endif
+int gReportAverageTimes = 0;
+void *gIn = NULL;
+void *gRef = NULL;
+void *gAllowZ = NULL;
+void *gOut[kCallStyleCount] = { NULL };
+cl_mem gInBuffer;
+cl_mem gOutBuffers[kCallStyleCount];
+size_t gComputeDevices = 0;
+uint32_t gDeviceFrequency = 0;
+int gWimpyMode = 0;
+int gWimpyReductionFactor = 128;
+int gSkipTesting = 0;
+int gForceFTZ = 0;
+int gIsRTZ = 0;
+uint32_t gSimdSize = 1;
+int gHasDouble = 0;
+int gTestDouble = 1;
+const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" };
+int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 };
+int gMinVectorSize = 0;
+int gMaxVectorSize = sizeof(vectorSizes) / sizeof(vectorSizes[0]);
+MTdata gMTdata;
+const char **argList = NULL;
+int argCount = 0;
+
+
+double SubtractTime(uint64_t endTime, uint64_t startTime);
+
+
+// clang-format off
+// for readability sake keep this section unformatted
+
+std::vector<unsigned int> DataInitInfo::specialValuesUInt = {
+ uint32_t(INT_MIN), uint32_t(INT_MIN + 1), uint32_t(INT_MIN + 2),
+ uint32_t(-(1 << 30) - 3), uint32_t(-(1 << 30) - 2), uint32_t(-(1 << 30) - 1), uint32_t(-(1 << 30)),
+ uint32_t(-(1 << 30) + 1), uint32_t(-(1 << 30) + 2), uint32_t(-(1 << 30) + 3),
+ uint32_t(-(1 << 24) - 3), uint32_t(-(1 << 24) - 2),uint32_t(-(1 << 24) - 1),
+ uint32_t(-(1 << 24)), uint32_t(-(1 << 24) + 1), uint32_t(-(1 << 24) + 2), uint32_t(-(1 << 24) + 3),
+ uint32_t(-(1 << 23) - 3), uint32_t(-(1 << 23) - 2),uint32_t(-(1 << 23) - 1),
+ uint32_t(-(1 << 23)), uint32_t(-(1 << 23) + 1), uint32_t(-(1 << 23) + 2), uint32_t(-(1 << 23) + 3),
+ uint32_t(-(1 << 22) - 3), uint32_t(-(1 << 22) - 2),uint32_t(-(1 << 22) - 1),
+ uint32_t(-(1 << 22)), uint32_t(-(1 << 22) + 1), uint32_t(-(1 << 22) + 2), uint32_t(-(1 << 22) + 3),
+ uint32_t(-(1 << 21) - 3), uint32_t(-(1 << 21) - 2),uint32_t(-(1 << 21) - 1),
+ uint32_t(-(1 << 21)), uint32_t(-(1 << 21) + 1), uint32_t(-(1 << 21) + 2), uint32_t(-(1 << 21) + 3),
+ uint32_t(-(1 << 16) - 3), uint32_t(-(1 << 16) - 2),uint32_t(-(1 << 16) - 1),
+ uint32_t(-(1 << 16)), uint32_t(-(1 << 16) + 1), uint32_t(-(1 << 16) + 2), uint32_t(-(1 << 16) + 3),
+ uint32_t(-(1 << 15) - 3), uint32_t(-(1 << 15) - 2),uint32_t(-(1 << 15) - 1),
+ uint32_t(-(1 << 15)), uint32_t(-(1 << 15) + 1), uint32_t(-(1 << 15) + 2), uint32_t(-(1 << 15) + 3),
+ uint32_t(-(1 << 8) - 3), uint32_t(-(1 << 8) - 2),uint32_t(-(1 << 8) - 1),
+ uint32_t(-(1 << 8)), uint32_t(-(1 << 8) + 1), uint32_t(-(1 << 8) + 2), uint32_t(-(1 << 8) + 3),
+ uint32_t(-(1 << 7) - 3), uint32_t(-(1 << 7) - 2),uint32_t(-(1 << 7) - 1),
+ uint32_t(-(1 << 7)), uint32_t(-(1 << 7) + 1), uint32_t(-(1 << 7) + 2), uint32_t(-(1 << 7) + 3),
+ uint32_t(-4), uint32_t(-3), uint32_t(-2), uint32_t(-1), 0, 1, 2, 3, 4,
+ (1 << 7) - 3,(1 << 7) - 2,(1 << 7) - 1, (1 << 7), (1 << 7) + 1, (1 << 7) + 2, (1 << 7) + 3,
+ (1 << 8) - 3,(1 << 8) - 2,(1 << 8) - 1, (1 << 8), (1 << 8) + 1, (1 << 8) + 2, (1 << 8) + 3,
+ (1 << 15) - 3,(1 << 15) - 2,(1 << 15) - 1, (1 << 15), (1 << 15) + 1, (1 << 15) + 2, (1 << 15) + 3,
+ (1 << 16) - 3,(1 << 16) - 2,(1 << 16) - 1, (1 << 16), (1 << 16) + 1, (1 << 16) + 2, (1 << 16) + 3,
+ (1 << 21) - 3,(1 << 21) - 2,(1 << 21) - 1, (1 << 21), (1 << 21) + 1, (1 << 21) + 2, (1 << 21) + 3,
+ (1 << 22) - 3,(1 << 22) - 2,(1 << 22) - 1, (1 << 22), (1 << 22) + 1, (1 << 22) + 2, (1 << 22) + 3,
+ (1 << 23) - 3,(1 << 23) - 2,(1 << 23) - 1, (1 << 23), (1 << 23) + 1, (1 << 23) + 2, (1 << 23) + 3,
+ (1 << 24) - 3,(1 << 24) - 2,(1 << 24) - 1, (1 << 24), (1 << 24) + 1, (1 << 24) + 2, (1 << 24) + 3,
+ (1 << 30) - 3,(1 << 30) - 2,(1 << 30) - 1, (1 << 30), (1 << 30) + 1, (1 << 30) + 2, (1 << 30) + 3,
+ INT_MAX - 3, INT_MAX - 2, INT_MAX - 1, INT_MAX, // 0x80000000, 0x80000001 0x80000002 already covered above
+ UINT_MAX - 3, UINT_MAX - 2, UINT_MAX - 1, UINT_MAX
+};
- if( f >= -(float) LONG_MIN )
- return LONG_MAX;
+std::vector<float> DataInitInfo::specialValuesFloat = {
+ -NAN, -INFINITY, -FLT_MAX,
+ MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39),
+ MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
+ MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7),
+ MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6),
+ -1000.f, -100.f, -4.0f, -3.5f, -3.0f,
+ MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f,
+ MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f,
+ MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f,
+ MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24), MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f,
+ MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25), MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f,
+ MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f,
+ MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27), MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN,
+ MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150),
+ MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150),
+ MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150),
+ MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150), MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150),
+ MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150),
+ MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f, +NAN, +INFINITY, +FLT_MAX,
+ MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39),
+ MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
+ MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7),
+ MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6),
+ +1000.f, +100.f, +4.0f, +3.5f, +3.0f,
+ MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23), +2.0f,
+ MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24),
+ MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
+ MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26),
+ MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
+ MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150),
+ MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150),
+ MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150),
+ MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150), MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150),
+ MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150),
+ MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
+};
- if( f <= (float) LONG_MIN )
- return LONG_MIN;
+// A table of more difficult cases to get right
+std::vector<double> DataInitInfo::specialValuesDouble = {
+ -NAN, -INFINITY, -DBL_MAX,
+ MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
+ MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.80000000000001p64, -0x180000000000001LL, 8),
+ MAKE_HEX_DOUBLE(-0x1.8p64, -0x18LL, 60), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp64, -0x17ffffffffffffLL, 12),
+ MAKE_HEX_DOUBLE(-0x1.80000000000001p63, -0x180000000000001LL, 7), MAKE_HEX_DOUBLE(-0x1.8p63, -0x18LL, 59),
+ MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp63, -0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
+ MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
+ MAKE_HEX_DOUBLE(-0x1.80000000000001p32, -0x180000000000001LL, -24), MAKE_HEX_DOUBLE(-0x1.8p32, -0x18LL, 28),
+ MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp32, -0x17ffffffffffffLL, -20), MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8),
+ MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21),
+ MAKE_HEX_DOUBLE(-0x1.80000000000001p31, -0x180000000000001LL, -25), MAKE_HEX_DOUBLE(-0x1.8p31, -0x18LL, 27),
+ MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp31, -0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21),
+ MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22),
+ -1000., -100., -4.0, -3.5, -3.0,
+ MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5,
+ MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0,
+ MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5,
+ MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52), MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0,
+ MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53), MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5,
+ MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25,
+ MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55), MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
+ -DBL_MIN,
+ MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
+ MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
+ MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
+ MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074),
+ MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074),
+ MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
+ MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
+ -0.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
+ MAKE_HEX_DOUBLE(0x1.80000000000001p63, 0x180000000000001LL, 7), MAKE_HEX_DOUBLE(0x1.8p63, 0x18LL, 59),
+ MAKE_HEX_DOUBLE(0x1.7ffffffffffffp63, 0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
+ MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
+ MAKE_HEX_DOUBLE(+0x1.80000000000001p32, +0x180000000000001LL, -24), MAKE_HEX_DOUBLE(+0x1.8p32, +0x18LL, 28),
+ MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp32, +0x17ffffffffffffLL, -20), MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8),
+ MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21),
+ MAKE_HEX_DOUBLE(+0x1.80000000000001p31, +0x180000000000001LL, -25), MAKE_HEX_DOUBLE(+0x1.8p31, +0x18LL, 27),
+ MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp31, +0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21),
+ MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22),
+ +1000., +100., +4.0, +3.5, +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5,
+ MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
+ +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52), MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
+ +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53), MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53),
+ +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54),
+ +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55), MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
+ +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
+ MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
+ MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
+ MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074),
+ MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074),
+ MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
+ MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
+ MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0, MAKE_HEX_DOUBLE(-0x1.ffffffffffffep62, -0x1ffffffffffffeLL, 10),
+ MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp62, -0x1ffffffffffffcLL, 10), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
+ MAKE_HEX_DOUBLE(+0x1.ffffffffffffep62, +0x1ffffffffffffeLL, 10), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp62, +0x1ffffffffffffcLL, 10),
+ MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep51, -0x1ffffffffffffeLL, -1),
+ MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp51, -0x1ffffffffffffcLL, -1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp51, -0x1fffffffffffffLL, -1),
+ MAKE_HEX_DOUBLE(+0x1.ffffffffffffep51, +0x1ffffffffffffeLL, -1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp51, +0x1ffffffffffffcLL, -1),
+ MAKE_HEX_DOUBLE(+0x1.fffffffffffffp51, +0x1fffffffffffffLL, -1), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep52, -0x1ffffffffffffeLL, 0),
+ MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp52, -0x1ffffffffffffcLL, 0), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp52, -0x1fffffffffffffLL, 0),
+ MAKE_HEX_DOUBLE(+0x1.ffffffffffffep52, +0x1ffffffffffffeLL, 0), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp52, +0x1ffffffffffffcLL, 0),
+ MAKE_HEX_DOUBLE(+0x1.fffffffffffffp52, +0x1fffffffffffffLL, 0), MAKE_HEX_DOUBLE(-0x1.ffffffffffffep53, -0x1ffffffffffffeLL, 1),
+ MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp53, -0x1ffffffffffffcLL, 1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp53, -0x1fffffffffffffLL, 1),
+ MAKE_HEX_DOUBLE(+0x1.ffffffffffffep53, +0x1ffffffffffffeLL, 1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp53, +0x1ffffffffffffcLL, 1),
+ MAKE_HEX_DOUBLE(+0x1.fffffffffffffp53, +0x1fffffffffffffLL, 1), MAKE_HEX_DOUBLE(-0x1.0000000000002p52, -0x10000000000002LL, 0),
+ MAKE_HEX_DOUBLE(-0x1.0000000000001p52, -0x10000000000001LL, 0), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52),
+ MAKE_HEX_DOUBLE(+0x1.0000000000002p52, +0x10000000000002LL, 0), MAKE_HEX_DOUBLE(+0x1.0000000000001p52, +0x10000000000001LL, 0),
+ MAKE_HEX_DOUBLE(+0x1.0p52, +0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0000000000002p53, -0x10000000000002LL, 1),
+ MAKE_HEX_DOUBLE(-0x1.0000000000001p53, -0x10000000000001LL, 1), MAKE_HEX_DOUBLE(-0x1.0p53, -0x1LL, 53),
+ MAKE_HEX_DOUBLE(+0x1.0000000000002p53, +0x10000000000002LL, 1), MAKE_HEX_DOUBLE(+0x1.0000000000001p53, +0x10000000000001LL, 1),
+ MAKE_HEX_DOUBLE(+0x1.0p53, +0x1LL, 53), MAKE_HEX_DOUBLE(-0x1.0000000000002p54, -0x10000000000002LL, 2),
+ MAKE_HEX_DOUBLE(-0x1.0000000000001p54, -0x10000000000001LL, 2), MAKE_HEX_DOUBLE(-0x1.0p54, -0x1LL, 54),
+ MAKE_HEX_DOUBLE(+0x1.0000000000002p54, +0x10000000000002LL, 2), MAKE_HEX_DOUBLE(+0x1.0000000000001p54, +0x10000000000001LL, 2),
+ MAKE_HEX_DOUBLE(+0x1.0p54, +0x1LL, 54), MAKE_HEX_DOUBLE(-0x1.fffffffefffffp62, -0x1fffffffefffffLL, 10),
+ MAKE_HEX_DOUBLE(-0x1.ffffffffp62, -0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(-0x1.ffffffff00001p62, -0x1ffffffff00001LL, 10),
+ MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30),
+ MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10),
+};
+// clang-format on
- // Round fractional values to integer in round towards nearest mode
- if( fabsf(f) < MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23 ) )
- {
- volatile float x = f;
- float magicVal = magic[ f < 0 ];
-
-#if defined( __SSE__ ) || defined (_WIN32)
- // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly
- __m128 v = _mm_set_ss( x );
- __m128 m = _mm_set_ss( magicVal );
- v = _mm_add_ss( v, m );
- v = _mm_sub_ss( v, m );
- _mm_store_ss( (float*) &x, v );
+
+// Windows (since long double got deprecated) sets the x87 to 53-bit precision
+// (that's x87 default state). This causes problems with the tests that
+// convert long and ulong to float and double or otherwise deal with values
+// that need more precision than 53-bit. So, set the x87 to 64-bit precision.
+//
+// Only the __MINGW32__ build does any work here; on every other platform
+// the function body is empty.
+static inline void Force64BitFPUPrecision(void)
+{
+#if __MINGW32__
+ // The usual method is to use _controlfp as follows:
+ // #include <float.h>
+ // _controlfp(_PC_64, _MCW_PC);
+ //
+ // _controlfp is available on MinGW32 but not on MinGW64. Instead of having
+ // divergent code just use inline assembly which works for both.
+ unsigned short int orig_cw = 0;
+ unsigned short int new_cw = 0;
+ // Store the current x87 control word into orig_cw.
+ __asm__ __volatile__("fstcw %0" : "=m"(orig_cw));
+ // OR-ing in 0x0300 (bits 8 and 9) leaves every other control-word bit as
+ // it was read.
+ new_cw = orig_cw | 0x0300; // set precision to 64-bit
+ // Load the modified control word back into the x87.
+ __asm__ __volatile__("fldcw %0" ::"m"(new_cw));
#else
- x += magicVal;
- x -= magicVal;
+ /* Implement for other platforms if needed */
#endif
- f = x;
+}
+
+
+// Compares `count` results in `test` against the reference values in gRef,
+// element by element.
+//
+// A mismatch is tolerated when gAllowZ[i] is non-zero and the tested value
+// is zero (for floating-point outputs: a zero that carries the reference's
+// sign bit). For floating-point outputs a mismatch is also tolerated when
+// both the tested and the reference value are NaNs.
+//
+// test - buffer of OutType results to verify
+// count - number of elements to compare
+// vectorSize - only used in the failure message
+//
+// Returns 0 when every element is acceptable, otherwise the 1-based index
+// of the first failing element, after logging it through vlog.
+template <typename InType, typename OutType>
+int CalcRefValsPat<InType, OutType>::check_result(void *test, uint32_t count,
+ int vectorSize)
+{
+ const cl_uchar *a = (const cl_uchar *)gAllowZ;
+
+ if (std::is_integral<OutType>::value)
+ { // char/uchar/short/ushort/int/uint/long/ulong
+ const OutType *t = (const OutType *)test;
+ const OutType *c = (const OutType *)gRef;
+ for (uint32_t i = 0; i < count; i++)
+ if (t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (OutType)0))
+ {
+ // Field width is two hex digits per byte of OutType.
+ // NOTE(review): for an 8-byte OutType this builds "%16.16x",
+ // which has no 64-bit length modifier - confirm the logged
+ // values are correct for long/ulong.
+ size_t s = sizeof(OutType) * 2;
+ std::stringstream sstr;
+ sstr << "\nError for vector size %d found at 0x%8.8x: *0x%"
+ << s << "." << s << "x vs 0x%" << s << "." << s << "x\n";
+ vlog(sstr.str().c_str(), vectorSize, i, c[i], t[i]);
+ return i + 1;
+ }
+ }
+ else if (std::is_same<OutType, cl_float>::value)
+ {
+ // cast to integral - from original test
+ const cl_uint *t = (const cl_uint *)test;
+ const cl_uint *c = (const cl_uint *)gRef;
+
+ for (uint32_t i = 0; i < count; i++)
+ if (t[i] != c[i] &&
+ // Allow nan's to be binary different: a float is a NaN when
+ // its magnitude bits exceed the infinity pattern 0x7f800000.
+ !((t[i] & 0x7fffffffU) > 0x7f800000U
+ && (c[i] & 0x7fffffffU) > 0x7f800000U)
+ && !(a[i] != (cl_uchar)0 && t[i] == (c[i] & 0x80000000U)))
+ {
+ vlog(
+ "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n",
+ vectorSize, i, ((OutType *)gRef)[i], ((OutType *)test)[i]);
+ return i + 1;
+ }
+ }
+ else
+ {
+ // Remaining case: values are compared as 64-bit patterns.
+ const cl_ulong *t = (const cl_ulong *)test;
+ const cl_ulong *c = (const cl_ulong *)gRef;
+
+ for (uint32_t i = 0; i < count; i++)
+ if (t[i] != c[i] &&
+ // Allow nan's to be binary different: a double is a NaN when
+ // its magnitude bits exceed the infinity pattern
+ // 0x7ff0000000000000. Both operands must use that same
+ // threshold; a smaller one would classify large finite
+ // reference values as NaN and accept a NaN result for them.
+ !((t[i] & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL
+ && (c[i] & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL)
+ && !(a[i] != (cl_uchar)0
+ && t[i] == (c[i] & 0x8000000000000000ULL)))
+ {
+ vlog(
+ "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n",
+ vectorSize, i, ((OutType *)gRef)[i], ((OutType *)test)[i]);
+ return i + 1;
+ }
}
- return (long) f;
+ return 0;
}
-long long llrintf_clamped( float f );
-long long llrintf_clamped( float f )
+
+// Returns the smallest power of two that is >= x.
+// A value with at most one bit set (including 0) is returned unchanged.
+// NOTE(review): when x is not a power of two and its top bit is set, the
+// final x + x wraps around (to 0 for a 32-bit cl_uint) - confirm callers
+// never pass such values.
+cl_uint RoundUpToNextPowerOfTwo(cl_uint x)
{
- static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) };
+ // x & (x - 1) clears the lowest set bit; a zero result means x had at
+ // most one bit set.
+ if (0 == (x & (x - 1))) return x;
- if( f >= -(float) LLONG_MIN )
- return LLONG_MAX;
+ // Strip low bits until only the highest set bit remains.
+ while (x & (x - 1)) x &= x - 1;
+
+ // Double the remaining bit to get the next power of two above the input.
+ return x + x;
+}
- if( f <= (float) LLONG_MIN )
- return LLONG_MIN;
- // Round fractional values to integer in round towards nearest mode
- if( fabsf(f) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23) )
+// Runs only the conversions named in argList (argCount entries).
+//
+// Each name is parsed by conv_test::GetTestCase into an output type, input
+// type, saturation mode and rounding mode. Names that fail to parse are
+// logged and skipped, as are conversions that involve double or 64-bit
+// integer types when those are not being tested. Every remaining case is
+// run through an IterOverSelectedTypes object.
+//
+// Returns gFailCount.
+cl_int CustomConversionsTest::Run()
+{
+ // Value of gMinVectorSize on entry; decides below whether this function
+ // adjusts gMinVectorSize per test case.
+ int startMinVectorSize = gMinVectorSize;
+ Type inType, outType;
+ RoundingMode round;
+ SaturationMode sat;
+
+ for (int i = 0; i < argCount; i++)
{
- volatile float x = f;
- float magicVal = magic[ f < 0 ];
-#if defined( __SSE__ ) || defined (_WIN32)
- // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly
- __m128 v = _mm_set_ss( x );
- __m128 m = _mm_set_ss( magicVal );
- v = _mm_add_ss( v, m );
- v = _mm_sub_ss( v, m );
- _mm_store_ss( (float*) &x, v );
-#else
- x += magicVal;
- x -= magicVal;
-#endif
- f = x;
+ // A non-zero return from GetTestCase is treated as a parse failure.
+ if (conv_test::GetTestCase(argList[i], &outType, &inType, &sat, &round))
+ {
+ vlog_error("\n\t\t**** ERROR: Unable to parse function name "
+ "%s. Skipping.... *****\n\n",
+ argList[i]);
+ continue;
+ }
+
+ // skip double if we don't have it
+ if (!gTestDouble && (inType == kdouble || outType == kdouble))
+ {
+ // Double is available (gHasDouble) but testing it is switched off:
+ // this is reported through vlog_error as a FAILED line.
+ if (gHasDouble)
+ {
+ vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
+ gTypeNames[outType], gSaturationNames[sat],
+ gRoundingModeNames[round], gTypeNames[inType]);
+ vlog("\t\tcl_khr_fp64 enabled, but double testing turned "
+ "off.\n");
+ }
+ continue;
+ }
+
+ // skip longs on embedded
+ if (!gHasLong
+ && (inType == klong || outType == klong || inType == kulong
+ || outType == kulong))
+ {
+ continue;
+ }
+
+ // Skip the implicit converts if the rounding mode is not default or
+ // test is saturated
+ // Only applied when gMinVectorSize was 0 on entry. The global is
+ // rewritten for each test case and is not restored before returning.
+ if (0 == startMinVectorSize)
+ {
+ if (sat || round != kDefaultRoundingMode)
+ gMinVectorSize = 1;
+ else
+ gMinVectorSize = 0;
+ }
+
+ IterOverSelectedTypes iter(typeIterator, *this, inType, outType, round,
+ sat);
+
+ iter.Run();
+
+ // NOTE(review): gFailCount is not reset inside this function, so once
+ // it is non-zero every later test case is logged as FAILED too -
+ // confirm this is intended.
+ if (gFailCount)
+ {
+ vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
+ gTypeNames[outType], gSaturationNames[sat],
+ gRoundingModeNames[round], gTypeNames[inType]);
+ }
}
- return (long long) f;
+ return gFailCount;
}
-long lrint_clamped( double f );
-long lrint_clamped( double f )
+
+// Stores the OpenCL device, context and command queue handles, starts with
+// num_elements at 0, and builds typeIterator from one zero value per scalar
+// type: uchar, char, ushort, short, uint, int, float, double, ulong, long.
+// NOTE(review): this order presumably has to match the Type enum
+// (kuchar ... klong) - confirm against its declaration.
+ConversionsTest::ConversionsTest(cl_device_id device, cl_context context,
+ cl_command_queue queue)
+ : context(context), device(device), queue(queue), num_elements(0),
+ typeIterator({ cl_uchar(0), cl_char(0), cl_ushort(0), cl_short(0),
+ cl_uint(0), cl_int(0), cl_float(0), cl_double(0),
+ cl_ulong(0), cl_long(0) })
+{}
+
+
+// Runs the full conversion test by handing typeIterator and this object to
+// an IterOverTypes instance (presumably it calls back into
+// TestTypesConversion for each input/output type pair - confirm against
+// IterOverTypes).
+//
+// Returns gFailCount.
+cl_int ConversionsTest::Run()
{
- static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) };
+ IterOverTypes iter(typeIterator, *this);
- if( sizeof( long ) > 4 )
+ iter.Run();
+
+ return gFailCount;
+}
+
+
+// Stores `elements` in num_elements. Always returns CL_SUCCESS.
+cl_int ConversionsTest::SetUp(int elements)
+{
+ num_elements = elements;
+ return CL_SUCCESS;
+}
+
+
+// Runs every saturation-mode / rounding-mode combination for one
+// InType -> OutType conversion.
+//
+// inType, outType - Type enum values for the input and output types
+// testNumber - running test counter shared with the caller; it is
+// incremented once per combination that reaches the
+// rounding-mode loop and is used to honour
+// gStartTestNumber / gEndTestNumber
+// startMinVectorSize - when 0, gMinVectorSize is rewritten for each
+// combination (see below)
+//
+// Nothing is returned; a failing DoTest call is only logged here.
+template <typename InType, typename OutType>
+void ConversionsTest::TestTypesConversion(const Type &inType,
+ const Type &outType, int &testNumber,
+ int startMinVectorSize)
+{
+ SaturationMode sat;
+ RoundingMode round;
+ int error;
+
+ // skip longs on embedded
+ if (!gHasLong
+ && (inType == klong || outType == klong || inType == kulong
+ || outType == kulong))
{
- if( f >= -(double) LONG_MIN )
- return LONG_MAX;
+ return;
}
- else
+
+ for (sat = (SaturationMode)0; sat < kSaturationModeCount;
+ sat = (SaturationMode)(sat + 1))
{
- if( f >= LONG_MAX )
- return LONG_MAX;
- }
+ // skip illegal saturated conversions to float type
+ // (skipped before the inner loop, so testNumber is not advanced for
+ // these combinations)
+ if (kSaturated == sat && (outType == kfloat || outType == kdouble))
+ {
+ continue;
+ }
+
+ for (round = (RoundingMode)0; round < kRoundingModeCount;
+ round = (RoundingMode)(round + 1))
+ {
+ // testNumber is incremented first, then compared: combinations
+ // numbered below gStartTestNumber are skipped, and the whole
+ // function returns once a positive gEndTestNumber is reached.
+ if (++testNumber < gStartTestNumber)
+ {
+ continue;
+ }
+ else
+ {
+ if (gEndTestNumber > 0 && testNumber >= gEndTestNumber) return;
+ }
- if( f <= (double) LONG_MIN )
- return LONG_MIN;
+ vlog("%d) Testing convert_%sn%s%s( %sn ):\n", testNumber,
+ gTypeNames[outType], gSaturationNames[sat],
+ gRoundingModeNames[round], gTypeNames[inType]);
- // Round fractional values to integer in round towards nearest mode
- if( fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52) )
- {
- volatile double x = f;
- double magicVal = magic[ f < 0 ];
-#if defined( __SSE2__ ) || defined (_MSC_VER)
- // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly
- __m128d v = _mm_set_sd( x );
- __m128d m = _mm_set_sd( magicVal );
- v = _mm_add_sd( v, m );
- v = _mm_sub_sd( v, m );
- _mm_store_sd( (double*) &x, v );
-#else
- x += magicVal;
- x -= magicVal;
-#endif
- f = x;
- }
+ // skip double if we don't have it
+ // (the "Testing ..." line above has already been printed and
+ // testNumber already advanced when this skip happens)
+ if (!gTestDouble && (inType == kdouble || outType == kdouble))
+ {
+ if (gHasDouble)
+ {
+ vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
+ "FAILED ** \n",
+ testNumber, gTypeNames[outType],
+ gSaturationNames[sat], gRoundingModeNames[round],
+ gTypeNames[inType]);
+ vlog("\t\tcl_khr_fp64 enabled, but double "
+ "testing turned off.\n");
+ }
+ continue;
+ }
+
+ // Skip the implicit converts if the rounding mode is
+ // not default or test is saturated
+ if (0 == startMinVectorSize)
+ {
+ if (sat || round != kDefaultRoundingMode)
+ gMinVectorSize = 1;
+ else
+ gMinVectorSize = 0;
+ }
- return (long) f;
+ // A non-zero DoTest result is logged only; the loops carry on
+ // with the next combination.
+ if ((error = DoTest<InType, OutType>(outType, inType, sat, round)))
+ {
+ vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
+ "FAILED ** \n",
+ testNumber, gTypeNames[outType],
+ gSaturationNames[sat], gRoundingModeNames[round],
+ gTypeNames[inType]);
+ }
+ }
+ }
}
-long long llrint_clamped( double f );
-long long llrint_clamped( double f )
+
+template <typename InType, typename OutType>
+int ConversionsTest::DoTest(Type outType, Type inType, SaturationMode sat,
+ RoundingMode round)
{
- static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) };
+#ifdef __APPLE__
+ cl_ulong wall_start = mach_absolute_time();
+#endif
+
+ cl_uint threads = GetThreadCount();
- if( f >= -(double) LLONG_MIN )
- return LLONG_MAX;
+ DataInitInfo info = { 0, 0, outType, inType, sat, round, threads };
+ DataInfoSpec<InType, OutType> init_info(info);
+ WriteInputBufferInfo writeInputBufferInfo;
+ int vectorSize;
+ int error = 0;
+ uint64_t i;
- if( f <= (double) LLONG_MIN )
- return LLONG_MIN;
+ gTestCount++;
+ size_t blockCount =
+ BUFFER_SIZE / std::max(gTypeSizes[inType], gTypeSizes[outType]);
+ size_t step = blockCount;
- // Round fractional values to integer in round towards nearest mode
- if( fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52) )
+ for (i = 0; i < threads; i++)
{
- volatile double x = f;
- double magicVal = magic[ f < 0 ];
-#if defined( __SSE2__ ) || defined (_MSC_VER)
- // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly
- __m128d v = _mm_set_sd( x );
- __m128d m = _mm_set_sd( magicVal );
- v = _mm_add_sd( v, m );
- v = _mm_sub_sd( v, m );
- _mm_store_sd( (double*) &x, v );
-#else
- x += magicVal;
- x -= magicVal;
-#endif
- f = x;
+ init_info.mdv.emplace_back(MTdataHolder(gRandomSeed));
}
- return (long long) f;
-}
-
+ writeInputBufferInfo.outType = outType;
+ writeInputBufferInfo.inType = inType;
-/*
- Names created as:
+ writeInputBufferInfo.calcInfo.resize(gMaxVectorSize);
+ for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
+ {
+ writeInputBufferInfo.calcInfo[vectorSize].reset(
+ new CalcRefValsPat<InType, OutType>());
+ writeInputBufferInfo.calcInfo[vectorSize]->program =
+ conv_test::MakeProgram(
+ outType, inType, sat, round, vectorSize,
+ &writeInputBufferInfo.calcInfo[vectorSize]->kernel);
+ if (NULL == writeInputBufferInfo.calcInfo[vectorSize]->program)
+ {
+ gFailCount++;
+ return -1;
+ }
+ if (NULL == writeInputBufferInfo.calcInfo[vectorSize]->kernel)
+ {
+ gFailCount++;
+ vlog_error("\t\tFAILED -- Failed to create kernel.\n");
+ return -2;
+ }
- #include <stdio.h>
+ writeInputBufferInfo.calcInfo[vectorSize]->parent =
+ &writeInputBufferInfo;
+ writeInputBufferInfo.calcInfo[vectorSize]->vectorSize = vectorSize;
+ writeInputBufferInfo.calcInfo[vectorSize]->result = -1;
+ }
- const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" };
+ if (gSkipTesting) return error;
- int main( void )
+ // Patch up rounding mode if default is RTZ
+ // We leave the part above in default rounding mode so that the right kernel
+ // is compiled.
+ if (std::is_same<OutType, cl_float>::value)
{
+ if (round == kDefaultRoundingMode && gIsRTZ)
+ init_info.round = round = kRoundTowardZero;
+ }
- int i,j;
+ // Figure out how many elements are in a work block
+ // we handle 64-bit types a bit differently.
+ uint64_t lastCase = (8 * gTypeSizes[inType] > 32)
+ ? 0x100000000ULL
+ : 1ULL << (8 * gTypeSizes[inType]);
- for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ )
- for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ )
- {
- if( j == i )
- continue;
+ if (!gWimpyMode && gIsEmbedded)
+ step = blockCount * EMBEDDED_REDUCTION_FACTOR;
- vlog( "void %s2%s( void *, void *);\n", names[i], names[j] );
- }
+ if (gWimpyMode) step = (size_t)blockCount * (size_t)gWimpyReductionFactor;
+ vlog("Testing... ");
+ fflush(stdout);
+ for (i = 0; i < (uint64_t)lastCase; i += step)
+ {
+ if (0 == (i & ((lastCase >> 3) - 1)))
+ {
+ vlog(".");
+ fflush(stdout);
+ }
- return 0;
- }
-*/
-
-static float my_fabsf( float x );
-static double my_fabs( double x );
-
-
-
-static void uchar2char( void *, void *);
-static void uchar2ushort( void *, void *);
-static void uchar2short( void *, void *);
-static void uchar2uint( void *, void *);
-static void uchar2int( void *, void *);
-static void uchar2float( void *, void *);
-static void uchar2double( void *, void *);
-static void uchar2ulong( void *, void *);
-static void uchar2long( void *, void *);
-static void char2uchar( void *, void *);
-static void char2ushort( void *, void *);
-static void char2short( void *, void *);
-static void char2uint( void *, void *);
-static void char2int( void *, void *);
-static void char2float( void *, void *);
-static void char2double( void *, void *);
-static void char2ulong( void *, void *);
-static void char2long( void *, void *);
-static void ushort2uchar( void *, void *);
-static void ushort2char( void *, void *);
-static void ushort2short( void *, void *);
-static void ushort2uint( void *, void *);
-static void ushort2int( void *, void *);
-static void ushort2float( void *, void *);
-static void ushort2double( void *, void *);
-static void ushort2ulong( void *, void *);
-static void ushort2long( void *, void *);
-static void short2uchar( void *, void *);
-static void short2char( void *, void *);
-static void short2ushort( void *, void *);
-static void short2uint( void *, void *);
-static void short2int( void *, void *);
-static void short2float( void *, void *);
-static void short2double( void *, void *);
-static void short2ulong( void *, void *);
-static void short2long( void *, void *);
-static void uint2uchar( void *, void *);
-static void uint2char( void *, void *);
-static void uint2ushort( void *, void *);
-static void uint2short( void *, void *);
-static void uint2int( void *, void *);
-static void uint2float( void *, void *);
-static void uint2double( void *, void *);
-static void uint2ulong( void *, void *);
-static void uint2long( void *, void *);
-static void int2uchar( void *, void *);
-static void int2char( void *, void *);
-static void int2ushort( void *, void *);
-static void int2short( void *, void *);
-static void int2uint( void *, void *);
-static void int2float( void *, void *);
-static void int2double( void *, void *);
-static void int2ulong( void *, void *);
-static void int2long( void *, void *);
-static void float2uchar( void *, void *);
-static void float2char( void *, void *);
-static void float2ushort( void *, void *);
-static void float2short( void *, void *);
-static void float2uint( void *, void *);
-static void float2int( void *, void *);
-static void float2double( void *, void *);
-static void float2ulong( void *, void *);
-static void float2long( void *, void *);
-static void double2uchar( void *, void *);
-static void double2char( void *, void *);
-static void double2ushort( void *, void *);
-static void double2short( void *, void *);
-static void double2uint( void *, void *);
-static void double2int( void *, void *);
-static void double2float( void *, void *);
-static void double2ulong( void *, void *);
-static void double2long( void *, void *);
-static void ulong2uchar( void *, void *);
-static void ulong2char( void *, void *);
-static void ulong2ushort( void *, void *);
-static void ulong2short( void *, void *);
-static void ulong2uint( void *, void *);
-static void ulong2int( void *, void *);
-static void ulong2float( void *, void *);
-static void ulong2double( void *, void *);
-static void ulong2long( void *, void *);
-static void long2uchar( void *, void *);
-static void long2char( void *, void *);
-static void long2ushort( void *, void *);
-static void long2short( void *, void *);
-static void long2uint( void *, void *);
-static void long2int( void *, void *);
-static void long2float( void *, void *);
-static void long2double( void *, void *);
-static void long2ulong( void *, void *);
-
-/*
- Conversion list created as
-
- #include <stdio.h>
-
- const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" };
-
- int main( void )
- {
+ cl_uint count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i);
+ writeInputBufferInfo.count = count;
- int i,j;
+ // Create a user event to represent the status of the reference value
+ // computation completion
+ writeInputBufferInfo.calcReferenceValues =
+ clCreateUserEvent(gContext, &error);
+ if (error || NULL == writeInputBufferInfo.calcReferenceValues)
+ {
+ vlog_error("ERROR: Unable to create user event. (%d)\n", error);
+ gFailCount++;
+ return error;
+ }
- for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ )
+ // retain for consumption by MapOutputBufferComplete
+ for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
+ vectorSize++)
{
- vlog( "{ " );
- for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ )
+ if ((error =
+ clRetainEvent(writeInputBufferInfo.calcReferenceValues)))
{
- if( j == i )
- vlog( " NULL, " );
- else
- {
- char s[64];
- sprintf( s, "%s2%s,", names[j], names[i] );
- vlog( "%15s ", s );
- }
+ vlog_error("ERROR: Unable to retain user event. (%d)\n", error);
+ gFailCount++;
+ return error;
}
- vlog( "},\n" );
}
- return 0;
- }
-
- */
-/*
-Convert gConversions[kTypeCount][kTypeCount] = {
-{ NULL, char2uchar, ushort2uchar, short2uchar, uint2uchar, int2uchar, float2uchar, double2uchar, ulong2uchar, long2uchar, },
-{ uchar2char, NULL, ushort2char, short2char, uint2char, int2char, float2char, double2char, ulong2char, long2char, },
-{ uchar2ushort, char2ushort, NULL, short2ushort, uint2ushort, int2ushort, float2ushort, double2ushort, ulong2ushort, long2ushort, },
-{ uchar2short, char2short, ushort2short, NULL, uint2short, int2short, float2short, double2short, ulong2short, long2short, },
-{ uchar2uint, char2uint, ushort2uint, short2uint, NULL, int2uint, float2uint, double2uint, ulong2uint, long2uint, },
-{ uchar2int, char2int, ushort2int, short2int, uint2int, NULL, float2int, double2int, ulong2int, long2int, },
-{ uchar2float, char2float, ushort2float, short2float, uint2float, int2float, NULL, double2float, ulong2float, long2float, },
-{ uchar2double, char2double, ushort2double, short2double, uint2double, int2double, float2double, NULL, ulong2double, long2double, },
-{ uchar2ulong, char2ulong, ushort2ulong, short2ulong, uint2ulong, int2ulong, float2ulong, double2ulong, NULL, long2ulong, },
-{ uchar2long, char2long, ushort2long, short2long, uint2long, int2long, float2long, double2long, ulong2long, NULL, } };
-*/
-
-static void uchar2char_sat( void *, void *);
-static void uchar2ushort_sat( void *, void *);
-static void uchar2short_sat( void *, void *);
-static void uchar2uint_sat( void *, void *);
-static void uchar2int_sat( void *, void *);
-static void uchar2float_sat( void *, void *);
-static void uchar2double_sat( void *, void *);
-static void uchar2ulong_sat( void *, void *);
-static void uchar2long_sat( void *, void *);
-static void char2uchar_sat( void *, void *);
-static void char2ushort_sat( void *, void *);
-static void char2short_sat( void *, void *);
-static void char2uint_sat( void *, void *);
-static void char2int_sat( void *, void *);
-static void char2float_sat( void *, void *);
-static void char2double_sat( void *, void *);
-static void char2ulong_sat( void *, void *);
-static void char2long_sat( void *, void *);
-static void ushort2uchar_sat( void *, void *);
-static void ushort2char_sat( void *, void *);
-static void ushort2short_sat( void *, void *);
-static void ushort2uint_sat( void *, void *);
-static void ushort2int_sat( void *, void *);
-static void ushort2float_sat( void *, void *);
-static void ushort2double_sat( void *, void *);
-static void ushort2ulong_sat( void *, void *);
-static void ushort2long_sat( void *, void *);
-static void short2uchar_sat( void *, void *);
-static void short2char_sat( void *, void *);
-static void short2ushort_sat( void *, void *);
-static void short2uint_sat( void *, void *);
-static void short2int_sat( void *, void *);
-static void short2float_sat( void *, void *);
-static void short2double_sat( void *, void *);
-static void short2ulong_sat( void *, void *);
-static void short2long_sat( void *, void *);
-static void uint2uchar_sat( void *, void *);
-static void uint2char_sat( void *, void *);
-static void uint2ushort_sat( void *, void *);
-static void uint2short_sat( void *, void *);
-static void uint2int_sat( void *, void *);
-static void uint2float_sat( void *, void *);
-static void uint2double_sat( void *, void *);
-static void uint2ulong_sat( void *, void *);
-static void uint2long_sat( void *, void *);
-static void int2uchar_sat( void *, void *);
-static void int2char_sat( void *, void *);
-static void int2ushort_sat( void *, void *);
-static void int2short_sat( void *, void *);
-static void int2uint_sat( void *, void *);
-static void int2float_sat( void *, void *);
-static void int2double_sat( void *, void *);
-static void int2ulong_sat( void *, void *);
-static void int2long_sat( void *, void *);
-static void float2uchar_sat( void *, void *);
-static void float2char_sat( void *, void *);
-static void float2ushort_sat( void *, void *);
-static void float2short_sat( void *, void *);
-static void float2uint_sat( void *, void *);
-static void float2int_sat( void *, void *);
-static void float2double_sat( void *, void *);
-static void float2ulong_sat( void *, void *);
-static void float2long_sat( void *, void *);
-static void double2uchar_sat( void *, void *);
-static void double2char_sat( void *, void *);
-static void double2ushort_sat( void *, void *);
-static void double2short_sat( void *, void *);
-static void double2uint_sat( void *, void *);
-static void double2int_sat( void *, void *);
-static void double2float_sat( void *, void *);
-static void double2ulong_sat( void *, void *);
-static void double2long_sat( void *, void *);
-static void ulong2uchar_sat( void *, void *);
-static void ulong2char_sat( void *, void *);
-static void ulong2ushort_sat( void *, void *);
-static void ulong2short_sat( void *, void *);
-static void ulong2uint_sat( void *, void *);
-static void ulong2int_sat( void *, void *);
-static void ulong2float_sat( void *, void *);
-static void ulong2double_sat( void *, void *);
-static void ulong2long_sat( void *, void *);
-static void long2uchar_sat( void *, void *);
-static void long2char_sat( void *, void *);
-static void long2ushort_sat( void *, void *);
-static void long2short_sat( void *, void *);
-static void long2uint_sat( void *, void *);
-static void long2int_sat( void *, void *);
-static void long2float_sat( void *, void *);
-static void long2double_sat( void *, void *);
-static void long2ulong_sat( void *, void *);
-/*
- #include <stdio.h>
-
- const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" };
-
- int main( void )
- {
+ // Create a user event to represent when the callbacks are done verifying
+ // correctness
+ writeInputBufferInfo.doneBarrier = clCreateUserEvent(gContext, &error);
+ if (error || NULL == writeInputBufferInfo.doneBarrier)
+ {
+ vlog_error("ERROR: Unable to create user event for barrier. (%d)\n",
+ error);
+ gFailCount++;
+ return error;
+ }
- int i,j;
+ // retain for use by the callback that calls this
+ if ((error = clRetainEvent(writeInputBufferInfo.doneBarrier)))
+ {
+ vlog_error("ERROR: Unable to retain user event doneBarrier. (%d)\n",
+ error);
+ gFailCount++;
+ return error;
+ }
- for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ )
+ // Call this in a multithreaded manner
+ cl_uint chunks = RoundUpToNextPowerOfTwo(threads) * 2;
+ init_info.start = i;
+ init_info.size = count / chunks;
+ if (init_info.size < 16384)
{
- vlog( "{ " );
- for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ )
+ chunks = RoundUpToNextPowerOfTwo(threads);
+ init_info.size = count / chunks;
+ if (init_info.size < 16384)
{
- if( j == i )
- vlog( " NULL, " );
- else
- {
- char s[64];
- sprintf( s, "%s2%s_sat,", names[j], names[i] );
- vlog( "%18s ", s );
- }
+ init_info.size = count;
+ chunks = 1;
}
- vlog( "},\n" );
}
- return 0;
- }
+ ThreadPool_Do(conv_test::InitData, chunks, &init_info);
-Convert gSaturatedConversions[kTypeCount][kTypeCount] = {
-{ NULL, char2uchar_sat, ushort2uchar_sat, short2uchar_sat, uint2uchar_sat, int2uchar_sat, float2uchar_sat, double2uchar_sat, ulong2uchar_sat, long2uchar_sat, },
-{ uchar2char_sat, NULL, ushort2char_sat, short2char_sat, uint2char_sat, int2char_sat, float2char_sat, double2char_sat, ulong2char_sat, long2char_sat, },
-{ uchar2ushort_sat, char2ushort_sat, NULL, short2ushort_sat, uint2ushort_sat, int2ushort_sat, float2ushort_sat, double2ushort_sat, ulong2ushort_sat, long2ushort_sat, },
-{ uchar2short_sat, char2short_sat, ushort2short_sat, NULL, uint2short_sat, int2short_sat, float2short_sat, double2short_sat, ulong2short_sat, long2short_sat, },
-{ uchar2uint_sat, char2uint_sat, ushort2uint_sat, short2uint_sat, NULL, int2uint_sat, float2uint_sat, double2uint_sat, ulong2uint_sat, long2uint_sat, },
-{ uchar2int_sat, char2int_sat, ushort2int_sat, short2int_sat, uint2int_sat, NULL, float2int_sat, double2int_sat, ulong2int_sat, long2int_sat, },
-{ uchar2float_sat, char2float_sat, ushort2float_sat, short2float_sat, uint2float_sat, int2float_sat, NULL, double2float_sat, ulong2float_sat, long2float_sat, },
-{ uchar2double_sat, char2double_sat, ushort2double_sat, short2double_sat, uint2double_sat, int2double_sat, float2double_sat, NULL, ulong2double_sat, long2double_sat, },
-{ uchar2ulong_sat, char2ulong_sat, ushort2ulong_sat, short2ulong_sat, uint2ulong_sat, int2ulong_sat, float2ulong_sat, double2ulong_sat, NULL, long2ulong_sat, },
-{ uchar2long_sat, char2long_sat, ushort2long_sat, short2long_sat, uint2long_sat, int2long_sat, float2long_sat, double2long_sat, ulong2long_sat, NULL, }
-};
-*/
+ // Copy the results to the device
+ if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0,
+ count * gTypeSizes[inType], gIn, 0,
+ NULL, NULL)))
+ {
+ vlog_error("ERROR: clEnqueueWriteBuffer failed. (%d)\n", error);
+ gFailCount++;
+ return error;
+ }
-/*
- #include <stdio.h>
+ // Call completion callback for the write, which will enqueue the rest
+ // of the work.
+ conv_test::WriteInputBufferComplete((void *)&writeInputBufferInfo);
- const char *names[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "long" };
- const char *types[] = { "uchar", "char", "ushort", "short", "uint", "int", "float", "double", "ulong", "llong" };
+ // Make sure the work is actually running, so we don't deadlock
+ if ((error = clFlush(gQueue)))
+ {
+ vlog_error("clFlush failed with error %d\n", error);
+ gFailCount++;
+ return error;
+ }
- int main( void )
- {
+ ThreadPool_Do(conv_test::PrepareReference, chunks, &init_info);
+
+ // signal we are done calculating the reference results
+ if ((error = clSetUserEventStatus(
+ writeInputBufferInfo.calcReferenceValues, CL_COMPLETE)))
+ {
+ vlog_error(
+ "Error: Failed to set user event status to CL_COMPLETE: %d\n",
+ error);
+ gFailCount++;
+ return error;
+ }
- int i,j;
+ // Wait for the event callbacks to finish verifying correctness.
+ if ((error = clWaitForEvents(
+ 1, (cl_event *)&writeInputBufferInfo.doneBarrier)))
+ {
+ vlog_error("Error: Failed to wait for barrier: %d\n", error);
+ gFailCount++;
+ return error;
+ }
- for( i = 0; i < sizeof( names ) / sizeof( names[0] ); i++ )
- for( j = 0; j < sizeof( names ) / sizeof( names[0] ); j++ )
- {
- if( j == i )
- continue;
+ if ((error = clReleaseEvent(writeInputBufferInfo.calcReferenceValues)))
+ {
+ vlog_error("Error: Failed to release calcReferenceValues: %d\n",
+ error);
+ gFailCount++;
+ return error;
+ }
- switch( i )
+ if ((error = clReleaseEvent(writeInputBufferInfo.doneBarrier)))
+ {
+ vlog_error("Error: Failed to release done barrier: %d\n", error);
+ gFailCount++;
+ return error;
+ }
+
+ for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
+ vectorSize++)
+ {
+ if ((error = writeInputBufferInfo.calcInfo[vectorSize]->result))
+ {
+ switch (inType)
{
- case 6: //float
- if( j == 7 )
- vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) ((%s*) in)[0]; }\n", names[i], names[i], names[j], types[j], types[i] );
- else
- vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) my_rintf(((%s*) in)[0]); }\n", names[i], names[i], names[j], types[j], types[i] );
+ case kuchar:
+ case kchar:
+ vlog("Input value: 0x%2.2x ",
+ ((unsigned char *)gIn)[error - 1]);
+ break;
+ case kushort:
+ case kshort:
+ vlog("Input value: 0x%4.4x ",
+ ((unsigned short *)gIn)[error - 1]);
+ break;
+ case kuint:
+ case kint:
+ vlog("Input value: 0x%8.8x ",
+ ((unsigned int *)gIn)[error - 1]);
break;
- case 7: //double
- if( j == 6 )
- vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) ((%s*) in)[0]; }\n", names[i], names[i], names[j], types[j], types[i] );
- else
- vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s) rint(((%s*) in)[0]); }\n", names[i], names[i], names[j], types[j], types[i] );
+ case kfloat:
+ vlog("Input value: %a ", ((float *)gIn)[error - 1]);
+ break;
+ case kulong:
+ case klong:
+ vlog("Input value: 0x%16.16llx ",
+ ((unsigned long long *)gIn)[error - 1]);
+ break;
+ case kdouble:
+ vlog("Input value: %a ", ((double *)gIn)[error - 1]);
break;
default:
- vlog( "void %s2%s( void *out, void *in){ ((%s*) out)[0] = (%s)
- ((%s*) in)[0]; }\n", names[i], names[i], names[j], types[j], types[i] );
+ vlog_error("Internal error at %s: %d\n", __FILE__,
+ __LINE__);
+ abort();
break;
}
- }
+ // tell the user which conversion it was.
+ if (0 == vectorSize)
+ vlog(" (implicit scalar conversion from %s to %s)\n",
+ gTypeNames[inType], gTypeNames[outType]);
+ else
+ vlog(" (convert_%s%s%s%s( %s%s ))\n", gTypeNames[outType],
+ sizeNames[vectorSize], gSaturationNames[sat],
+ gRoundingModeNames[round], gTypeNames[inType],
+ sizeNames[vectorSize]);
- return 0;
+ gFailCount++;
+ return error;
+ }
+ }
}
-*/
-float my_fabsf( float x )
-{
- union{ cl_uint u; float f; }u;
- u.f = x;
- u.u &= 0x7fffffff;
- return u.f;
-}
+ log_info("done.\n");
-double my_fabs( double x )
-{
- union{ cl_ulong u; double f; }u;
- u.f = x;
- u.u &= 0x7fffffffffffffffULL;
- return u.f;
-}
+ if (gTimeResults)
+ {
+ // Kick off tests for the various vector lengths
+ for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
+ vectorSize++)
+ {
+ size_t workItemCount = blockCount / vectorSizes[vectorSize];
+ if (vectorSizes[vectorSize] * gTypeSizes[outType] < 4)
+ workItemCount /=
+ 4 / (vectorSizes[vectorSize] * gTypeSizes[outType]);
+
+ double sum = 0.0;
+ double bestTime = INFINITY;
+ cl_uint k;
+ for (k = 0; k < PERF_LOOP_COUNT; k++)
+ {
+ uint64_t startTime = conv_test::GetTime();
+ if ((error = conv_test::RunKernel(
+ writeInputBufferInfo.calcInfo[vectorSize]->kernel,
+ gInBuffer, gOutBuffers[vectorSize], workItemCount)))
+ {
+ gFailCount++;
+ return error;
+ }
-static float my_rintf( float f );
-static float my_rintf( float f )
-{
- static const float magic[2] = { MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23), - MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23) };
+ // Make sure OpenCL is done
+ if ((error = clFinish(gQueue)))
+ {
+ vlog_error("Error %d at clFinish\n", error);
+ return error;
+ }
- // Round fractional values to integer in round towards nearest mode
- if( fabsf(f) < MAKE_HEX_FLOAT( 0x1.0p23f, 0x1, 23 ) )
- {
- volatile float x = f;
- float magicVal = magic[ f < 0 ];
-
-#if defined( __SSE__ )
- // Defeat x87 based arithmetic, which cant do FTZ, and will round this incorrectly
- __m128 v = _mm_set_ss( x );
- __m128 m = _mm_set_ss( magicVal );
- v = _mm_add_ss( v, m );
- v = _mm_sub_ss( v, m );
- _mm_store_ss( (float*) &x, v );
-#else
- x += magicVal;
- x -= magicVal;
-#endif
- f = x;
+ uint64_t endTime = conv_test::GetTime();
+ double time = SubtractTime(endTime, startTime);
+ sum += time;
+ if (time < bestTime) bestTime = time;
+ }
+
+ if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
+ double clocksPerOp = bestTime * (double)gDeviceFrequency
+ * gComputeDevices * gSimdSize * 1e6
+ / (workItemCount * vectorSizes[vectorSize]);
+ if (0 == vectorSize)
+ vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element",
+ "implicit convert %s -> %s", gTypeNames[inType],
+ gTypeNames[outType]);
+ else
+ vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element",
+ "convert_%s%s%s%s( %s%s )", gTypeNames[outType],
+ sizeNames[vectorSize], gSaturationNames[sat],
+ gRoundingModeNames[round], gTypeNames[inType],
+ sizeNames[vectorSize]);
+ }
}
- return f;
-}
+ if (gWimpyMode)
+ vlog("\tWimp pass");
+ else
+ vlog("\tpassed");
-static void uchar2char( void *out, void *in){ ((char*) out)[0] = ((cl_uchar*) in)[0]; }
-static void uchar2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uchar*) in)[0]; }
-static void uchar2short( void *out, void *in){ ((short*) out)[0] = ((cl_uchar*) in)[0]; }
-static void uchar2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_uchar*) in)[0]; }
-static void uchar2int( void *out, void *in){ ((int*) out)[0] = ((cl_uchar*) in)[0]; }
-static void uchar2float( void *out, void *in)
-{
- cl_uchar l = ((cl_uchar*) in)[0];
- ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
-}
-static void uchar2double( void *out, void *in)
-{
- cl_uchar l = ((cl_uchar*) in)[0];
- ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
-}
-static void uchar2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uchar*) in)[0]; }
-static void uchar2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uchar*) in)[0]; }
-static void char2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_char*) in)[0]; }
-static void char2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_char*) in)[0]; }
-static void char2short( void *out, void *in){ ((short*) out)[0] = ((cl_char*) in)[0]; }
-static void char2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_char*) in)[0]; }
-static void char2int( void *out, void *in){ ((int*) out)[0] = ((cl_char*) in)[0]; }
-static void char2float( void *out, void *in)
-{
- cl_char l = ((cl_char*) in)[0];
- ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
-}
-static void char2double( void *out, void *in)
-{
- cl_char l = ((cl_char*) in)[0];
- ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
-}
-static void char2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_char*) in)[0]; }
-static void char2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_char*) in)[0]; }
-static void ushort2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_ushort*) in)[0]; }
-static void ushort2char( void *out, void *in){ ((char*) out)[0] = ((cl_ushort*) in)[0]; }
-static void ushort2short( void *out, void *in){ ((short*) out)[0] = ((cl_ushort*) in)[0]; }
-static void ushort2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_ushort*) in)[0]; }
-static void ushort2int( void *out, void *in){ ((int*) out)[0] = ((cl_ushort*) in)[0]; }
-static void ushort2float( void *out, void *in)
-{
- cl_ushort l = ((cl_ushort*) in)[0];
- ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
-}
-static void ushort2double( void *out, void *in)
-{
- cl_ushort l = ((cl_ushort*) in)[0];
- ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
-}
-static void ushort2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_ushort*) in)[0]; }
-static void ushort2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ushort*) in)[0]; }
-static void short2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_short*) in)[0]; }
-static void short2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_short*) in)[0]; }
-static void short2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_short*) in)[0]; }
-static void short2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_short*) in)[0]; }
-static void short2int( void *out, void *in){ ((cl_int*) out)[0] = ((cl_short*) in)[0]; }
-static void short2float( void *out, void *in)
-{
- cl_short l = ((cl_short*) in)[0];
- ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
-}
-static void short2double( void *out, void *in)
-{
- cl_short l = ((cl_short*) in)[0];
- ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
-}
-static void short2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_short*) in)[0]; }
-static void short2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_short*) in)[0]; }
-static void uint2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_uint*) in)[0]; }
-static void uint2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_uint*) in)[0]; }
-static void uint2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uint*) in)[0]; }
-static void uint2short( void *out, void *in){ ((short*) out)[0] = ((cl_uint*) in)[0]; }
-static void uint2int( void *out, void *in){ ((cl_int*) out)[0] = ((cl_uint*) in)[0]; }
-static void uint2float( void *out, void *in)
-{
- // Use volatile to prevent optimization by Clang compiler
- volatile cl_uint l = ((cl_uint *)in)[0];
- ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
-}
-static void uint2double( void *out, void *in)
-{
- cl_uint l = ((cl_uint*) in)[0];
- ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
-}
-static void uint2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uint*) in)[0]; }
-static void uint2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uint*) in)[0]; }
-static void int2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = ((cl_int*) in)[0]; }
-static void int2char( void *out, void *in){ ((cl_char*) out)[0] = ((cl_int*) in)[0]; }
-static void int2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_int*) in)[0]; }
-static void int2short( void *out, void *in){ ((cl_short*) out)[0] = ((cl_int*) in)[0]; }
-static void int2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_int*) in)[0]; }
-static void int2float( void *out, void *in)
-{
- // Use volatile to prevent optimization by Clang compiler
- volatile cl_int l = ((cl_int *)in)[0];
- ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
-}
-static void int2double( void *out, void *in)
-{
- cl_int l = ((cl_int*) in)[0];
- ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
-}
-static void int2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_int*) in)[0]; }
-static void int2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_int*) in)[0]; }
-static void float2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = my_rintf(((cl_float*) in)[0]); }
-static void float2char( void *out, void *in){ ((cl_char*) out)[0] = my_rintf(((cl_float*) in)[0]); }
-static void float2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = my_rintf(((cl_float*) in)[0]); }
-static void float2short( void *out, void *in){ ((cl_short*) out)[0] = my_rintf(((cl_float*) in)[0]); }
-static void float2uint( void *out, void *in){ ((cl_uint*) out)[0] = my_rintf(((cl_float*) in)[0]); }
-static void float2int( void *out, void *in){ ((cl_int*) out)[0] = my_rintf(((cl_float*) in)[0]); }
-static void float2double( void *out, void *in){ ((cl_double*) out)[0] = ((cl_float*) in)[0]; }
-static void float2ulong( void *out, void *in)
-{
-#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
- // VS2005 (at least) on x86 uses fistp to store the float as a 64-bit int.
- // However, fistp stores it as a signed int, and some of the test values won't
- // fit into a signed int. (These test values are >= 2^63.) The result on VS2005
- // is that these end up silently (at least by default settings) clamped to
- // the max lowest ulong.
- cl_float x = my_rintf(((cl_float *)in)[0]);
- if (x >= 9223372036854775808.0f) {
- x -= 9223372036854775808.0f;
- ((cl_ulong*) out)[0] = x;
- ((cl_ulong*) out)[0] += 9223372036854775808ULL;
- } else {
- ((cl_ulong*) out)[0] = x;
- }
-#else
- ((cl_ulong*) out)[0] = my_rintf(((cl_float*) in)[0]);
+#ifdef __APPLE__
+ // record the run time
+ vlog("\t(%f s)", 1e-9 * (mach_absolute_time() - wall_start));
#endif
-}
+ vlog("\n\n");
+ fflush(stdout);
-static void float2long( void *out, void *in){ ((cl_long*) out)[0] = llrint_clamped( ((cl_float*) in)[0] ); }
-static void double2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = rint(((cl_double*) in)[0]); }
-static void double2char( void *out, void *in){ ((cl_char*) out)[0] = rint(((cl_double*) in)[0]); }
-static void double2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = rint(((cl_double*) in)[0]); }
-static void double2short( void *out, void *in){ ((cl_short*) out)[0] = rint(((cl_double*) in)[0]); }
-static void double2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) rint(((cl_double*) in)[0]); }
-static void double2int( void *out, void *in){ ((cl_int*) out)[0] = (int) rint(((cl_double*) in)[0]); }
-static void double2float( void *out, void *in){ ((cl_float*) out)[0] = (float) ((cl_double*) in)[0]; }
-static void double2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = (cl_ulong) rint(((cl_double*) in)[0]); }
-static void double2long( void *out, void *in){ ((cl_long*) out)[0] = (cl_long) rint(((cl_double*) in)[0]); }
-static void ulong2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = (cl_uchar) ((cl_ulong*) in)[0]; }
-static void ulong2char( void *out, void *in){ ((cl_char*) out)[0] = (cl_char) ((cl_ulong*) in)[0]; }
-static void ulong2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = (cl_ushort) ((cl_ulong*) in)[0]; }
-static void ulong2short( void *out, void *in){ ((cl_short*) out)[0] = (cl_short)((cl_ulong*) in)[0]; }
-static void ulong2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) ((cl_ulong*) in)[0]; }
-static void ulong2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_ulong*) in)[0]; }
-static void ulong2float( void *out, void *in)
-{
-#if defined(_MSC_VER) && defined(_M_X64)
- cl_ulong l = ((cl_ulong*) in)[0];
- float result;
- cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l;
- _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl));
- ((float*) out)[0] = (l == 0 ? 0.0f : (((cl_long)l < 0) ? result * 2.0f : result));
-#else
- cl_ulong l = ((cl_ulong*) in)[0];
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
- /* ARM VFP doesn't have hardware instruction for converting from 64-bit
- * integer to float types, hence GCC ARM uses the floating-point emulation
- * code despite which -mfloat-abi setting it is. But the emulation code in
- * libgcc.a has only one rounding mode (round to nearest even in this case)
- * and ignores the user rounding mode setting in hardware.
- * As a result setting rounding modes in hardware won't give correct
- * rounding results for type covert from 64-bit integer to float using GCC
- * for ARM compiler so for testing different rounding modes, we need to use
- * alternative reference function. ARM64 does have an instruction, however
- * we cannot guarantee the compiler will use it. On all ARM architechures
- * use emulation to calculate reference.*/
- ((float*) out)[0] = qcom_u64_2_f32(l, qcom_sat, qcom_rm);
-#else
- ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
-#endif
-#endif
+ return error;
}
-static void ulong2double( void *out, void *in)
-{
-#if defined(_MSC_VER)
- cl_ulong l = ((cl_ulong*) in)[0];
- double result;
- cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1)) : (cl_long)l;
-#if defined(_M_X64)
- _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), sl));
-#else
- result = sl;
+#if !defined(__APPLE__)
+void memset_pattern4(void *dest, const void *src_pattern, size_t bytes);
#endif
- ((double*) out)[0] = (l == 0 ? 0.0 : (((cl_long)l < 0) ? result * 2.0 : result));
-#else
- // Use volatile to prevent optimization by Clang compiler
- volatile cl_ulong l = ((cl_ulong *)in)[0];
- ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
-#endif
-}
-static void ulong2long( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ulong*) in)[0]; }
-static void long2uchar( void *out, void *in){ ((cl_uchar*) out)[0] = (cl_uchar) ((cl_long*) in)[0]; }
-static void long2char( void *out, void *in){ ((cl_char*) out)[0] = (cl_char) ((cl_long*) in)[0]; }
-static void long2ushort( void *out, void *in){ ((cl_ushort*) out)[0] = (cl_ushort) ((cl_long*) in)[0]; }
-static void long2short( void *out, void *in){ ((cl_short*) out)[0] = (cl_short) ((cl_long*) in)[0]; }
-static void long2uint( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) ((cl_long*) in)[0]; }
-static void long2int( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) ((cl_long*) in)[0]; }
-static void long2float( void *out, void *in)
-{
-#if defined(_MSC_VER) && defined(_M_X64)
- cl_long l = ((cl_long*) in)[0];
- float result;
- _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), l));
- ((float*) out)[0] = (l == 0 ? 0.0f : result); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
-#else
- cl_long l = ((cl_long*) in)[0];
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
- /* ARM VFP doesn't have hardware instruction for converting from 64-bit
- * integer to float types, hence GCC ARM uses the floating-point emulation
- * code despite which -mfloat-abi setting it is. But the emulation code in
- * libgcc.a has only one rounding mode (round to nearest even in this case)
- * and ignores the user rounding mode setting in hardware.
- * As a result setting rounding modes in hardware won't give correct
- * rounding results for type covert from 64-bit integer to float using GCC
- * for ARM compiler so for testing different rounding modes, we need to use
- * alternative reference function. ARM64 does have an instruction, however
- * we cannot guarantee the compiler will use it. On all ARM architechures
- * use emulation to calculate reference.*/
- ((float*) out)[0] = (l == 0 ? 0.0f : qcom_s64_2_f32(l, qcom_sat, qcom_rm));
+#if defined(_MSC_VER)
+/* function is defined in "compat.h" */
#else
- ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
-#endif
-#endif
-}
-static void long2double( void *out, void *in)
+double SubtractTime(uint64_t endTime, uint64_t startTime)
{
-#if defined(_MSC_VER) && defined(_M_X64)
- cl_long l = ((cl_long*) in)[0];
- double result;
+ uint64_t diff = endTime - startTime;
+ static double conversion = 0.0;
- _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), l));
- ((double*) out)[0] = (l == 0 ? 0.0 : result); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
+ if (0.0 == conversion)
+ {
+#if defined(__APPLE__)
+ mach_timebase_info_data_t info = { 0, 0 };
+ kern_return_t err = mach_timebase_info(&info);
+ if (0 == err)
+ conversion = 1e-9 * (double)info.numer / (double)info.denom;
#else
- cl_long l = ((cl_long*) in)[0];
- ((double*) out)[0] = (l == 0 ? 0.0 : (double) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0
+ // This function consumes output from GetTime() above, and converts the
+ // time to seconds.
+#warning need accurate ticks to seconds conversion factor here. Times are invalid.
#endif
-}
-static void long2ulong( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_long*) in)[0]; }
-
-#define CLAMP( _lo, _x, _hi ) ( (_x) < (_lo) ? (_lo) : ((_x) > (_hi) ? (_hi) : (_x)))
-
-// Done by hand
-static void uchar2char_sat( void *out, void *in){ cl_uchar c = ((cl_uchar*) in)[0]; ((cl_char*) out)[0] = c > 0x7f ? 0x7f : c; }
-static void uchar2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = ((cl_uchar*) in)[0]; }
-static void uchar2short_sat( void *out, void *in){ ((cl_short*) out)[0] = ((cl_uchar*) in)[0]; }
-static void uchar2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_uchar*) in)[0]; }
-static void uchar2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_uchar*) in)[0]; }
-static void uchar2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf( (cl_float) ((cl_uchar*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527>
-static void uchar2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_uchar*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527>
-static void uchar2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uchar*) in)[0]; }
-static void uchar2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uchar*) in)[0]; }
-static void char2uchar_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_uchar*) out)[0] = c < 0 ? 0 : c; }
-static void char2ushort_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_ushort*) out)[0] = c < 0 ? 0 : c; }
-static void char2short_sat( void *out, void *in){ ((cl_short*) out)[0] = ((cl_char*) in)[0]; }
-static void char2uint_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_uint*) out)[0] = c < 0 ? 0 : c; }
-static void char2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_char*) in)[0]; }
-static void char2float_sat( void *out, void *in){ ((cl_float*) out)[0] = ((cl_char*) in)[0]; }
-static void char2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_char*) in)[0]; }
-static void char2ulong_sat( void *out, void *in){ cl_char c = ((cl_char*) in)[0]; ((cl_ulong*) out)[0] = c < 0 ? 0 : c; }
-static void char2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_char*) in)[0]; }
-static void ushort2uchar_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_uchar*) out)[0] = u > 0xff ? 0xFF : u; }
-static void ushort2char_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_char*) out)[0] = u > 0x7f ? 0x7F : u; }
-static void ushort2short_sat( void *out, void *in){ cl_ushort u = ((cl_ushort*) in)[0]; ((cl_short*) out)[0] = u > 0x7fff ? 0x7fFF : u; }
-static void ushort2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_ushort*) in)[0]; }
-static void ushort2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_ushort*) in)[0]; }
-static void ushort2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf((cl_float)((cl_ushort*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527>
-static void ushort2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_ushort*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527>
-static void ushort2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_ushort*) in)[0]; }
-static void ushort2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_ushort*) in)[0]; }
-static void short2uchar_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, s, CL_UCHAR_MAX ); }
-static void short2char_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, s, CL_CHAR_MAX ); }
-static void short2ushort_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_ushort*) out)[0] = s < 0 ? 0 : s; }
-static void short2uint_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_uint*) out)[0] = s < 0 ? 0 : s; }
-static void short2int_sat( void *out, void *in){ ((cl_int*) out)[0] = ((cl_short*) in)[0]; }
-static void short2float_sat( void *out, void *in){ ((cl_float*) out)[0] = ((cl_short*) in)[0]; }
-static void short2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_short*) in)[0]; }
-static void short2ulong_sat( void *out, void *in){ cl_short s = ((cl_short*) in)[0]; ((cl_ulong*) out)[0] = s < 0 ? 0 : s; }
-static void short2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_short*) in)[0]; }
-static void uint2uchar_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX); }
-static void uint2char_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_char*) out)[0] = CLAMP( 0, u, CL_CHAR_MAX ); }
-static void uint2ushort_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX); }
-static void uint2short_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_short*) out)[0] = CLAMP( 0, u, CL_SHRT_MAX); }
-static void uint2int_sat( void *out, void *in){ cl_uint u = ((cl_uint*) in)[0]; ((cl_int*) out)[0] = CLAMP( 0, u, CL_INT_MAX); }
-static void uint2float_sat( void *out, void *in){ ((cl_float*) out)[0] = my_fabsf( (cl_float) ((cl_uint*) in)[0] ); } // my_fabs workaround for <rdar://problem/5965527>
-static void uint2double_sat( void *out, void *in){ ((cl_double*) out)[0] = my_fabs( (cl_double) ((cl_uint*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527>
-static void uint2ulong_sat( void *out, void *in){ ((cl_ulong*) out)[0] = ((cl_uint*) in)[0]; }
-static void uint2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_uint*) in)[0]; }
-static void int2uchar_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, i, CL_UCHAR_MAX); }
-static void int2char_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, i, CL_CHAR_MAX); }
-static void int2ushort_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, i, CL_USHRT_MAX); }
-static void int2short_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, i, CL_SHRT_MAX); }
-static void int2uint_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_uint*) out)[0] = CLAMP( 0, i, CL_INT_MAX); }
-static void int2float_sat( void *out, void *in){ ((cl_float*) out)[0] = ((cl_int*) in)[0]; }
-static void int2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_int*) in)[0]; }
-static void int2ulong_sat( void *out, void *in){ cl_int i = ((cl_int*) in)[0]; ((cl_ulong*) out)[0] = i < 0 ? 0 : i; }
-static void int2long_sat( void *out, void *in){ ((cl_long*) out)[0] = ((cl_int*) in)[0]; }
-static void float2uchar_sat( void *out, void *in){ ((cl_uchar*) out)[0] = CLAMP( 0, lrintf_clamped(((cl_float*) in)[0]), CL_UCHAR_MAX ); }
-static void float2char_sat( void *out, void *in){ ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_CHAR_MAX); }
-static void float2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = CLAMP( 0, lrintf_clamped(((cl_float*) in)[0]), CL_USHRT_MAX ); }
-static void float2short_sat( void *out, void *in){ ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_SHRT_MAX ); }
-static void float2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, llrintf_clamped(((cl_float*) in)[0]), CL_UINT_MAX ); }
-static void float2int_sat( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) CLAMP( CL_INT_MIN, lrintf_clamped(((cl_float*) in)[0]), CL_INT_MAX ); }
-static void float2double_sat( void *out, void *in){ ((cl_double*) out)[0] = ((cl_float*) in)[0]; }
-static void float2ulong_sat( void *out, void *in)
-{
-#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
- // VS2005 (at least) on x86 uses fistp to store the float as a 64-bit int.
- // However, fistp stores it as a signed int, and some of the test values won't
- // fit into a signed int. (These test values are >= 2^63.) The result on VS2005
- // is that these end up silently (at least by default settings) clamped to
- // the max lowest ulong.
- cl_float x = my_rintf(((cl_float *)in)[0]);
- if (x >= 18446744073709551616.0f) { // 2^64
- ((cl_ulong*) out)[0] = 0xFFFFFFFFFFFFFFFFULL;
- } else if (x < 0) {
- ((cl_ulong*) out)[0] = 0;
- } else if (x >= 9223372036854775808.0f) { // 2^63
- x -= 9223372036854775808.0f;
- ((cl_ulong*) out)[0] = x;
- ((cl_ulong*) out)[0] += 9223372036854775808ULL;
- } else {
- ((cl_ulong*) out)[0] = x;
}
-#else
- float f = my_rintf(((float*) in)[0]); ((cl_ulong*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) ? 0xFFFFFFFFFFFFFFFFULL : f < 0 ? 0 : (cl_ulong) f;
-#endif
+
+ // strictly speaking we should also be subtracting out timer latency here
+ return conversion * (double)diff;
}
-// The final cast used to be (cl_ulong) f, but on Linux (RHEL5 at least)
-// if f = -1.0f, then (cl_ulong) f = 0xffffffff, which clearly isn't right.
-// Switching it to (cl_long) f seems to fix that.
-static void float2long_sat( void *out, void *in){ float f = my_rintf(((float*) in)[0]); ((cl_long*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) ? 0x7FFFFFFFFFFFFFFFULL : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) ? 0x8000000000000000LL : (cl_long) f; }
-static void double2uchar_sat( void *out, void *in){ ((cl_uchar*) out)[0] = CLAMP( 0, lrint_clamped(((cl_double*) in)[0]), CL_UCHAR_MAX ); }
-static void double2char_sat( void *out, void *in){ ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, lrint_clamped(((cl_double*) in)[0]), CL_CHAR_MAX); }
-static void double2ushort_sat( void *out, void *in){ ((cl_ushort*) out)[0] = CLAMP( 0, lrint_clamped(((cl_double*) in)[0]), CL_USHRT_MAX ); }
-static void double2short_sat( void *out, void *in){ ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, lrint_clamped(((cl_double*) in)[0]), CL_SHRT_MAX ); }
-static void double2uint_sat( void *out, void *in){ ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, llrint_clamped(((cl_double*) in)[0]), CL_UINT_MAX ); }
-static void double2int_sat( void *out, void *in){ ((cl_int*) out)[0] = (cl_int) CLAMP( CL_INT_MIN, lrint_clamped(((cl_double*) in)[0]), CL_INT_MAX ); }
-static void double2float_sat( void *out, void *in){ ((cl_float*) out)[0] = (cl_float) ((double*) in)[0]; }
-static void double2ulong_sat( void *out, void *in){ double f = rint(((double*) in)[0]); ((cl_ulong*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64) ? 0xFFFFFFFFFFFFFFFFULL : f < 0 ? 0 : (cl_ulong) f; }
-static void double2long_sat( void *out, void *in){ double f = rint(((double*) in)[0]); ((cl_long*) out)[0] = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63) ? 0x7FFFFFFFFFFFFFFFULL : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63) ? 0x8000000000000000LL : (cl_long) f; }
-static void ulong2uchar_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX ); }
-static void ulong2char_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_char*) out)[0] = CLAMP( 0, u, CL_CHAR_MAX ); }
-static void ulong2ushort_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX ); }
-static void ulong2short_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_short*) out)[0] = CLAMP( 0, u, CL_SHRT_MAX ); }
-static void ulong2uint_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, u, CL_UINT_MAX ); }
-static void ulong2int_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_int*) out)[0] = (cl_int) CLAMP( 0, u, CL_INT_MAX ); }
-static void ulong2float_sat( void *out, void *in){ ((float*) out)[0] = my_fabsf((float) ((cl_ulong*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527>
-static void ulong2double_sat( void *out, void *in){ ((double*) out)[0] = my_fabs( ((cl_ulong*) in)[0]); } // my_fabs workaround for <rdar://problem/5965527>
-static void ulong2long_sat( void *out, void *in){ cl_ulong u = ((cl_ulong*) in)[0]; ((cl_long*) out)[0] = CLAMP( 0, u, CL_LONG_MAX ); }
-static void long2uchar_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_uchar*) out)[0] = CLAMP( 0, u, CL_UCHAR_MAX ); }
-static void long2char_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_char*) out)[0] = CLAMP( CL_CHAR_MIN, u, CL_CHAR_MAX ); }
-static void long2ushort_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_ushort*) out)[0] = CLAMP( 0, u, CL_USHRT_MAX ); }
-static void long2short_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_short*) out)[0] = CLAMP( CL_SHRT_MIN, u, CL_SHRT_MAX ); }
-static void long2uint_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_uint*) out)[0] = (cl_uint) CLAMP( 0, u, CL_UINT_MAX ); }
-static void long2int_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_int*) out)[0] = (int) CLAMP( CL_INT_MIN, u, CL_INT_MAX ); }
-static void long2float_sat( void *out, void *in){ ((float*) out)[0] = (float) ((cl_long*) in)[0]; }
-static void long2double_sat( void *out, void *in){ ((double*) out)[0] = ((cl_long*) in)[0]; }
-static void long2ulong_sat( void *out, void *in){ cl_long u = ((cl_long*) in)[0]; ((cl_ulong*) out)[0] = CLAMP( 0, u, CL_LONG_MAX ); }
-
-/*
-#include <stdio.h>
-
-char *ground[] = { "",
- "_rte",
- "_rtp",
- "_rtn",
- "_rtz"
- };
-
-const char *gTypeNames[ ] = {
- "uchar", "char",
- "ushort", "short",
- "uint", "int",
- "float", "double",
- "ulong", "long"
- };
-
-
-int main( void )
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+
+static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count)
{
- int i, j;
+ cl_uint i;
+ for (i = 0; i < count; ++i)
+ allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0);
+}
- for( i = 0; i < sizeof( gTypeNames ) / sizeof( gTypeNames[0] ); i++ )
- for( j = 0; j < sizeof( ground ) / sizeof( ground[0] ); j++ )
- {
- vlog( "float clampf_%s%s( float );\n", gTypeNames[i], ground[j] );
- vlog( "double clampd_%s%s( double );\n", gTypeNames[i], ground[j] );
- }
- return 0;
+void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &ptr);
-}
-*/
-
-
-float clampf_uchar( float );
-double clampd_uchar( double );
-float clampf_uchar_rte( float );
-double clampd_uchar_rte( double );
-float clampf_uchar_rtp( float );
-double clampd_uchar_rtp( double );
-float clampf_uchar_rtn( float );
-double clampd_uchar_rtn( double );
-float clampf_uchar_rtz( float );
-double clampd_uchar_rtz( double );
-float clampf_char( float );
-double clampd_char( double );
-float clampf_char_rte( float );
-double clampd_char_rte( double );
-float clampf_char_rtp( float );
-double clampd_char_rtp( double );
-float clampf_char_rtn( float );
-double clampd_char_rtn( double );
-float clampf_char_rtz( float );
-double clampd_char_rtz( double );
-float clampf_ushort( float );
-double clampd_ushort( double );
-float clampf_ushort_rte( float );
-double clampd_ushort_rte( double );
-float clampf_ushort_rtp( float );
-double clampd_ushort_rtp( double );
-float clampf_ushort_rtn( float );
-double clampd_ushort_rtn( double );
-float clampf_ushort_rtz( float );
-double clampd_ushort_rtz( double );
-float clampf_short( float );
-double clampd_short( double );
-float clampf_short_rte( float );
-double clampd_short_rte( double );
-float clampf_short_rtp( float );
-double clampd_short_rtp( double );
-float clampf_short_rtn( float );
-double clampd_short_rtn( double );
-float clampf_short_rtz( float );
-double clampd_short_rtz( double );
-float clampf_uint( float );
-double clampd_uint( double );
-float clampf_uint_rte( float );
-double clampd_uint_rte( double );
-float clampf_uint_rtp( float );
-double clampd_uint_rtp( double );
-float clampf_uint_rtn( float );
-double clampd_uint_rtn( double );
-float clampf_uint_rtz( float );
-double clampd_uint_rtz( double );
-float clampf_int( float );
-double clampd_int( double );
-float clampf_int_rte( float );
-double clampd_int_rte( double );
-float clampf_int_rtp( float );
-double clampd_int_rtp( double );
-float clampf_int_rtn( float );
-double clampd_int_rtn( double );
-float clampf_int_rtz( float );
-double clampd_int_rtz( double );
-float clampf_float( float );
-double clampd_float( double );
-float clampf_float_rte( float );
-double clampd_float_rte( double );
-float clampf_float_rtp( float );
-double clampd_float_rtp( double );
-float clampf_float_rtn( float );
-double clampd_float_rtn( double );
-float clampf_float_rtz( float );
-double clampd_float_rtz( double );
-float clampf_double( float );
-double clampd_double( double );
-float clampf_double_rte( float );
-double clampd_double_rte( double );
-float clampf_double_rtp( float );
-double clampd_double_rtp( double );
-float clampf_double_rtn( float );
-double clampd_double_rtn( double );
-float clampf_double_rtz( float );
-double clampd_double_rtz( double );
-float clampf_ulong( float );
-double clampd_ulong( double );
-float clampf_ulong_rte( float );
-double clampd_ulong_rte( double );
-float clampf_ulong_rtp( float );
-double clampd_ulong_rtp( double );
-float clampf_ulong_rtn( float );
-double clampd_ulong_rtn( double );
-float clampf_ulong_rtz( float );
-double clampd_ulong_rtz( double );
-float clampf_long( float );
-double clampd_long( double );
-float clampf_long_rte( float );
-double clampd_long_rte( double );
-float clampf_long_rtp( float );
-double clampd_long_rtp( double );
-float clampf_long_rtn( float );
-double clampd_long_rtn( double );
-float clampf_long_rtz( float );
-double clampd_long_rtz( double );
-
-/*
-#include <stdio.h>
-
-char *ground[] = { "",
- "_rte",
- "_rtp",
- "_rtn",
- "_rtz"
- };
-
-const char *gTypeNames[ ] = {
- "uchar", "char",
- "ushort", "short",
- "uint", "int",
- "float", "double",
- "ulong", "long"
- };
-
-
-int main( void )
-{
- int i, j;
+void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
+ void *data);
- for( i = 0; i < sizeof( gTypeNames ) / sizeof( gTypeNames[0] ); i++ )
+// Registers CalcReferenceValuesComplete on the parent's calcReferenceValues event, then releases this caller's reference to that event. Note: May be called reentrantly
+void MapResultValuesComplete(const std::unique_ptr<CalcRefValsBase> &info)
+{
+ cl_int status;
+ // CalcRefValsBase *info = (CalcRefValsBase *)data;
+ cl_event calcReferenceValues = info->parent->calcReferenceValues;
+
+ // we know that the map is done, wait for the main thread to finish
+ // calculating the reference values
+ if ((status =
+ clSetEventCallback(calcReferenceValues, CL_COMPLETE,
+ CalcReferenceValuesComplete, (void *)&info))) // user data is the address of the unique_ptr that info refers to; the callback casts it back to that type
 {
- vlog( "{\t" );
- for( j = 0; j < sizeof( ground ) / sizeof( ground[0] ); j++ )
- vlog( "clampf_%s%s,\t", gTypeNames[i], ground[j] );
+ vlog_error("ERROR: clSetEventCallback failed in "
+ "MapResultValuesComplete with status: %d\n",
+ status);
+ gFailCount++; // not thread safe -- being lazy here
+ }
- vlog( "\t},\n" );
+ // this thread no longer needs its reference to info->calcReferenceValues,
+ // so release it
+ if ((status = clReleaseEvent(calcReferenceValues)))
+ {
+ vlog_error("ERROR: clReleaseEvent(info->calcReferenceValues) failed "
+ "with status: %d\n",
+ status);
+ gFailCount++; // not thread safe -- being lazy here
 }
- return 0;
+ // no need to flush since we didn't enqueue anything
+ // NOTE(review): the remark that follows mentions an event "e", which this function does not receive -- confirm it is still accurate: e was already released by WriteInputBufferComplete. It should be
+ // destroyed automatically soon after we exit.
 }
-*/
-clampf gClampFloat[ kTypeCount ][kRoundingModeCount] = {
- { clampf_uchar, clampf_uchar_rte, clampf_uchar_rtp, clampf_uchar_rtn, clampf_uchar_rtz, },
- { clampf_char, clampf_char_rte, clampf_char_rtp, clampf_char_rtn, clampf_char_rtz, },
- { clampf_ushort, clampf_ushort_rte, clampf_ushort_rtp, clampf_ushort_rtn, clampf_ushort_rtz, },
- { clampf_short, clampf_short_rte, clampf_short_rtp, clampf_short_rtn, clampf_short_rtz, },
- { clampf_uint, clampf_uint_rte, clampf_uint_rtp, clampf_uint_rtn, clampf_uint_rtz, },
- { clampf_int, clampf_int_rte, clampf_int_rtp, clampf_int_rtn, clampf_int_rtz, },
- { clampf_float, clampf_float_rte, clampf_float_rtp, clampf_float_rtn, clampf_float_rtz, },
- { clampf_double, clampf_double_rte, clampf_double_rtp, clampf_double_rtn, clampf_double_rtz, },
- { clampf_ulong, clampf_ulong_rte, clampf_ulong_rtp, clampf_ulong_rtn, clampf_ulong_rtz, },
- { clampf_long, clampf_long_rte, clampf_long_rtp, clampf_long_rtn, clampf_long_rtz, }
-};
-
-clampd gClampDouble[ kTypeCount ][kRoundingModeCount] = {
- { clampd_uchar, clampd_uchar_rte, clampd_uchar_rtp, clampd_uchar_rtn, clampd_uchar_rtz, },
- { clampd_char, clampd_char_rte, clampd_char_rtp, clampd_char_rtn, clampd_char_rtz, },
- { clampd_ushort, clampd_ushort_rte, clampd_ushort_rtp, clampd_ushort_rtn, clampd_ushort_rtz, },
- { clampd_short, clampd_short_rte, clampd_short_rtp, clampd_short_rtn, clampd_short_rtz, },
- { clampd_uint, clampd_uint_rte, clampd_uint_rtp, clampd_uint_rtn, clampd_uint_rtz, },
- { clampd_int, clampd_int_rte, clampd_int_rtp, clampd_int_rtn, clampd_int_rtz, },
- { clampd_float, clampd_float_rte, clampd_float_rtp, clampd_float_rtn, clampd_float_rtz, },
- { clampd_double, clampd_double_rte, clampd_double_rtp, clampd_double_rtn, clampd_double_rtz, },
- { clampd_ulong, clampd_ulong_rte, clampd_ulong_rtp, clampd_ulong_rtn, clampd_ulong_rtz, },
- { clampd_long, clampd_long_rte, clampd_long_rtp, clampd_long_rtn, clampd_long_rtz, }
-};
-#if defined (_WIN32)
-#define __attribute__(X)
-#endif
-static inline float fclamp( float lo, float v, float hi ) __attribute__ ((always_inline));
-static inline double dclamp( double lo, double v, double hi ) __attribute__ ((always_inline));
-
-static inline float fclamp( float lo, float v, float hi ){ v = v < lo ? lo : v; return v < hi ? v : hi; }
-static inline double dclamp( double lo, double v, double hi ){ v = v < lo ? lo : v; return v < hi ? v : hi; }
-
-// Clamp unsaturated inputs into range so we don't get test errors:
-float clampf_uchar( float f ) { return fclamp( -0.5f, f, 255.5f - 128.0f * FLT_EPSILON ); }
-double clampd_uchar( double f ) { return dclamp( -0.5, f, 255.5 - 128.0 * DBL_EPSILON ); }
-float clampf_uchar_rte( float f ) { return fclamp( -0.5f, f, 255.5f - 128.0f * FLT_EPSILON ); }
-double clampd_uchar_rte( double f ) { return dclamp( -0.5, f, 255.5 - 128.0 * DBL_EPSILON ); }
-float clampf_uchar_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 255.0f ); }
-double clampd_uchar_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 255.0 ); }
-float clampf_uchar_rtn( float f ) { return fclamp( -0.0f, f, 256.0f - 128.0f * FLT_EPSILON); }
-double clampd_uchar_rtn( double f ) { return dclamp( -0.0, f, 256.0 - 128.0 * DBL_EPSILON); }
-float clampf_uchar_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 256.0f - 128.0f * FLT_EPSILON); }
-double clampd_uchar_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 256.0 - 128.0f * DBL_EPSILON); }
-
-float clampf_char( float f ) { return fclamp( -128.5f, f, 127.5f - 64.f * FLT_EPSILON ); }
-double clampd_char( double f ) { return dclamp( -128.5, f, 127.5 - 64. * DBL_EPSILON ); }
-float clampf_char_rte( float f ) { return fclamp( -128.5f, f, 127.5f - 64.f * FLT_EPSILON ); }
-double clampd_char_rte( double f ) { return dclamp( -128.5, f, 127.5 - 64. * DBL_EPSILON ); }
-float clampf_char_rtp( float f ) { return fclamp( -129.0f + 128.f*FLT_EPSILON, f, 127.f ); }
-double clampd_char_rtp( double f ) { return dclamp( -129.0 + 128.*DBL_EPSILON, f, 127. ); }
-float clampf_char_rtn( float f ) { return fclamp( -128.0f, f, 128.f - 64.0f*FLT_EPSILON ); }
-double clampd_char_rtn( double f ) { return dclamp( -128.0, f, 128. - 64.0*DBL_EPSILON ); }
-float clampf_char_rtz( float f ) { return fclamp( -129.0f + 128.f*FLT_EPSILON, f, 128.f - 64.0f*FLT_EPSILON ); }
-double clampd_char_rtz( double f ) { return dclamp( -129.0 + 128.*DBL_EPSILON, f, 128. - 64.0*DBL_EPSILON ); }
-
-float clampf_ushort( float f ) { return fclamp( -0.5f, f, 65535.5f - 32768.0f * FLT_EPSILON ); }
-double clampd_ushort( double f ) { return dclamp( -0.5, f, 65535.5 - 32768.0 * DBL_EPSILON ); }
-float clampf_ushort_rte( float f ) { return fclamp( -0.5f, f, 65535.5f - 32768.0f * FLT_EPSILON ); }
-double clampd_ushort_rte( double f ) { return dclamp( -0.5, f, 65535.5 - 32768.0 * DBL_EPSILON ); }
-float clampf_ushort_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 65535.0f ); }
-double clampd_ushort_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 65535.0 ); }
-float clampf_ushort_rtn( float f ) { return fclamp( -0.0f, f, 65536.0f - 32768.0f * FLT_EPSILON); }
-double clampd_ushort_rtn( double f ) { return dclamp( -0.0, f, 65536.0 - 32768.0 * DBL_EPSILON); }
-float clampf_ushort_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, 65536.0f - 32768.0f * FLT_EPSILON); }
-double clampd_ushort_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, 65536.0 - 32768.0f * DBL_EPSILON); }
-
-float clampf_short( float f ) { return fclamp( -32768.5f, f, 32767.5f - 16384.f * FLT_EPSILON ); }
-double clampd_short( double f ) { return dclamp( -32768.5, f, 32767.5 - 16384. * DBL_EPSILON ); }
-float clampf_short_rte( float f ) { return fclamp( -32768.5f, f, 32767.5f - 16384.f * FLT_EPSILON ); }
-double clampd_short_rte( double f ) { return dclamp( -32768.5, f, 32767.5 - 16384. * DBL_EPSILON ); }
-float clampf_short_rtp( float f ) { return fclamp( -32769.0f + 32768.f*FLT_EPSILON, f, 32767.f ); }
-double clampd_short_rtp( double f ) { return dclamp( -32769.0 + 32768.*DBL_EPSILON, f, 32767. ); }
-float clampf_short_rtn( float f ) { return fclamp( -32768.0f, f, 32768.f - 16384.0f*FLT_EPSILON ); }
-double clampd_short_rtn( double f ) { return dclamp( -32768.0, f, 32768. - 16384.0*DBL_EPSILON ); }
-float clampf_short_rtz( float f ) { return fclamp( -32769.0f + 32768.f*FLT_EPSILON, f, 32768.f - 16384.0f*FLT_EPSILON ); }
-double clampd_short_rtz( double f ) { return dclamp( -32769.0 + 32768.*DBL_EPSILON, f, 32768. - 16384.0*DBL_EPSILON ); }
-
-float clampf_uint( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) ); }
-double clampd_uint( double f ) { return dclamp( -0.5, f, CL_UINT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * DBL_EPSILON ); }
-float clampf_uint_rte( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) ); }
-double clampd_uint_rte( double f ) { return dclamp( -0.5, f, CL_UINT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * DBL_EPSILON ); }
-float clampf_uint_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) ); }
-double clampd_uint_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, CL_UINT_MAX ); }
-float clampf_uint_rtn( float f ) { return fclamp( -0.0f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)); }
-double clampd_uint_rtn( double f ) { return dclamp( -0.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21) ); }
-float clampf_uint_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)); }
-double clampd_uint_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21)); }
-
-float clampf_int( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); }
-double clampd_int( double f ) { return dclamp( INT_MIN - 0.5, f, CL_INT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); }
-float clampf_int_rte( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); }
-double clampd_int_rte( double f ) { return dclamp( INT_MIN - 0.5, f, CL_INT_MAX + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); }
-float clampf_int_rtp( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); }
-double clampd_int_rtp( double f ) { return dclamp( INT_MIN - 1.0 + DBL_EPSILON * MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31), f, CL_INT_MAX ); }
-float clampf_int_rtn( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); }
-double clampd_int_rtn( double f ) { return dclamp( INT_MIN, f, CL_INT_MAX + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); }
-float clampf_int_rtz( float f ) { return fclamp( INT_MIN, f, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) ); }
-double clampd_int_rtz( double f ) { return dclamp( INT_MIN - 1.0 + DBL_EPSILON * MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31), f, CL_INT_MAX + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * DBL_EPSILON ); }
-
-float clampf_float( float f ){ return f; }
-double clampd_float( double f ){ return f; }
-float clampf_float_rte( float f ){ return f; }
-double clampd_float_rte( double f ){ return f; }
-float clampf_float_rtp( float f ){ return f; }
-double clampd_float_rtp( double f ){ return f; }
-float clampf_float_rtn( float f ){ return f; }
-double clampd_float_rtn( double f ){ return f; }
-float clampf_float_rtz( float f ){ return f; }
-double clampd_float_rtz( double f ){ return f; }
-
-float clampf_double( float f ){ return f; }
-double clampd_double( double f ){ return f; }
-float clampf_double_rte( float f ){ return f; }
-double clampd_double_rte( double f ){ return f; }
-float clampf_double_rtp( float f ){ return f; }
-double clampd_double_rtp( double f ){ return f; }
-float clampf_double_rtn( float f ){ return f; }
-double clampd_double_rtn( double f ){ return f; }
-float clampf_double_rtz( float f ){ return f; }
-double clampd_double_rtz( double f ){ return f; }
-
-float clampf_ulong( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); }
-double clampd_ulong( double f ) { return dclamp( -0.5, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); }
-float clampf_ulong_rte( float f ) { return fclamp( -0.5f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); }
-double clampd_ulong_rte( double f ) { return dclamp( -0.5, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); }
-float clampf_ulong_rtp( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); }
-double clampd_ulong_rtp( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); }
-float clampf_ulong_rtn( float f ) { return fclamp( -0.0f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); }
-double clampd_ulong_rtn( double f ) { return dclamp( -0.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); }
-float clampf_ulong_rtz( float f ) { return fclamp( -1.0f + FLT_EPSILON/2.0f, f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) ); }
-double clampd_ulong_rtz( double f ) { return dclamp( -1.0 + DBL_EPSILON/2.0, f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) ); }
-
-float clampf_long( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); }
-double clampd_long( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); }
-float clampf_long_rte( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); }
-double clampd_long_rte( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); }
-float clampf_long_rtp( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); }
-double clampd_long_rtp( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); }
-float clampf_long_rtn( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); }
-double clampd_long_rtn( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); }
-float clampf_long_rtz( float f ) { return fclamp( MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), f, MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38) ); }
-double clampd_long_rtz( double f ) { return dclamp( MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10) ); }
-
-#pragma mark -
-
-int alwaysPass( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize );
-int alwaysFail( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize );
-int check_uchar( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize );
-int check_char( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize );
-int check_ushort( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize );
-int check_short( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize );
-int check_uint( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize );
-int check_int( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize );
-int check_ulong( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize );
-int check_long( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize );
-int check_float( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize );
-int check_double( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize );
-
-void init_uchar( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d );
-void init_char( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d );
-void init_ushort( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d );
-void init_short( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d );
-void init_uint( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d );
-void init_int( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d );
-void init_float( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d );
-void init_double( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d );
-void init_ulong( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d );
-void init_long( void *dest, SaturationMode, RoundingMode, Type destType, uint64_t start, int count, MTdata d );
-
-InitDataFunc gInitFunctions[ kTypeCount ] = {
- init_uchar, init_char,
- init_ushort, init_short,
- init_uint, init_int,
- init_float, init_double,
- init_ulong, init_long
- };
-
-
-CheckResults gCheckResults[ kTypeCount ] = {
- check_uchar, check_char, check_ushort, check_short, check_uint,
- check_int, check_float, check_double, check_ulong, check_long
- };
-#if !defined (__APPLE__)
-#define UNUSED
-#else
-#define UNUSED __attribute__((unused))
-#endif
+void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
+ void *data)
+{ // Event callback: compares one vector size's mapped results against gRef, unmaps that buffer, and completes doneBarrier once barrierCount runs out
+ std::unique_ptr<CalcRefValsBase> &info =
+ *(std::unique_ptr<CalcRefValsBase> *)data; // data carries the address of a unique_ptr, not of the CalcRefValsBase itself
+
+ cl_uint vectorSize = info->vectorSize;
+ cl_uint count = info->parent->count;
+ Type outType =
+ info->parent->outType; // the data type of the conversion result
+ Type inType = info->parent->inType; // the data type of the conversion input
+ size_t j;
+ cl_int error;
+ cl_event doneBarrier = info->parent->doneBarrier;
+
+ // report spurious error condition
+ if (CL_SUCCESS != status)
+ {
+ vlog_error("ERROR: CalcReferenceValuesComplete did not succeed! (%d)\n",
+ status);
+ gFailCount++; // lazy about thread safety here
+ return; // NOTE(review): leaves before the unmap and the barrierCount decrement below -- confirm nothing waits on doneBarrier in this case
+ }
-int alwaysPass( void UNUSED *out1, void UNUSED *out2, void UNUSED *allowZ, uint32_t UNUSED count, int UNUSED vectorSize){ return 0; }
-int alwaysFail( void UNUSED *out1, void UNUSED *out2, void UNUSED *allowZ, uint32_t UNUSED count, int UNUSED vectorSize ){ return -1; }
+ // Now we know that both results have been mapped back from the device, and
+ // the main thread is done calculating the reference results. It is now time
+ // to check the results.
-int check_uchar( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize )
-{
- const cl_uchar *t = (const cl_uchar*)test;
- const cl_uchar *c = (const cl_uchar*)correct;
- const cl_uchar *a = (const cl_uchar*)allowZ;
- uint32_t i;
+ // verify results
+ void *mapped = info->p; // the pointer WriteInputBufferComplete stored from clEnqueueMapBuffer
- for( i = 0; i < count; i++ )
- if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_uchar)0))
+ // Patch up NaNs conversions to integer to zero -- these can be converted to
+ // any integer
+ if (outType != kfloat && outType != kdouble)
+ {
+ if (inType == kfloat)
 {
- vlog( "\nError for vector size %d found at 0x%8.8x: *0x%2.2x vs 0x%2.2x\n", vectorSize, i, c[i], t[i] );
- return i + 1;
+ float *inp = (float *)gIn;
+ for (j = 0; j < count; j++)
+ {
+ if (isnan(inp[j]))
+ memset((char *)mapped + j * gTypeSizes[outType], 0,
+ gTypeSizes[outType]);
+ }
 }
-
- return 0;
-}
-
-int check_char( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize )
-{
- const cl_char *t = (const cl_char*)test;
- const cl_char *c = (const cl_char*)correct;
- const cl_uchar *a = (const cl_uchar*)allowZ;
- uint32_t i;
-
- for( i = 0; i < count; i++ )
- if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_char)0))
+ if (inType == kdouble)
 {
- vlog( "\nError for vector size %d found at 0x%8.8x: *0x%2.2x vs 0x%2.2x\n", vectorSize, i, c[i], t[i] );
- return i + 1;
+ double *inp = (double *)gIn;
+ for (j = 0; j < count; j++)
+ {
+ if (isnan(inp[j]))
+ memset((char *)mapped + j * gTypeSizes[outType], 0,
+ gTypeSizes[outType]);
+ }
 }
+ }
+ else if (inType == kfloat || inType == kdouble)
+ { // output type is float or double and so is the input type. NaN conversions for float <->
+ // double can be any NaN
+ if (inType == kfloat && outType == kdouble)
+ {
+ float *inp = (float *)gIn;
+ double *outp = (double *)mapped;
+ for (j = 0; j < count; j++)
+ {
+ if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN; // replace whichever NaN came back with the NAN constant before the bytewise compare below
+ }
+ }
+ if (inType == kdouble && outType == kfloat)
+ {
+ double *inp = (double *)gIn;
+ float *outp = (float *)mapped;
+ for (j = 0; j < count; j++)
+ {
+ if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN; // same NaN replacement for the double -> float direction
+ }
+ }
+ }
- return 0;
-}
+ if (memcmp(mapped, gRef, count * gTypeSizes[outType])) // bytes differ: let the per-type checker decide whether that is a failure
+ info->result =
+ info->check_result(mapped, count, vectorSizes[vectorSize]); // vectorSize is used as an index into vectorSizes
+ else
+ info->result = 0;
-int check_ushort( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize )
-{
- const cl_ushort *t = (const cl_ushort*)test;
- const cl_ushort *c = (const cl_ushort*)correct;
- const cl_uchar *a = (const cl_uchar*)allowZ;
- uint32_t i;
+ // Fill the output buffer with junk and release it
+ {
+ cl_uint pattern = 0xffffdead;
+ memset_pattern4(mapped, &pattern, count * gTypeSizes[outType]);
+ if ((error = clEnqueueUnmapMemObject(gQueue, gOutBuffers[vectorSize],
+ mapped, 0, NULL, NULL)))
+ {
+ vlog_error("ERROR: clEnqueueUnmapMemObject failed in "
+ "CalcReferenceValuesComplete (%d)\n",
+ error);
+ gFailCount++;
+ }
+ }
- for( i = 0; i < count; i++ )
- if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_ushort)0))
+ if (1 == ThreadPool_AtomicAdd(&info->parent->barrierCount, -1)) // presumably AtomicAdd returns the value before the add, so 1 means this was the last outstanding callback -- TODO confirm
+ {
+ if ((status = clSetUserEventStatus(doneBarrier, CL_COMPLETE)))
 {
- vlog( "\nError for vector size %d found at 0x%8.8x: *0x%4.4x vs 0x%4.4x\n", vectorSize, i, c[i], t[i] );
- return i + 1;
+ vlog_error("ERROR: clSetUserEventStatus failed in "
+ "CalcReferenceValuesComplete (err: %d). We're probably "
+ "going to deadlock.\n",
+ status);
+ gFailCount++;
+ return;
 }
- return 0;
+ if ((status = clReleaseEvent(doneBarrier)))
+ {
+ vlog_error("ERROR: clReleaseEvent failed in "
+ "CalcReferenceValuesComplete (err: %d).\n",
+ status);
+ gFailCount++;
+ return;
+ }
+ }
+ // e was already released by WriteInputBufferComplete. It should be
+ // destroyed automatically soon after all the calls to
+ // CalcReferenceValuesComplete exit.
 }
-int check_short( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize )
-{
- const cl_short *t = (const cl_short*)test;
- const cl_short *c = (const cl_short*)correct;
- const cl_uchar *a = (const cl_uchar*)allowZ;
- uint32_t i;
+//
- for( i = 0; i < count; i++ )
- if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_short)0))
- {
- vlog( "\nError for vector size %d found at 0x%8.8x: *0x%4.4x vs 0x%4.4x\n", vectorSize, i, c[i], t[i] );
- return i + 1;
- }
+namespace conv_test {
- return 0;
-}
+////////////////////////////////////////////////////////////////////////////////
-int check_uint( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize )
+cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p) // Forwards one job to the init() member of the DataInitBase passed through p
 {
- const cl_uint *t = (const cl_uint*)test;
- const cl_uint *c = (const cl_uint*)correct;
- const cl_uchar *a = (const cl_uchar*)allowZ;
- uint32_t i;
+ DataInitBase *info = (DataInitBase *)p; // p is treated as a DataInitBase*
- for( i = 0; i < count; i++ )
- if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_uint)0))
- {
- vlog( "\nError for vector size %d found at 0x%8.8x: *0x%8.8x vs 0x%8.8x\n", vectorSize, i, c[i], t[i] );
- return i + 1;
- }
+ info->init(job_id, thread_id); // any result of init() is not inspected here
- return 0;
+ return CL_SUCCESS; // unconditional: this wrapper never reports a failure
 }
-int check_int( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize )
+////////////////////////////////////////////////////////////////////////////////
+
+cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p) // Builds the reference output for job job_id: converts its slice of gIn into the matching slice of gRef; always returns CL_SUCCESS
 {
- const cl_int *t = (const cl_int*)test;
- const cl_int *c = (const cl_int*)correct;
- const cl_uchar *a = (const cl_uchar*)allowZ;
- uint32_t i;
+ DataInitBase *info = (DataInitBase *)p; // p is treated as a DataInitBase*
- for( i = 0; i < count; i++ )
- if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_int)0))
- {
- vlog( "\nError for vector size %d found at 0x%8.8x: *0x%8.8x vs 0x%8.8x\n", vectorSize, i, c[i], t[i] );
- return i + 1;
- }
+ cl_uint count = info->size; // elements handled by this job; also the per-job stride in the offsets below
+ Type inType = info->inType;
+ Type outType = info->outType;
+ RoundingMode round = info->round;
+ size_t j;
- return 0;
-}
+ Force64BitFPUPrecision();
-int check_ulong( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize )
-{
- const cl_ulong *t = (const cl_ulong*)test;
- const cl_ulong *c = (const cl_ulong*)correct;
- const cl_uchar *a = (const cl_uchar*)allowZ;
- uint32_t i;
+ void *s = (cl_uchar *)gIn + job_id * count * gTypeSizes[info->inType]; // this job's input slice
+ void *a = (cl_uchar *)gAllowZ + job_id * count; // this job's allow-zero flags, one byte per element
+ void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType]; // this job's reference output slice
- for( i = 0; i < count; i++ )
- if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_ulong)0))
+
+ if (outType != inType)
+ {
+ // create the reference while we wait
+#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
+ /* ARM VFP doesn't have hardware instruction for converting from 64-bit
+ * integer to float types, hence GCC ARM uses the floating-point
+ * emulation code despite which -mfloat-abi setting it is. But the
+ * emulation code in libgcc.a has only one rounding mode (round to
+ * nearest even in this case) and ignores the user rounding mode setting
+ * in hardware. As a result setting rounding modes in hardware won't
+ * give correct rounding results for type convert from 64-bit integer to
+ * float using GCC for ARM compiler so for testing different rounding
+ * modes, we need to use alternative reference function. ARM64 does have
+ * an instruction, however we cannot guarantee the compiler will use it.
+ * On all ARM architectures use emulation to calculate reference.*/
+ switch (round)
 {
- vlog( "\nError for vector size %d found at 0x%8.8x: *0x%16.16llx vs 0x%16.16llx\n", vectorSize, i, c[i], t[i] );
- return i + 1;
+ /* conversions to floating-point type use the current rounding mode.
+ * The only default floating-point rounding mode supported is round
+ * to nearest even i.e the current rounding mode will be _rte for
+ * floating-point types. */
+ case kDefaultRoundingMode: qcom_rm = qcomRTE; break;
+ case kRoundToNearestEven: qcom_rm = qcomRTE; break;
+ case kRoundUp: qcom_rm = qcomRTP; break;
+ case kRoundDown: qcom_rm = qcomRTN; break;
+ case kRoundTowardZero: qcom_rm = qcomRTZ; break;
+ default:
+ vlog_error("ERROR: undefined rounding mode %d\n", round);
+ break; // qcom_rm keeps whatever value it already had in this case
 }
+ qcom_sat = info->sat;
+#endif
- return 0;
-}
+ RoundingMode oldRound = set_round(round, outType); // result is kept so it can be handed back to set_round after the conversion
-int check_long( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize )
-{
- const cl_long *t = (const cl_long*)test;
- const cl_long *c = (const cl_long*)correct;
- const cl_uchar *a = (const cl_uchar*)allowZ;
- uint32_t i;
+ if (info->sat)
+ info->conv_array_sat(d, s, count);
+ else
+ info->conv_array(d, s, count);
- for( i = 0; i < count; i++ )
- if( t[i] != c[i] && !(a[i] != (cl_uchar)0 && t[i] == (cl_long)0))
+ set_round(oldRound, outType);
+
+ // Decide if we allow a zero result in addition to the correctly rounded
+ // one
+ memset(a, 0, count);
+ if (gForceFTZ)
 {
- vlog( "\nError for vector size %d found at 0x%8.8x: *0x%16.16llx vs 0x%16.16llx\n", vectorSize, i, c[i], t[i] );
- return i + 1;
+ if (inType == kfloat || outType == kfloat)
+ setAllowZ((uint8_t *)a, (uint32_t *)s, count);
 }
+ }
+ else
+ {
+ // Copy the input to the reference. NOTE(review): the allow-zero bytes at a are not reset on this path, unlike the branch above -- confirm that is intended
+ memcpy(d, s, info->size * gTypeSizes[inType]);
+ }
- return 0;
-}
-
-int check_float( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize )
-{
- const cl_uint *t = (const cl_uint*)test;
- const cl_uint *c = (const cl_uint*)correct;
- const cl_uchar *a = (const cl_uchar*)allowZ;
- uint32_t i;
-
- for( i = 0; i < count; i++ )
- if (t[i] != c[i] &&
- // Allow nan's to be binary different
- !((t[i] & 0x7fffffffU) > 0x7f800000U &&
- (c[i] & 0x7fffffffU) > 0x7f800000U) &&
- !(a[i] != (cl_uchar)0 &&
- t[i] == (c[i] & 0x80000000U))) {
- vlog( "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n",
- vectorSize, i, ((float*)correct)[i], ((float*)test)[i] );
- return i + 1;
+ // Patch up NaNs conversions to integer to zero -- these can be converted to
+ // any integer
+ if (info->outType != kfloat && info->outType != kdouble)
+ {
+ if (inType == kfloat)
+ {
+ float *inp = (float *)s;
+ for (j = 0; j < count; j++)
+ {
+ if (isnan(inp[j]))
+ memset((char *)d + j * gTypeSizes[outType], 0,
+ gTypeSizes[outType]);
+ }
+ }
+ if (inType == kdouble)
+ {
+ double *inp = (double *)s;
+ for (j = 0; j < count; j++)
+ {
+ if (isnan(inp[j]))
+ memset((char *)d + j * gTypeSizes[outType], 0,
+ gTypeSizes[outType]);
+ }
+ }
+ }
+ else if (inType == kfloat || inType == kdouble)
+ { // output type is float or double and so is the input type. NaN conversions for float <->
+ // double can be any NaN
+ if (inType == kfloat && outType == kdouble)
+ {
+ float *inp = (float *)s;
+ for (j = 0; j < count; j++)
+ {
+ if (isnan(inp[j])) ((double *)d)[j] = NAN; // reference holds the NAN constant wherever the input is NaN
+ }
 }
+ if (inType == kdouble && outType == kfloat)
+ {
+ double *inp = (double *)s;
+ for (j = 0; j < count; j++)
+ {
+ if (isnan(inp[j])) ((float *)d)[j] = NAN; // same for the double -> float direction
+ }
+ }
+ }
- return 0;
+ return CL_SUCCESS;
 }
-int check_double( void *test, void *correct, void *allowZ, uint32_t count, int vectorSize )
-{
- const cl_ulong *t = (const cl_ulong*)test;
- const cl_ulong *c = (const cl_ulong*)correct;
- const cl_uchar *a = (const cl_uchar*)allowZ;
- uint32_t i;
-
- for( i = 0; i < count; i++ )
- if (t[i] != c[i] &&
- // Allow nan's to be binary different
- !((t[i] & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL &&
- (c[i] & 0x7fffffffffffffffULL) > 0x7f80000000000000ULL) &&
- !(a[i] != (cl_uchar)0 &&
- t[i] == (c[i] & 0x8000000000000000ULL))) {
- vlog( "\nError for vector size %d found at 0x%8.8x: *%a vs %a\n",
- vectorSize, i, ((double*)correct)[i], ((double*)test)[i] );
- return i + 1;
- }
+////////////////////////////////////////////////////////////////////////////////
+uint64_t GetTime(void) // Returns a platform timer reading; when neither branch below applies the result is always 0
+{
+#if defined(__APPLE__)
+ return mach_absolute_time();
+#elif defined(_MSC_VER)
+ return ReadTime();
+#else
+ // No timer is wired up for this platform, so 0 is returned below. (For comparison, mach_absolute_time is a high precision timer with precision < 1
+ // microsecond.)
+#warning need accurate clock here. Times are invalid.
 return 0;
+#endif
 }
+////////////////////////////////////////////////////////////////////////////////
-void init_uchar( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata UNUSED d )
+// Runs the conversion kernel and maps its output for every vector size, then registers the result checks and flushes gQueue. Note: not called reentrantly
+void WriteInputBufferComplete(void *data)
 {
- cl_uchar *o = (cl_uchar *)out;
- int i;
+ cl_int status;
+ WriteInputBufferInfo *info = (WriteInputBufferInfo *)data; // data is treated as a WriteInputBufferInfo*
+ cl_uint count = info->count;
+ int vectorSize;
- for( i = 0; i < count; i++ )
- o[i] = start++;
-}
+ info->barrierCount = gMaxVectorSize - gMinVectorSize; // one count per vector size processed below; CalcReferenceValuesComplete decrements it
-void init_char( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata UNUSED d )
-{
- char *o = (char *)out;
- int i;
+ // now that we know that the write buffer is complete, run the conversion kernel and map its output for each vector size; the callbacks that
+ // wait for the main thread to finish calculating the reference results are registered in the second loop below.
+ for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
+ {
+ size_t workItemCount =
+ (count + vectorSizes[vectorSize] - 1) / (vectorSizes[vectorSize]); // ceiling division of count by the vector width
- for( i = 0; i < count; i++ )
- o[i] = start++;
-}
+ if ((status = conv_test::RunKernel(info->calcInfo[vectorSize]->kernel,
+ gInBuffer, gOutBuffers[vectorSize],
+ workItemCount)))
+ {
+ gFailCount++;
+ return; // NOTE(review): the callbacks that complete doneBarrier are never registered on this path -- confirm no waiter can hang
+ }
-void init_ushort( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata UNUSED d )
-{
- cl_ushort *o = (cl_ushort *)out;
- int i;
+ info->calcInfo[vectorSize]->p = clEnqueueMapBuffer(
+ gQueue, gOutBuffers[vectorSize], CL_TRUE, // CL_TRUE requests a blocking map
+ CL_MAP_READ | CL_MAP_WRITE, 0, count * gTypeSizes[info->outType], 0,
+ NULL, NULL, &status);
+ {
+ if (status)
+ {
+ vlog_error("ERROR: WriteInputBufferComplete calback failed "
+ "with status: %d\n",
+ status);
+ gFailCount++;
+ return;
+ }
+ }
+ }
- for( i = 0; i < count; i++ )
- o[i] = start++;
-}
+ for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
+ {
+ MapResultValuesComplete(info->calcInfo[vectorSize]); // called directly, once per vector size
+ }
-void init_short( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, UNUSED Type destType, uint64_t start, int count, MTdata UNUSED d )
-{
- short *o = (short *)out;
- int i;
+ // Make sure the work starts moving -- otherwise we may deadlock
+ if ((status = clFlush(gQueue)))
+ {
+ vlog_error(
+ "ERROR: WriteInputBufferComplete calback failed with status: %d\n",
+ status);
+ gFailCount++;
+ return;
+ }
- for( i = 0; i < count; i++ )
- o[i] = start++;
+ // NOTE(review): the remark that follows mentions an event "e", which this function does not receive -- confirm it is still accurate: e was already released by the main thread. It should be destroyed
+ // automatically soon after we exit.
 }
-void init_uint( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata d )
+////////////////////////////////////////////////////////////////////////////////
+
+// Generates the OpenCL C source for one conversion test, builds it, and
+// returns the resulting program.
+//
+// outType, inType - destination / source element types (index gTypeNames).
+// sat, round - index gSaturationNames / gRoundingModeNames to form the
+// convert_<type><sat><round> builtin name.
+// vectorSize - index into vectorSizes[]; 0 selects the implicit
+// (plain assignment) scalar conversion kernel.
+// outKernel - reset to NULL, then handed to create_single_kernel_helper
+// to receive the kernel.
+//
+// Returns the program, or NULL when create_single_kernel_helper reports an
+// error.
+cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
+ RoundingMode round, int vectorSize, cl_kernel *outKernel)
{
- static const unsigned int specialValuesUInt[] = {
- INT_MIN, INT_MIN + 1, INT_MIN + 2,
- -(1<<30)-3,-(1<<30)-2,-(1<<30)-1, -(1<<30), -(1<<30)+1, -(1<<30)+2, -(1<<30)+3,
- -(1<<24)-3,-(1<<24)-2,-(1<<24)-1, -(1<<24), -(1<<24)+1, -(1<<24)+2, -(1<<24)+3,
- -(1<<23)-3,-(1<<23)-2,-(1<<23)-1, -(1<<23), -(1<<23)+1, -(1<<23)+2, -(1<<23)+3,
- -(1<<22)-3,-(1<<22)-2,-(1<<22)-1, -(1<<22), -(1<<22)+1, -(1<<22)+2, -(1<<22)+3,
- -(1<<21)-3,-(1<<21)-2,-(1<<21)-1, -(1<<21), -(1<<21)+1, -(1<<21)+2, -(1<<21)+3,
- -(1<<16)-3,-(1<<16)-2,-(1<<16)-1, -(1<<16), -(1<<16)+1, -(1<<16)+2, -(1<<16)+3,
- -(1<<15)-3,-(1<<15)-2,-(1<<15)-1, -(1<<15), -(1<<15)+1, -(1<<15)+2, -(1<<15)+3,
- -(1<<8)-3,-(1<<8)-2,-(1<<8)-1, -(1<<8), -(1<<8)+1, -(1<<8)+2, -(1<<8)+3,
- -(1<<7)-3,-(1<<7)-2,-(1<<7)-1, -(1<<7), -(1<<7)+1, -(1<<7)+2, -(1<<7)+3,
- -4, -3, -2, -1, 0, 1, 2, 3, 4,
- (1<<7)-3,(1<<7)-2,(1<<7)-1, (1<<7), (1<<7)+1, (1<<7)+2, (1<<7)+3,
- (1<<8)-3,(1<<8)-2,(1<<8)-1, (1<<8), (1<<8)+1, (1<<8)+2, (1<<8)+3,
- (1<<15)-3,(1<<15)-2,(1<<15)-1, (1<<15), (1<<15)+1, (1<<15)+2, (1<<15)+3,
- (1<<16)-3,(1<<16)-2,(1<<16)-1, (1<<16), (1<<16)+1, (1<<16)+2, (1<<16)+3,
- (1<<21)-3,(1<<21)-2,(1<<21)-1, (1<<21), (1<<21)+1, (1<<21)+2, (1<<21)+3,
- (1<<22)-3,(1<<22)-2,(1<<22)-1, (1<<22), (1<<22)+1, (1<<22)+2, (1<<22)+3,
- (1<<23)-3,(1<<23)-2,(1<<23)-1, (1<<23), (1<<23)+1, (1<<23)+2, (1<<23)+3,
- (1<<24)-3,(1<<24)-2,(1<<24)-1, (1<<24), (1<<24)+1, (1<<24)+2, (1<<24)+3,
- (1<<30)-3,(1<<30)-2,(1<<30)-1, (1<<30), (1<<30)+1, (1<<30)+2, (1<<30)+3,
- INT_MAX-3, INT_MAX-2, INT_MAX-1, INT_MAX, // 0x80000000, 0x80000001 0x80000002 already covered above
- UINT_MAX-3, UINT_MAX-2, UINT_MAX-1, UINT_MAX
- };
-
- cl_uint *o = (cl_uint *)out;
- int i;
+ cl_program program;
+ char testName[256];
+ int error = 0;
- for( i = 0; i < count; i++) {
- if( gIsEmbedded )
- o[i] = (cl_uint) genrand_int32(d);
- else
- o[i] = (cl_uint)i + start;
- }
+ std::ostringstream source;
+ // Double operands need the fp64 extension enabled in the kernel source.
+ if (outType == kdouble || inType == kdouble)
+ source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
- if( 0 == start )
+ // Create the program. This is a bit complicated because we are trying to
+ // avoid byte and short stores.
+ if (0 == vectorSize)
{
- size_t tableSize = sizeof( specialValuesUInt );
- if( sizeof( cl_uint) * count < tableSize )
- tableSize = sizeof( cl_uint) * count;
- memcpy( (char*)(o + i) - tableSize, specialValuesUInt, tableSize );
+ // Create the type names. snprintf always NUL-terminates, which strncpy
+ // does not when the source fills the whole buffer.
+ char inName[32];
+ char outName[32];
+ snprintf(inName, sizeof(inName), "%s", gTypeNames[inType]);
+ snprintf(outName, sizeof(outName), "%s", gTypeNames[outType]);
+ snprintf(testName, sizeof(testName), "test_implicit_%s_%s", outName,
+ inName);
+
+ source << "__kernel void " << testName << "( __global " << inName
+ << " *src, __global " << outName << " *dest )\n";
+ source << "{\n";
+ source << " size_t i = get_global_id(0);\n";
+ source << " dest[i] = src[i];\n";
+ source << "}\n";
+
+ vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType],
+ gTypeNames[outType]);
+ fflush(stdout);
}
-}
+ else
+ {
+ int vectorSizetmp = vectorSizes[vectorSize];
-void init_int( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata d )
-{
- static const unsigned int specialValuesInt[] = {
- INT_MIN, INT_MIN + 1, INT_MIN + 2,
- -(1<<30)-3,-(1<<30)-2,-(1<<30)-1, -(1<<30), -(1<<30)+1, -(1<<30)+2, -(1<<30)+3,
- -(1<<24)-3,-(1<<24)-2,-(1<<24)-1, -(1<<24), -(1<<24)+1, -(1<<24)+2, -(1<<24)+3,
- -(1<<23)-3,-(1<<23)-2,-(1<<23)-1, -(1<<23), -(1<<23)+1, -(1<<23)+2, -(1<<23)+3,
- -(1<<22)-3,-(1<<22)-2,-(1<<22)-1, -(1<<22), -(1<<22)+1, -(1<<22)+2, -(1<<22)+3,
- -(1<<21)-3,-(1<<21)-2,-(1<<21)-1, -(1<<21), -(1<<21)+1, -(1<<21)+2, -(1<<21)+3,
- -(1<<16)-3,-(1<<16)-2,-(1<<16)-1, -(1<<16), -(1<<16)+1, -(1<<16)+2, -(1<<16)+3,
- -(1<<15)-3,-(1<<15)-2,-(1<<15)-1, -(1<<15), -(1<<15)+1, -(1<<15)+2, -(1<<15)+3,
- -(1<<8)-3,-(1<<8)-2,-(1<<8)-1, -(1<<8), -(1<<8)+1, -(1<<8)+2, -(1<<8)+3,
- -(1<<7)-3,-(1<<7)-2,-(1<<7)-1, -(1<<7), -(1<<7)+1, -(1<<7)+2, -(1<<7)+3,
- -4, -3, -2, -1, 0, 1, 2, 3, 4,
- (1<<7)-3,(1<<7)-2,(1<<7)-1, (1<<7), (1<<7)+1, (1<<7)+2, (1<<7)+3,
- (1<<8)-3,(1<<8)-2,(1<<8)-1, (1<<8), (1<<8)+1, (1<<8)+2, (1<<8)+3,
- (1<<15)-3,(1<<15)-2,(1<<15)-1, (1<<15), (1<<15)+1, (1<<15)+2, (1<<15)+3,
- (1<<16)-3,(1<<16)-2,(1<<16)-1, (1<<16), (1<<16)+1, (1<<16)+2, (1<<16)+3,
- (1<<21)-3,(1<<21)-2,(1<<21)-1, (1<<21), (1<<21)+1, (1<<21)+2, (1<<21)+3,
- (1<<22)-3,(1<<22)-2,(1<<22)-1, (1<<22), (1<<22)+1, (1<<22)+2, (1<<22)+3,
- (1<<23)-3,(1<<23)-2,(1<<23)-1, (1<<23), (1<<23)+1, (1<<23)+2, (1<<23)+3,
- (1<<24)-3,(1<<24)-2,(1<<24)-1, (1<<24), (1<<24)+1, (1<<24)+2, (1<<24)+3,
- (1<<30)-3,(1<<30)-2,(1<<30)-1, (1<<30), (1<<30)+1, (1<<30)+2, (1<<30)+3,
- INT_MAX-3, INT_MAX-2, INT_MAX-1, INT_MAX, // 0x80000000, 0x80000001 0x80000002 already covered above
- UINT_MAX-3, UINT_MAX-2, UINT_MAX-1, UINT_MAX
- };
-
- int *o = (int *)out;
- int i;
+ // Create the type names.
+ char convertString[128];
+ char inName[32];
+ char outName[32];
+ switch (vectorSizetmp)
+ {
+ case 1:
+ // Scalar: names carry no width suffix.
+ snprintf(inName, sizeof(inName), "%s", gTypeNames[inType]);
+ snprintf(outName, sizeof(outName), "%s", gTypeNames[outType]);
+ snprintf(convertString, sizeof(convertString), "convert_%s%s%s",
+ outName, gSaturationNames[sat],
+ gRoundingModeNames[round]);
+ snprintf(testName, sizeof(testName), "test_%s_%s", convertString,
+ inName);
+ vlog("Building %s( %s ) test\n", convertString, inName);
+ break;
+ case 3:
+ // 3-wide: the kernel takes scalar pointers and uses
+ // vload3/vstore3, so inName/outName stay scalar here.
+ snprintf(inName, sizeof(inName), "%s", gTypeNames[inType]);
+ snprintf(outName, sizeof(outName), "%s", gTypeNames[outType]);
+ snprintf(convertString, sizeof(convertString),
+ "convert_%s3%s%s", outName, gSaturationNames[sat],
+ gRoundingModeNames[round]);
+ snprintf(testName, sizeof(testName), "test_%s_%s3", convertString,
+ inName);
+ vlog("Building %s( %s3 ) test\n", convertString, inName);
+ break;
+ default:
+ // Other widths: the width is appended to both type names.
+ snprintf(inName, sizeof(inName), "%s%d", gTypeNames[inType],
+ vectorSizetmp);
+ snprintf(outName, sizeof(outName), "%s%d", gTypeNames[outType],
+ vectorSizetmp);
+ snprintf(convertString, sizeof(convertString), "convert_%s%s%s",
+ outName, gSaturationNames[sat],
+ gRoundingModeNames[round]);
+ snprintf(testName, sizeof(testName), "test_%s_%s", convertString,
+ inName);
+ vlog("Building %s( %s ) test\n", convertString, inName);
+ break;
+ }
+ fflush(stdout);
- for( i = 0; i < count; i++ ) {
- if( gIsEmbedded ) {
- o[i] = (int) genrand_int32(d);
- }
- else {
- o[i] = (int) i + start;
- }
+ if (vectorSizetmp == 3)
+ {
+ // All work-items but the last use vload3/vstore3; the last one
+ // copies x (and y when its index is even) element by element.
+ source << "__kernel void " << testName << "( __global " << inName
+ << " *src, __global " << outName << " *dest )\n";
+ source << "{\n";
+ source << " size_t i = get_global_id(0);\n";
+ source << " if( i + 1 < get_global_size(0))\n";
+ source << " vstore3( " << convertString
+ << "( vload3( i, src)), i, dest );\n";
+ source << " else\n";
+ source << " {\n";
+ source << " " << inName << "3 in;\n";
+ source << " " << outName << "3 out;\n";
+ source << " if( 0 == (i & 1) )\n";
+ source << " in.y = src[3*i+1];\n";
+ source << " in.x = src[3*i];\n";
+ source << " out = " << convertString << "( in ); \n";
+ source << " dest[3*i] = out.x;\n";
+ source << " if( 0 == (i & 1) )\n";
+ source << " dest[3*i+1] = out.y;\n";
+ source << " }\n";
+ source << "}\n";
+ }
+ else
+ {
+ source << "__kernel void " << testName << "( __global " << inName
+ << " *src, __global " << outName << " *dest )\n";
+ source << "{\n";
+ source << " size_t i = get_global_id(0);\n";
+ source << " dest[i] = " << convertString << "( src[i] );\n";
+ source << "}\n";
+ }
}
+ *outKernel = NULL;
- if( 0 == start )
+ // Only pass a build option when flush-to-zero is being forced.
+ const char *flags = NULL;
+ if (gForceFTZ) flags = "-cl-denorms-are-zero";
+
+ // build it
+ std::string sourceString = source.str();
+ const char *programSource = sourceString.c_str();
+ error = create_single_kernel_helper(gContext, &program, outKernel, 1,
+ &programSource, testName, flags);
+ if (error)
{
- size_t tableSize = sizeof( specialValuesInt );
- if( sizeof( int) * count < tableSize )
- tableSize = sizeof( int) * count;
- memcpy( (char*)(o + i) - tableSize, specialValuesInt, tableSize );
+ vlog_error("Failed to build kernel/program (err = %d).\n", error);
+ return NULL;
}
+
+ return program;
}
-void init_float( void *out, SaturationMode sat, RoundingMode round, Type destType, uint64_t start, int count, MTdata d )
+//
+
+// Binds inBuf and outBuf as kernel arguments 0 and 1 and enqueues a
+// one-dimensional NDRange of blockCount work-items on gQueue. Nothing in this
+// function waits for the kernel to finish.
+//
+// Returns 0 on success, otherwise the OpenCL error code of the call that
+// failed (also logged through vlog_error).
+int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, size_t blockCount)
{
- static const float specialValuesFloat[] = {
- -NAN, -INFINITY, -FLT_MAX, MAKE_HEX_FLOAT(-0x1.000002p64f, -0x1000002L, 40), MAKE_HEX_FLOAT(-0x1.0p64f, -0x1L, 64), MAKE_HEX_FLOAT(-0x1.fffffep63f, -0x1fffffeL, 39), MAKE_HEX_FLOAT(-0x1.000002p63f, -0x1000002L, 39), MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(-0x1.fffffep62f, -0x1fffffeL, 38),
- MAKE_HEX_FLOAT(-0x1.000002p32f, -0x1000002L, 8), MAKE_HEX_FLOAT(-0x1.0p32f, -0x1L, 32), MAKE_HEX_FLOAT(-0x1.fffffep31f, -0x1fffffeL, 7), MAKE_HEX_FLOAT(-0x1.000002p31f, -0x1000002L, 7), MAKE_HEX_FLOAT(-0x1.0p31f, -0x1L, 31), MAKE_HEX_FLOAT(-0x1.fffffep30f, -0x1fffffeL, 6), -1000.f, -100.f, -4.0f, -3.5f,
- -3.0f, MAKE_HEX_FLOAT(-0x1.800002p1f, -0x1800002L, -23), -2.5f, MAKE_HEX_FLOAT(-0x1.7ffffep1f, -0x17ffffeL, -23), -2.0f, MAKE_HEX_FLOAT(-0x1.800002p0f, -0x1800002L, -24), -1.5f, MAKE_HEX_FLOAT(-0x1.7ffffep0f, -0x17ffffeL, -24),MAKE_HEX_FLOAT(-0x1.000002p0f, -0x1000002L, -24), -1.0f, MAKE_HEX_FLOAT(-0x1.fffffep-1f, -0x1fffffeL, -25),
- MAKE_HEX_FLOAT(-0x1.000002p-1f, -0x1000002L, -25), -0.5f, MAKE_HEX_FLOAT(-0x1.fffffep-2f, -0x1fffffeL, -26), MAKE_HEX_FLOAT(-0x1.000002p-2f, -0x1000002L, -26), -0.25f, MAKE_HEX_FLOAT(-0x1.fffffep-3f, -0x1fffffeL, -27),
- MAKE_HEX_FLOAT(-0x1.000002p-126f, -0x1000002L, -150), -FLT_MIN, MAKE_HEX_FLOAT(-0x0.fffffep-126f, -0x0fffffeL, -150), MAKE_HEX_FLOAT(-0x0.000ffep-126f, -0x0000ffeL, -150), MAKE_HEX_FLOAT(-0x0.0000fep-126f, -0x00000feL, -150), MAKE_HEX_FLOAT(-0x0.00000ep-126f, -0x000000eL, -150), MAKE_HEX_FLOAT(-0x0.00000cp-126f, -0x000000cL, -150), MAKE_HEX_FLOAT(-0x0.00000ap-126f, -0x000000aL, -150),
- MAKE_HEX_FLOAT(-0x0.000008p-126f, -0x0000008L, -150), MAKE_HEX_FLOAT(-0x0.000006p-126f, -0x0000006L, -150), MAKE_HEX_FLOAT(-0x0.000004p-126f, -0x0000004L, -150), MAKE_HEX_FLOAT(-0x0.000002p-126f, -0x0000002L, -150), -0.0f,
- +NAN, +INFINITY, +FLT_MAX, MAKE_HEX_FLOAT(+0x1.000002p64f, +0x1000002L, 40), MAKE_HEX_FLOAT(+0x1.0p64f, +0x1L, 64), MAKE_HEX_FLOAT(+0x1.fffffep63f, +0x1fffffeL, 39), MAKE_HEX_FLOAT(+0x1.000002p63f, +0x1000002L, 39), MAKE_HEX_FLOAT(+0x1.0p63f, +0x1L, 63), MAKE_HEX_FLOAT(+0x1.fffffep62f, +0x1fffffeL, 38),
- MAKE_HEX_FLOAT(+0x1.000002p32f, +0x1000002L, 8), MAKE_HEX_FLOAT(+0x1.0p32f, +0x1L, 32), MAKE_HEX_FLOAT(+0x1.fffffep31f, +0x1fffffeL, 7), MAKE_HEX_FLOAT(+0x1.000002p31f, +0x1000002L, 7), MAKE_HEX_FLOAT(+0x1.0p31f, +0x1L, 31), MAKE_HEX_FLOAT(+0x1.fffffep30f, +0x1fffffeL, 6), +1000.f, +100.f, +4.0f, +3.5f,
- +3.0f, MAKE_HEX_FLOAT(+0x1.800002p1f, +0x1800002L, -23), 2.5f, MAKE_HEX_FLOAT(+0x1.7ffffep1f, +0x17ffffeL, -23),+2.0f, MAKE_HEX_FLOAT(+0x1.800002p0f, +0x1800002L, -24), 1.5f, MAKE_HEX_FLOAT(+0x1.7ffffep0f, +0x17ffffeL, -24), MAKE_HEX_FLOAT(+0x1.000002p0f, +0x1000002L, -24), +1.0f, MAKE_HEX_FLOAT(+0x1.fffffep-1f, +0x1fffffeL, -25),
- MAKE_HEX_FLOAT(+0x1.000002p-1f, +0x1000002L, -25), +0.5f, MAKE_HEX_FLOAT(+0x1.fffffep-2f, +0x1fffffeL, -26), MAKE_HEX_FLOAT(+0x1.000002p-2f, +0x1000002L, -26), +0.25f, MAKE_HEX_FLOAT(+0x1.fffffep-3f, +0x1fffffeL, -27),
- MAKE_HEX_FLOAT(0x1.000002p-126f, 0x1000002L, -150), +FLT_MIN, MAKE_HEX_FLOAT(+0x0.fffffep-126f, +0x0fffffeL, -150), MAKE_HEX_FLOAT(+0x0.000ffep-126f, +0x0000ffeL, -150), MAKE_HEX_FLOAT(+0x0.0000fep-126f, +0x00000feL, -150), MAKE_HEX_FLOAT(+0x0.00000ep-126f, +0x000000eL, -150), MAKE_HEX_FLOAT(+0x0.00000cp-126f, +0x000000cL, -150), MAKE_HEX_FLOAT(+0x0.00000ap-126f, +0x000000aL, -150),
- MAKE_HEX_FLOAT(+0x0.000008p-126f, +0x0000008L, -150), MAKE_HEX_FLOAT(+0x0.000006p-126f, +0x0000006L, -150), MAKE_HEX_FLOAT(+0x0.000004p-126f, +0x0000004L, -150), MAKE_HEX_FLOAT(+0x0.000002p-126f, +0x0000002L, -150), +0.0f
- };
-
- cl_uint *o = (cl_uint *)out;
- int i;
+ // The global dimensions are just the blockCount to execute since we haven't
+ // set up multiple queues for multiple devices.
+ int error;
- for( i = 0; i < count; i++ ) {
- if( gIsEmbedded )
- o[i] = (cl_uint) genrand_int32(d);
- else
- o[i] = (cl_uint) i + start;
- }
+ // Set the arguments one at a time and stop at the first failure, so the
+ // code logged and returned is a real OpenCL status rather than the bitwise
+ // OR of two different negative codes.
+ error = clSetKernelArg(kernel, 0, sizeof(inBuf), &inBuf);
+ if (error == CL_SUCCESS)
+ error = clSetKernelArg(kernel, 1, sizeof(outBuf), &outBuf);
- if( 0 == start )
+ if (error)
{
- size_t tableSize = sizeof( specialValuesFloat );
- if( sizeof( float) * count < tableSize )
- tableSize = sizeof( float) * count;
- memcpy( (char*)(o + i) - tableSize, specialValuesFloat, tableSize );
+ vlog_error("FAILED -- could not set kernel args (%d)\n", error);
+ return error;
}
- if( kUnsaturated == sat )
+ if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &blockCount,
+ NULL, 0, NULL, NULL)))
{
- clampf func = gClampFloat[ destType ][round];
- float *f = (float *)out;
-
- for( i = 0; i < count; i++ )
- f[i] = func( f[i] );
+ vlog_error("FAILED -- could not execute kernel (%d)\n", error);
+ return error;
}
-}
-
-// used to convert a bucket of bits into a search pattern through double
-static inline double DoubleFromUInt32( uint32_t bits );
-static inline double DoubleFromUInt32( uint32_t bits )
-{
- union{ uint64_t u; double d;} u;
- // split 0x89abcdef to 0x89abc00000000def
- u.u = bits & 0xfffU;
- u.u |= (uint64_t) (bits & ~0xfffU) << 32;
-
- // sign extend the leading bit of def segment as sign bit so that the middle region consists of either all 1s or 0s
- u.u -= (bits & 0x800U) << 1;
-
- // return result
- return u.d;
+ return 0;
}
-// A table of more difficult cases to get right
-static const double specialValuesDouble[] = {
- -NAN, -INFINITY, -DBL_MAX, MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12), MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(-0x1.80000000000001p64, -0x180000000000001LL, 8),
- MAKE_HEX_DOUBLE(-0x1.8p64, -0x18LL, 60), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp64, -0x17ffffffffffffLL, 12), MAKE_HEX_DOUBLE(-0x1.80000000000001p63, -0x180000000000001LL, 7), MAKE_HEX_DOUBLE(-0x1.8p63, -0x18LL, 59), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp63, -0x17ffffffffffffLL, 11),
- MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11), MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), MAKE_HEX_DOUBLE(-0x1.80000000000001p32, -0x180000000000001LL, -24), MAKE_HEX_DOUBLE(-0x1.8p32, -0x18LL, 28), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp32, -0x17ffffffffffffLL, -20),
- MAKE_HEX_DOUBLE(-0x1.000002p32, -0x1000002LL, 8), MAKE_HEX_DOUBLE(-0x1.0p32, -0x1LL, 32), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp31, -0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.80000000000001p31, -0x180000000000001LL, -25), MAKE_HEX_DOUBLE(-0x1.8p31, -0x18LL, 27), MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp31, -0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(-0x1.0000000000001p31, -0x10000000000001LL, -21), MAKE_HEX_DOUBLE(-0x1.0p31, -0x1LL, 31), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp30, -0x1fffffffffffffLL, -22), -1000., -100., -4.0, -3.5,
- -3.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51), -2.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51), -2.0, MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52), -1.5, MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), -1.0, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
- MAKE_HEX_DOUBLE(-0x1.0000000000001p-1, -0x10000000000001LL, -53), -0.5, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-2, -0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(-0x1.0000000000001p-2, -0x10000000000001LL, -54), -0.25, MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-3, -0x1fffffffffffffLL, -55),
- MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074), -DBL_MIN, MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
- MAKE_HEX_DOUBLE(-0x0.0000000000008p-1022, -0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000007p-1022, -0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000006p-1022, -0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000005p-1022, -0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000004p-1022, -0x00000000000004LL, -1074),
- MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074), -0.0,
-
- MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11), MAKE_HEX_DOUBLE(0x1.80000000000001p63, 0x180000000000001LL, 7), MAKE_HEX_DOUBLE(0x1.8p63, 0x18LL, 59), MAKE_HEX_DOUBLE(0x1.7ffffffffffffp63, 0x17ffffffffffffLL, 11), MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11), MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
- MAKE_HEX_DOUBLE(+0x1.80000000000001p32, +0x180000000000001LL, -24), MAKE_HEX_DOUBLE(+0x1.8p32, +0x18LL, 28), MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp32, +0x17ffffffffffffLL, -20),
- MAKE_HEX_DOUBLE(+0x1.000002p32, +0x1000002LL, 8), MAKE_HEX_DOUBLE(+0x1.0p32, +0x1LL, 32), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp31, +0x1fffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.80000000000001p31, +0x180000000000001LL, -25), MAKE_HEX_DOUBLE(+0x1.8p31, +0x18LL, 27), MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp31, +0x17ffffffffffffLL, -21), MAKE_HEX_DOUBLE(+0x1.0000000000001p31, +0x10000000000001LL, -21), MAKE_HEX_DOUBLE(+0x1.0p31, +0x1LL, 31), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp30, +0x1fffffffffffffLL, -22), +1000., +100., +4.0, +3.5,
- +3.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51), +2.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51), +2.0, MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52), +1.5, MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52), +1.0, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
- MAKE_HEX_DOUBLE(+0x1.0000000000001p-1, +0x10000000000001LL, -53), +0.5, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-2, +0x1fffffffffffffLL, -54), MAKE_HEX_DOUBLE(+0x1.0000000000001p-2, +0x10000000000001LL, -54), +0.25, MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-3, +0x1fffffffffffffLL, -55),
- MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074), +DBL_MIN, MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074), MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074), MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
- MAKE_HEX_DOUBLE(+0x0.0000000000008p-1022, +0x00000000000008LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000007p-1022, +0x00000000000007LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000006p-1022, +0x00000000000006LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000005p-1022, +0x00000000000005LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000004p-1022, +0x00000000000004LL, -1074),
- MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074), MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074), +0.0,
-
- MAKE_HEX_DOUBLE(-0x1.ffffffffffffep62, -0x1ffffffffffffeLL, 10), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp62, -0x1ffffffffffffcLL, 10), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep62, +0x1ffffffffffffeLL, 10), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp62, +0x1ffffffffffffcLL, 10), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
- MAKE_HEX_DOUBLE(-0x1.ffffffffffffep51, -0x1ffffffffffffeLL, -1), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp51, -0x1ffffffffffffcLL, -1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp51, -0x1fffffffffffffLL, -1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep51, +0x1ffffffffffffeLL, -1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp51, +0x1ffffffffffffcLL, -1), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp51, +0x1fffffffffffffLL, -1),
- MAKE_HEX_DOUBLE(-0x1.ffffffffffffep52, -0x1ffffffffffffeLL, 0), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp52, -0x1ffffffffffffcLL, 0), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp52, -0x1fffffffffffffLL, 0), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep52, +0x1ffffffffffffeLL, 0), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp52, +0x1ffffffffffffcLL, 0), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp52, +0x1fffffffffffffLL, 0),
- MAKE_HEX_DOUBLE(-0x1.ffffffffffffep53, -0x1ffffffffffffeLL, 1), MAKE_HEX_DOUBLE(-0x1.ffffffffffffcp53, -0x1ffffffffffffcLL, 1), MAKE_HEX_DOUBLE(-0x1.fffffffffffffp53, -0x1fffffffffffffLL, 1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffep53, +0x1ffffffffffffeLL, 1), MAKE_HEX_DOUBLE(+0x1.ffffffffffffcp53, +0x1ffffffffffffcLL, 1), MAKE_HEX_DOUBLE(+0x1.fffffffffffffp53, +0x1fffffffffffffLL, 1),
- MAKE_HEX_DOUBLE(-0x1.0000000000002p52, -0x10000000000002LL, 0), MAKE_HEX_DOUBLE(-0x1.0000000000001p52, -0x10000000000001LL, 0), MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52), MAKE_HEX_DOUBLE(+0x1.0000000000002p52, +0x10000000000002LL, 0), MAKE_HEX_DOUBLE(+0x1.0000000000001p52, +0x10000000000001LL, 0), MAKE_HEX_DOUBLE(+0x1.0p52, +0x1LL, 52),
- MAKE_HEX_DOUBLE(-0x1.0000000000002p53, -0x10000000000002LL, 1), MAKE_HEX_DOUBLE(-0x1.0000000000001p53, -0x10000000000001LL, 1), MAKE_HEX_DOUBLE(-0x1.0p53, -0x1LL, 53), MAKE_HEX_DOUBLE(+0x1.0000000000002p53, +0x10000000000002LL, 1), MAKE_HEX_DOUBLE(+0x1.0000000000001p53, +0x10000000000001LL, 1), MAKE_HEX_DOUBLE(+0x1.0p53, +0x1LL, 53),
- MAKE_HEX_DOUBLE(-0x1.0000000000002p54, -0x10000000000002LL, 2), MAKE_HEX_DOUBLE(-0x1.0000000000001p54, -0x10000000000001LL, 2), MAKE_HEX_DOUBLE(-0x1.0p54, -0x1LL, 54), MAKE_HEX_DOUBLE(+0x1.0000000000002p54, +0x10000000000002LL, 2), MAKE_HEX_DOUBLE(+0x1.0000000000001p54, +0x10000000000001LL, 2), MAKE_HEX_DOUBLE(+0x1.0p54, +0x1LL, 54),
- MAKE_HEX_DOUBLE(-0x1.fffffffefffffp62, -0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(-0x1.ffffffffp62, -0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(-0x1.ffffffff00001p62, -0x1ffffffff00001LL, 10), MAKE_HEX_DOUBLE(0x1.fffffffefffffp62, 0x1fffffffefffffLL, 10), MAKE_HEX_DOUBLE(0x1.ffffffffp62, 0x1ffffffffLL, 30), MAKE_HEX_DOUBLE(0x1.ffffffff00001p62, 0x1ffffffff00001LL, 10),
-};
-
-void init_double( void *out, SaturationMode sat, RoundingMode round, Type destType, uint64_t start, int count, MTdata UNUSED d )
+int GetTestCase(const char *name, Type *outType, Type *inType,
+ SaturationMode *sat, RoundingMode *round)
{
- double *o = (double*)out;
int i;
- for( i = 0; i < count; i++ )
- {
- uint64_t z = i + start;
- o[i] = DoubleFromUInt32( (uint32_t) z ^ (uint32_t) (z >> 32));
- }
+ // Find the return type
+ for (i = 0; i < kTypeCount; i++)
+ if (name == strstr(name, gTypeNames[i]))
+ {
+ *outType = (Type)i;
+ name += strlen(gTypeNames[i]);
- if( 0 == start )
- {
- size_t tableSize = sizeof( specialValuesDouble );
- if( sizeof( cl_double) * count < tableSize )
- tableSize = sizeof( cl_double) * count;
- memcpy( (char*)(o + i) - tableSize, specialValuesDouble, tableSize );
- }
+ break;
+ }
- if( 0 == sat )
- {
- clampd func = gClampDouble[ destType ][round];
+ if (i == kTypeCount) return -1;
- for( i = 0; i < count; i++ )
- o[i] = func( o[i] );
- }
-}
+ // Check to see if _sat appears next
+ *sat = (SaturationMode)0;
+ for (i = 1; i < kSaturationModeCount; i++)
+ if (name == strstr(name, gSaturationNames[i]))
+ {
+ *sat = (SaturationMode)i;
+ name += strlen(gSaturationNames[i]);
+ break;
+ }
-cl_ulong random64( MTdata d )
-{
- return (cl_ulong) genrand_int32(d) | ((cl_ulong) genrand_int32(d) << 32);
-}
+ *round = (RoundingMode)0;
+ for (i = 1; i < kRoundingModeCount; i++)
+ if (name == strstr(name, gRoundingModeNames[i]))
+ {
+ *round = (RoundingMode)i;
+ name += strlen(gRoundingModeNames[i]);
+ break;
+ }
-void init_ulong( void *out, SaturationMode UNUSED sat, RoundingMode UNUSED round, Type UNUSED destType, uint64_t start, int count, MTdata d )
-{
- cl_ulong *o = (cl_ulong *)out;
- cl_ulong i, j, k;
+ if (*name != '_') return -2;
+ name++;
- i = 0;
- if( start == 0 )
- {
- //Try various powers of two
- for( j = 0; j < (cl_ulong) count && j < 8 * sizeof(cl_ulong); j++ )
- o[j] = (cl_ulong) 1 << j;
- i = j;
-
- // try the complement of those
- for( j = 0; i < (cl_ulong) count && j < 8 * sizeof(cl_ulong); j++ )
- o[i++] = ~((cl_ulong) 1 << j);
-
- //Try various negative powers of two
- for( j = 0; i < (cl_ulong) count && j < 8 * sizeof(cl_ulong); j++ )
- o[i++] = (cl_ulong) 0xFFFFFFFFFFFFFFFEULL << j;
-
- //try various powers of two plus 1, shifted by various amounts
- for( j = 0; i < (cl_ulong)count && j < 8 * sizeof(cl_ulong); j++ )
- for( k = 0; i < (cl_ulong)count && k < 8 * sizeof(cl_ulong) - j; k++ )
- o[i++] = (((cl_ulong) 1 << j) + 1) << k;
-
- //try various powers of two minus 1
- for( j = 0; i < (cl_ulong)count && j < 8 * sizeof(cl_ulong); j++ )
- for( k = 0; i < (cl_ulong)count && k < 8 * sizeof(cl_ulong) - j; k++ )
- o[i++] = (((cl_ulong) 1 << j) - 1) << k;
-
- // Other patterns
- cl_ulong pattern[] = { 0x3333333333333333ULL, 0x5555555555555555ULL, 0x9999999999999999ULL, 0x6666666666666666ULL, 0xccccccccccccccccULL, 0xaaaaaaaaaaaaaaaaULL };
- cl_ulong mask[] = { 0xffffffffffffffffULL, 0xff00ff00ff00ff00ULL, 0xffff0000ffff0000ULL, 0xffffffff00000000ULL };
- for( j = 0; i < (cl_ulong) count && j < sizeof(pattern) / sizeof( pattern[0]); j++ )
- for( k = 0; i + 2 <= (cl_ulong) count && k < sizeof(mask) / sizeof( mask[0]); k++ )
- {
- o[i++] = pattern[j] & mask[k];
- o[i++] = pattern[j] & ~mask[k];
- }
- }
+ for (i = 0; i < kTypeCount; i++)
+ if (name == strstr(name, gTypeNames[i]))
+ {
+ *inType = (Type)i;
+ name += strlen(gTypeNames[i]);
- for( ; i < (cl_ulong) count; i++ )
- o[i] = random64(d);
-}
+ break;
+ }
-void init_long( void *out, SaturationMode sat, RoundingMode round, Type destType, uint64_t start, int count, MTdata d )
-{
- init_ulong( out, sat, round, destType, start, count, d );
-}
+ if (i == kTypeCount) return -3;
-// ======
-
-void uchar2uchar_many( void *out, void *in, size_t n);
-void uchar2uchar_sat_many( void *out, void *in, size_t n);
-void char2uchar_many( void *out, void *in, size_t n);
-void char2uchar_sat_many( void *out, void *in, size_t n);
-void ushort2uchar_many( void *out, void *in, size_t n);
-void ushort2uchar_sat_many( void *out, void *in, size_t n);
-void short2uchar_many( void *out, void *in, size_t n);
-void short2uchar_sat_many( void *out, void *in, size_t n);
-void uint2uchar_many( void *out, void *in, size_t n);
-void uint2uchar_sat_many( void *out, void *in, size_t n);
-void int2uchar_many( void *out, void *in, size_t n);
-void int2uchar_sat_many( void *out, void *in, size_t n);
-void float2uchar_many( void *out, void *in, size_t n);
-void float2uchar_sat_many( void *out, void *in, size_t n);
-void double2uchar_many( void *out, void *in, size_t n);
-void double2uchar_sat_many( void *out, void *in, size_t n);
-void ulong2uchar_many( void *out, void *in, size_t n);
-void ulong2uchar_sat_many( void *out, void *in, size_t n);
-void long2uchar_many( void *out, void *in, size_t n);
-void long2uchar_sat_many( void *out, void *in, size_t n);
-void uchar2char_many( void *out, void *in, size_t n);
-void uchar2char_sat_many( void *out, void *in, size_t n);
-void char2char_many( void *out, void *in, size_t n);
-void char2char_sat_many( void *out, void *in, size_t n);
-void ushort2char_many( void *out, void *in, size_t n);
-void ushort2char_sat_many( void *out, void *in, size_t n);
-void short2char_many( void *out, void *in, size_t n);
-void short2char_sat_many( void *out, void *in, size_t n);
-void uint2char_many( void *out, void *in, size_t n);
-void uint2char_sat_many( void *out, void *in, size_t n);
-void int2char_many( void *out, void *in, size_t n);
-void int2char_sat_many( void *out, void *in, size_t n);
-void float2char_many( void *out, void *in, size_t n);
-void float2char_sat_many( void *out, void *in, size_t n);
-void double2char_many( void *out, void *in, size_t n);
-void double2char_sat_many( void *out, void *in, size_t n);
-void ulong2char_many( void *out, void *in, size_t n);
-void ulong2char_sat_many( void *out, void *in, size_t n);
-void long2char_many( void *out, void *in, size_t n);
-void long2char_sat_many( void *out, void *in, size_t n);
-void uchar2ushort_many( void *out, void *in, size_t n);
-void uchar2ushort_sat_many( void *out, void *in, size_t n);
-void char2ushort_many( void *out, void *in, size_t n);
-void char2ushort_sat_many( void *out, void *in, size_t n);
-void ushort2ushort_many( void *out, void *in, size_t n);
-void ushort2ushort_sat_many( void *out, void *in, size_t n);
-void short2ushort_many( void *out, void *in, size_t n);
-void short2ushort_sat_many( void *out, void *in, size_t n);
-void uint2ushort_many( void *out, void *in, size_t n);
-void uint2ushort_sat_many( void *out, void *in, size_t n);
-void int2ushort_many( void *out, void *in, size_t n);
-void int2ushort_sat_many( void *out, void *in, size_t n);
-void float2ushort_many( void *out, void *in, size_t n);
-void float2ushort_sat_many( void *out, void *in, size_t n);
-void double2ushort_many( void *out, void *in, size_t n);
-void double2ushort_sat_many( void *out, void *in, size_t n);
-void ulong2ushort_many( void *out, void *in, size_t n);
-void ulong2ushort_sat_many( void *out, void *in, size_t n);
-void long2ushort_many( void *out, void *in, size_t n);
-void long2ushort_sat_many( void *out, void *in, size_t n);
-void uchar2short_many( void *out, void *in, size_t n);
-void uchar2short_sat_many( void *out, void *in, size_t n);
-void char2short_many( void *out, void *in, size_t n);
-void char2short_sat_many( void *out, void *in, size_t n);
-void ushort2short_many( void *out, void *in, size_t n);
-void ushort2short_sat_many( void *out, void *in, size_t n);
-void short2short_many( void *out, void *in, size_t n);
-void short2short_sat_many( void *out, void *in, size_t n);
-void uint2short_many( void *out, void *in, size_t n);
-void uint2short_sat_many( void *out, void *in, size_t n);
-void int2short_many( void *out, void *in, size_t n);
-void int2short_sat_many( void *out, void *in, size_t n);
-void float2short_many( void *out, void *in, size_t n);
-void float2short_sat_many( void *out, void *in, size_t n);
-void double2short_many( void *out, void *in, size_t n);
-void double2short_sat_many( void *out, void *in, size_t n);
-void ulong2short_many( void *out, void *in, size_t n);
-void ulong2short_sat_many( void *out, void *in, size_t n);
-void long2short_many( void *out, void *in, size_t n);
-void long2short_sat_many( void *out, void *in, size_t n);
-void uchar2uint_many( void *out, void *in, size_t n);
-void uchar2uint_sat_many( void *out, void *in, size_t n);
-void char2uint_many( void *out, void *in, size_t n);
-void char2uint_sat_many( void *out, void *in, size_t n);
-void ushort2uint_many( void *out, void *in, size_t n);
-void ushort2uint_sat_many( void *out, void *in, size_t n);
-void short2uint_many( void *out, void *in, size_t n);
-void short2uint_sat_many( void *out, void *in, size_t n);
-void uint2uint_many( void *out, void *in, size_t n);
-void uint2uint_sat_many( void *out, void *in, size_t n);
-void int2uint_many( void *out, void *in, size_t n);
-void int2uint_sat_many( void *out, void *in, size_t n);
-void float2uint_many( void *out, void *in, size_t n);
-void float2uint_sat_many( void *out, void *in, size_t n);
-void double2uint_many( void *out, void *in, size_t n);
-void double2uint_sat_many( void *out, void *in, size_t n);
-void ulong2uint_many( void *out, void *in, size_t n);
-void ulong2uint_sat_many( void *out, void *in, size_t n);
-void long2uint_many( void *out, void *in, size_t n);
-void long2uint_sat_many( void *out, void *in, size_t n);
-void uchar2int_many( void *out, void *in, size_t n);
-void uchar2int_sat_many( void *out, void *in, size_t n);
-void char2int_many( void *out, void *in, size_t n);
-void char2int_sat_many( void *out, void *in, size_t n);
-void ushort2int_many( void *out, void *in, size_t n);
-void ushort2int_sat_many( void *out, void *in, size_t n);
-void short2int_many( void *out, void *in, size_t n);
-void short2int_sat_many( void *out, void *in, size_t n);
-void uint2int_many( void *out, void *in, size_t n);
-void uint2int_sat_many( void *out, void *in, size_t n);
-void int2int_many( void *out, void *in, size_t n);
-void int2int_sat_many( void *out, void *in, size_t n);
-void float2int_many( void *out, void *in, size_t n);
-void float2int_sat_many( void *out, void *in, size_t n);
-void double2int_many( void *out, void *in, size_t n);
-void double2int_sat_many( void *out, void *in, size_t n);
-void ulong2int_many( void *out, void *in, size_t n);
-void ulong2int_sat_many( void *out, void *in, size_t n);
-void long2int_many( void *out, void *in, size_t n);
-void long2int_sat_many( void *out, void *in, size_t n);
-void uchar2float_many( void *out, void *in, size_t n);
-void uchar2float_sat_many( void *out, void *in, size_t n);
-void char2float_many( void *out, void *in, size_t n);
-void char2float_sat_many( void *out, void *in, size_t n);
-void ushort2float_many( void *out, void *in, size_t n);
-void ushort2float_sat_many( void *out, void *in, size_t n);
-void short2float_many( void *out, void *in, size_t n);
-void short2float_sat_many( void *out, void *in, size_t n);
-void uint2float_many( void *out, void *in, size_t n);
-void uint2float_sat_many( void *out, void *in, size_t n);
-void int2float_many( void *out, void *in, size_t n);
-void int2float_sat_many( void *out, void *in, size_t n);
-void float2float_many( void *out, void *in, size_t n);
-void float2float_sat_many( void *out, void *in, size_t n);
-void double2float_many( void *out, void *in, size_t n);
-void double2float_sat_many( void *out, void *in, size_t n);
-void ulong2float_many( void *out, void *in, size_t n);
-void ulong2float_sat_many( void *out, void *in, size_t n);
-void long2float_many( void *out, void *in, size_t n);
-void long2float_sat_many( void *out, void *in, size_t n);
-void uchar2double_many( void *out, void *in, size_t n);
-void uchar2double_sat_many( void *out, void *in, size_t n);
-void char2double_many( void *out, void *in, size_t n);
-void char2double_sat_many( void *out, void *in, size_t n);
-void ushort2double_many( void *out, void *in, size_t n);
-void ushort2double_sat_many( void *out, void *in, size_t n);
-void short2double_many( void *out, void *in, size_t n);
-void short2double_sat_many( void *out, void *in, size_t n);
-void uint2double_many( void *out, void *in, size_t n);
-void uint2double_sat_many( void *out, void *in, size_t n);
-void int2double_many( void *out, void *in, size_t n);
-void int2double_sat_many( void *out, void *in, size_t n);
-void float2double_many( void *out, void *in, size_t n);
-void float2double_sat_many( void *out, void *in, size_t n);
-void double2double_many( void *out, void *in, size_t n);
-void double2double_sat_many( void *out, void *in, size_t n);
-void ulong2double_many( void *out, void *in, size_t n);
-void ulong2double_sat_many( void *out, void *in, size_t n);
-void long2double_many( void *out, void *in, size_t n);
-void long2double_sat_many( void *out, void *in, size_t n);
-void uchar2ulong_many( void *out, void *in, size_t n);
-void uchar2ulong_sat_many( void *out, void *in, size_t n);
-void char2ulong_many( void *out, void *in, size_t n);
-void char2ulong_sat_many( void *out, void *in, size_t n);
-void ushort2ulong_many( void *out, void *in, size_t n);
-void ushort2ulong_sat_many( void *out, void *in, size_t n);
-void short2ulong_many( void *out, void *in, size_t n);
-void short2ulong_sat_many( void *out, void *in, size_t n);
-void uint2ulong_many( void *out, void *in, size_t n);
-void uint2ulong_sat_many( void *out, void *in, size_t n);
-void int2ulong_many( void *out, void *in, size_t n);
-void int2ulong_sat_many( void *out, void *in, size_t n);
-void float2ulong_many( void *out, void *in, size_t n);
-void float2ulong_sat_many( void *out, void *in, size_t n);
-void double2ulong_many( void *out, void *in, size_t n);
-void double2ulong_sat_many( void *out, void *in, size_t n);
-void ulong2ulong_many( void *out, void *in, size_t n);
-void ulong2ulong_sat_many( void *out, void *in, size_t n);
-void long2ulong_many( void *out, void *in, size_t n);
-void long2ulong_sat_many( void *out, void *in, size_t n);
-void uchar2long_many( void *out, void *in, size_t n);
-void uchar2long_sat_many( void *out, void *in, size_t n);
-void char2long_many( void *out, void *in, size_t n);
-void char2long_sat_many( void *out, void *in, size_t n);
-void ushort2long_many( void *out, void *in, size_t n);
-void ushort2long_sat_many( void *out, void *in, size_t n);
-void short2long_many( void *out, void *in, size_t n);
-void short2long_sat_many( void *out, void *in, size_t n);
-void uint2long_many( void *out, void *in, size_t n);
-void uint2long_sat_many( void *out, void *in, size_t n);
-void int2long_many( void *out, void *in, size_t n);
-void int2long_sat_many( void *out, void *in, size_t n);
-void float2long_many( void *out, void *in, size_t n);
-void float2long_sat_many( void *out, void *in, size_t n);
-void double2long_many( void *out, void *in, size_t n);
-void double2long_sat_many( void *out, void *in, size_t n);
-void ulong2long_many( void *out, void *in, size_t n);
-void ulong2long_sat_many( void *out, void *in, size_t n);
-void long2long_many( void *out, void *in, size_t n);
-void long2long_sat_many( void *out, void *in, size_t n);
-
-void uchar2uchar_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uchar )); }
-void uchar2uchar_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uchar )); }
-void char2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_char)); }}
-void char2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_char)); }}
-void ushort2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ushort)); }}
-void ushort2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ushort)); }}
-void short2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_short)); }}
-void short2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_short)); }}
-void uint2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_uint)); }}
-void uint2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_uint)); }}
-void int2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_int)); }}
-void int2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_int)); }}
-void float2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_float)); }}
-void float2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_float)); }}
-void double2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_double)); }}
-void double2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_double)); }}
-void ulong2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ulong)); }}
-void ulong2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_ulong)); }}
-void long2uchar_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uchar( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_long)); }}
-void long2uchar_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uchar_sat( (char*) out + i * sizeof(cl_uchar), (char*) in + i * sizeof(cl_long)); }}
-void uchar2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uchar)); }}
-void uchar2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uchar)); }}
-void char2char_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_char )); }
-void char2char_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_char )); }
-void ushort2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ushort)); }}
-void ushort2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ushort)); }}
-void short2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_short)); }}
-void short2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_short)); }}
-void uint2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uint)); }}
-void uint2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_uint)); }}
-void int2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_int)); }}
-void int2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_int)); }}
-void float2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_float)); }}
-void float2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_float)); }}
-void double2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_double)); }}
-void double2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_double)); }}
-void ulong2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ulong)); }}
-void ulong2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_ulong)); }}
-void long2char_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2char( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_long)); }}
-void long2char_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2char_sat( (char*) out + i * sizeof(cl_char), (char*) in + i * sizeof(cl_long)); }}
-void uchar2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uchar)); }}
-void uchar2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uchar)); }}
-void char2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_char)); }}
-void char2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_char)); }}
-void ushort2ushort_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ushort )); }
-void ushort2ushort_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ushort )); }
-void short2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_short)); }}
-void short2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_short)); }}
-void uint2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uint)); }}
-void uint2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_uint)); }}
-void int2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_int)); }}
-void int2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_int)); }}
-void float2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_float)); }}
-void float2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_float)); }}
-void double2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_double)); }}
-void double2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_double)); }}
-void ulong2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_ulong)); }}
-void ulong2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_ulong)); }}
-void long2ushort_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ushort( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_long)); }}
-void long2ushort_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ushort_sat( (char*) out + i * sizeof(cl_ushort), (char*) in + i * sizeof(cl_long)); }}
-void uchar2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uchar)); }}
-void uchar2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uchar)); }}
-void char2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_char)); }}
-void char2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_char)); }}
-void ushort2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ushort)); }}
-void ushort2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ushort)); }}
-void short2short_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_short )); }
-void short2short_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_short )); }
-void uint2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uint)); }}
-void uint2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_uint)); }}
-void int2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_int)); }}
-void int2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_int)); }}
-void float2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_float)); }}
-void float2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_float)); }}
-void double2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_double)); }}
-void double2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_double)); }}
-void ulong2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ulong)); }}
-void ulong2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_ulong)); }}
-void long2short_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2short( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_long)); }}
-void long2short_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2short_sat( (char*) out + i * sizeof(cl_short), (char*) in + i * sizeof(cl_long)); }}
-void uchar2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_uchar)); }}
-void uchar2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_uchar)); }}
-void char2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_char)); }}
-void char2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_char)); }}
-void ushort2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ushort)); }}
-void ushort2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ushort)); }}
-void short2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_short)); }}
-void short2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_short)); }}
-void uint2uint_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uint )); }
-void uint2uint_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_uint )); }
-void int2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_int)); }}
-void int2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_int)); }}
-void float2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_float)); }}
-void float2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_float)); }}
-void double2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_double)); }}
-void double2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_double)); }}
-void ulong2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ulong)); }}
-void ulong2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_ulong)); }}
-void long2uint_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uint( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_long)); }}
-void long2uint_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2uint_sat( (char*) out + i * sizeof(cl_uint), (char*) in + i * sizeof(cl_long)); }}
-void uchar2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uchar)); }}
-void uchar2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uchar)); }}
-void char2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_char)); }}
-void char2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_char)); }}
-void ushort2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ushort)); }}
-void ushort2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ushort)); }}
-void short2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_short)); }}
-void short2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_short)); }}
-void uint2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uint)); }}
-void uint2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_uint)); }}
-void int2int_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_int )); }
-void int2int_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_int )); }
-void float2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_float)); }}
-void float2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_float)); }}
-void double2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_double)); }}
-void double2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_double)); }}
-void ulong2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ulong)); }}
-void ulong2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_ulong)); }}
-void long2int_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2int( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_long)); }}
-void long2int_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2int_sat( (char*) out + i * sizeof(cl_int), (char*) in + i * sizeof(cl_long)); }}
-void uchar2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uchar)); }}
-void uchar2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uchar)); }}
-void char2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_char)); }}
-void char2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_char)); }}
-void ushort2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ushort)); }}
-void ushort2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ushort)); }}
-void short2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_short)); }}
-void short2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_short)); }}
-void uint2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uint)); }}
-void uint2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_uint)); }}
-void int2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_int)); }}
-void int2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_int)); }}
-void float2float_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_float )); }
-void float2float_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_float )); }
-void double2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_double)); }}
-void double2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_double)); }}
-void ulong2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ulong)); }}
-void ulong2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_ulong)); }}
-void long2float_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2float( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_long)); }}
-void long2float_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2float_sat( (char*) out + i * sizeof(cl_float), (char*) in + i * sizeof(cl_long)); }}
-void uchar2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uchar)); }}
-void uchar2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uchar)); }}
-void char2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_char)); }}
-void char2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_char)); }}
-void ushort2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ushort)); }}
-void ushort2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ushort)); }}
-void short2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_short)); }}
-void short2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_short)); }}
-void uint2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uint)); }}
-void uint2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_uint)); }}
-void int2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_int)); }}
-void int2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_int)); }}
-void float2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_float)); }}
-void float2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_float)); }}
-void double2double_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_double )); }
-void double2double_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_double )); }
-void ulong2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ulong)); }}
-void ulong2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_ulong)); }}
-void long2double_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2double( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_long)); }}
-void long2double_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2double_sat( (char*) out + i * sizeof(cl_double), (char*) in + i * sizeof(cl_long)); }}
-void uchar2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uchar)); }}
-void uchar2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uchar)); }}
-void char2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_char)); }}
-void char2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_char)); }}
-void ushort2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_ushort)); }}
-void ushort2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_ushort)); }}
-void short2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_short)); }}
-void short2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_short)); }}
-void uint2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uint)); }}
-void uint2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_uint)); }}
-void int2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_int)); }}
-void int2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_int)); }}
-void float2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_float)); }}
-void float2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_float)); }}
-void double2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_double)); }}
-void double2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_double)); }}
-void ulong2ulong_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ulong )); }
-void ulong2ulong_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_ulong )); }
-void long2ulong_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ulong( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_long)); }}
-void long2ulong_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ long2ulong_sat( (char*) out + i * sizeof(cl_ulong), (char*) in + i * sizeof(cl_long)); }}
-void uchar2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uchar)); }}
-void uchar2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uchar2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uchar)); }}
-void char2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_char)); }}
-void char2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ char2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_char)); }}
-void ushort2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ushort)); }}
-void ushort2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ushort2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ushort)); }}
-void short2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_short)); }}
-void short2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ short2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_short)); }}
-void uint2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uint)); }}
-void uint2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ uint2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_uint)); }}
-void int2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_int)); }}
-void int2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ int2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_int)); }}
-void float2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_float)); }}
-void float2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ float2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_float)); }}
-void double2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_double)); }}
-void double2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ double2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_double)); }}
-void ulong2long_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2long( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ulong)); }}
-void ulong2long_sat_many( void *out, void *in, size_t n){size_t i; for( i = 0; i < n; i++){ ulong2long_sat( (char*) out + i * sizeof(cl_long), (char*) in + i * sizeof(cl_ulong)); }}
-void long2long_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_long )); }
-void long2long_sat_many( void *out, void *in, size_t n){ memcpy( out, in, n * sizeof( cl_long )); }
-
-Convert gSaturatedConversions[kTypeCount][kTypeCount] = {
- { uchar2uchar_sat_many, char2uchar_sat_many, ushort2uchar_sat_many, short2uchar_sat_many, uint2uchar_sat_many, int2uchar_sat_many, float2uchar_sat_many, double2uchar_sat_many, ulong2uchar_sat_many, long2uchar_sat_many, },
- { uchar2char_sat_many, char2char_sat_many, ushort2char_sat_many, short2char_sat_many, uint2char_sat_many, int2char_sat_many, float2char_sat_many, double2char_sat_many, ulong2char_sat_many, long2char_sat_many, },
- { uchar2ushort_sat_many, char2ushort_sat_many, ushort2ushort_sat_many, short2ushort_sat_many, uint2ushort_sat_many, int2ushort_sat_many, float2ushort_sat_many, double2ushort_sat_many, ulong2ushort_sat_many, long2ushort_sat_many, },
- { uchar2short_sat_many, char2short_sat_many, ushort2short_sat_many, short2short_sat_many, uint2short_sat_many, int2short_sat_many, float2short_sat_many, double2short_sat_many, ulong2short_sat_many, long2short_sat_many, },
- { uchar2uint_sat_many, char2uint_sat_many, ushort2uint_sat_many, short2uint_sat_many, uint2uint_sat_many, int2uint_sat_many, float2uint_sat_many, double2uint_sat_many, ulong2uint_sat_many, long2uint_sat_many, },
- { uchar2int_sat_many, char2int_sat_many, ushort2int_sat_many, short2int_sat_many, uint2int_sat_many, int2int_sat_many, float2int_sat_many, double2int_sat_many, ulong2int_sat_many,long2int_sat_many, },
- { uchar2float_sat_many, char2float_sat_many, ushort2float_sat_many, short2float_sat_many, uint2float_sat_many, int2float_sat_many, float2float_sat_many, double2float_sat_many, ulong2float_sat_many, long2float_sat_many, },
- { uchar2double_sat_many, char2double_sat_many, ushort2double_sat_many, short2double_sat_many, uint2double_sat_many, int2double_sat_many, float2double_sat_many, double2double_sat_many, ulong2double_sat_many, long2double_sat_many, },
- { uchar2ulong_sat_many, char2ulong_sat_many, ushort2ulong_sat_many, short2ulong_sat_many, uint2ulong_sat_many, int2ulong_sat_many, float2ulong_sat_many, double2ulong_sat_many, ulong2ulong_sat_many, long2ulong_sat_many, },
- { uchar2long_sat_many, char2long_sat_many, ushort2long_sat_many, short2long_sat_many, uint2long_sat_many, int2long_sat_many, float2long_sat_many, double2long_sat_many, ulong2long_sat_many, long2long_sat_many, },
-};
+ if (*name != '\0') return -4;
-Convert gConversions[kTypeCount][kTypeCount] = {
- { uchar2uchar_many, char2uchar_many, ushort2uchar_many, short2uchar_many, uint2uchar_many, int2uchar_many, float2uchar_many, double2uchar_many, ulong2uchar_many, long2uchar_many, },
- { uchar2char_many, char2char_many, ushort2char_many, short2char_many, uint2char_many, int2char_many, float2char_many, double2char_many, ulong2char_many, long2char_many, },
- { uchar2ushort_many, char2ushort_many, ushort2ushort_many, short2ushort_many, uint2ushort_many, int2ushort_many, float2ushort_many, double2ushort_many, ulong2ushort_many, long2ushort_many, },
- { uchar2short_many, char2short_many, ushort2short_many, short2short_many, uint2short_many, int2short_many, float2short_many, double2short_many, ulong2short_many, long2short_many, },
- { uchar2uint_many, char2uint_many, ushort2uint_many, short2uint_many, uint2uint_many, int2uint_many, float2uint_many, double2uint_many, ulong2uint_many, long2uint_many, },
- { uchar2int_many, char2int_many, ushort2int_many, short2int_many, uint2int_many, int2int_many, float2int_many, double2int_many, ulong2int_many, long2int_many, },
- { uchar2float_many, char2float_many, ushort2float_many, short2float_many, uint2float_many, int2float_many, float2float_many, double2float_many, ulong2float_many, long2float_many, },
- { uchar2double_many, char2double_many, ushort2double_many, short2double_many, uint2double_many, int2double_many, float2double_many, double2double_many, ulong2double_many, long2double_many, },
- { uchar2ulong_many, char2ulong_many, ushort2ulong_many, short2ulong_many, uint2ulong_many, int2ulong_many, float2ulong_many, double2ulong_many, ulong2ulong_many, long2ulong_many, },
- { uchar2long_many, char2long_many, ushort2long_many, short2long_many, uint2long_many, int2long_many, float2long_many, double2long_many, ulong2long_many, long2long_many, },
-};
+ return 0;
+}
+
+} // namespace conv_test
diff --git a/test_conformance/conversions/basic_test_conversions.h b/test_conformance/conversions/basic_test_conversions.h
index ab887afd..2314ee74 100644
--- a/test_conformance/conversions/basic_test_conversions.h
+++ b/test_conformance/conversions/basic_test_conversions.h
@@ -1,6 +1,6 @@
//
-// Copyright (c) 2017 The Khronos Group Inc.
-//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
@@ -16,8 +16,6 @@
#ifndef BASIC_TEST_CONVERSIONS_H
#define BASIC_TEST_CONVERSIONS_H
-#include "harness/compat.h"
-
#if !defined(_WIN32)
#include <unistd.h>
#endif
@@ -33,22 +31,23 @@
#endif
#include "harness/mt19937.h"
+#include "harness/testHarness.h"
+#include "harness/typeWrappers.h"
-typedef void (*Convert)( void *dest, void *src, size_t );
+#include <memory>
+#include <tuple>
+#include <vector>
-#define kVectorSizeCount 6
-#define kMaxVectorSize 16
+#include "conversions_data_info.h"
-typedef enum
-{
- kUnsaturated = 0,
- kSaturated,
+#define kVectorSizeCount 6
+#define kMaxVectorSize 16
+#define kPageSize 4096
- kSaturationModeCount
-}SaturationMode;
+#define BUFFER_SIZE (1024 * 1024)
+#define EMBEDDED_REDUCTION_FACTOR 16
+#define PERF_LOOP_COUNT 100
-extern Convert gConversions[kTypeCount][kTypeCount]; // [dest format][source format]
-extern Convert gSaturatedConversions[kTypeCount][kTypeCount]; // [dest format][source format]
extern const char *gTypeNames[ kTypeCount ];
extern const char *gRoundingModeNames[ kRoundingModeCount ]; // { "", "_rte", "_rtp", "_rtn", "_rtz" }
extern const char *gSaturationNames[ kSaturationModeCount ]; // { "", "_sat" }
@@ -68,5 +67,324 @@ extern InitDataFunc gInitFunctions[ kTypeCount ];
typedef int (*CheckResults)( void *out1, void *out2, void *allowZ, uint32_t count, int vectorSize );
extern CheckResults gCheckResults[ kTypeCount ];
+#define kCallStyleCount (kVectorSizeCount + 1 /* for implicit scalar */)
+
+extern MTdata gMTdata;
+extern cl_command_queue gQueue;
+extern cl_context gContext;
+extern cl_mem gInBuffer;
+extern cl_mem gOutBuffers[];
+extern int gHasDouble;
+extern int gTestDouble;
+extern int gWimpyMode;
+extern int gWimpyReductionFactor;
+extern int gSkipTesting;
+extern int gMinVectorSize;
+extern int gMaxVectorSize;
+extern int gForceFTZ;
+extern int gTimeResults;
+extern int gReportAverageTimes;
+extern int gStartTestNumber;
+extern int gEndTestNumber;
+extern int gIsRTZ;
+extern void *gIn;
+extern void *gRef;
+extern void *gAllowZ;
+extern void *gOut[];
+
+extern const char **argList;
+extern int argCount;
+
+extern const char *sizeNames[];
+extern int vectorSizes[];
+
+extern size_t gComputeDevices;
+extern uint32_t gDeviceFrequency;
+
+namespace conv_test { // helper entry points for the conversion tests; this header only declares them
+
+cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
+ RoundingMode round, int vectorSize,
+ cl_kernel *outKernel); // NOTE(review): presumably hands the built kernel back through outKernel — confirm in the definition
+
+int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf, size_t blockCount);
+
+int GetTestCase(const char *name, Type *outType, Type *inType,
+ SaturationMode *sat, RoundingMode *round); // NOTE(review): out-parameters are presumably filled from the parsed name — confirm in the definition
+
+cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p);
+cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p);
+uint64_t GetTime(void);
+
+void WriteInputBufferComplete(void *);
+void *FlushToZero(void);
+void UnFlushToZero(void *);
+} // namespace conv_test
+
+struct CalcRefValsBase // polymorphic base for per-vector-size state; WriteInputBufferInfo::calcInfo stores these by unique_ptr
+{
+ virtual ~CalcRefValsBase() = default; // virtual so derived objects can be destroyed through a base pointer
+ virtual int check_result(void *, uint32_t, int) { return 0; } // default does no checking and returns 0; CalcRefValsPat overrides it
+
+ // pointer back to the parent WriteInputBufferInfo struct
+ struct WriteInputBufferInfo *parent;
+ clKernelWrapper kernel; // the kernel for this vector size
+ clProgramWrapper program; // the program for this vector size
+ cl_uint vectorSize; // the vector size for this callback chain
+ void *p; // the pointer to mapped result data for this vector size
+ cl_int result; // NOTE(review): presumably the status recorded for this vector size — confirm against the code that writes it
+};
+
+template <typename InType, typename OutType> // one instantiation per (input type, output type) pair
+struct CalcRefValsPat : CalcRefValsBase // typed variant of CalcRefValsBase; adds no data members of its own
+{
+ int check_result(void *, uint32_t, int) override; // replaces the base's return-0 default; only declared here
+};
+
+struct WriteInputBufferInfo // state for one conversion run: two user events, the element count, both types and the CalcRefValsBase entries
+{
+ WriteInputBufferInfo() // starts with null events, zero counts and both types set to kuchar; calcInfo starts empty
+ : calcReferenceValues(nullptr), doneBarrier(nullptr), count(0),
+ outType(kuchar), inType(kuchar), barrierCount(0)
+ {}
+
+ volatile cl_event
+ calcReferenceValues; // user event which signals when main thread is
+ // done calculating reference values
+ volatile cl_event
+ doneBarrier; // user event which signals when worker threads are done
+ cl_uint count; // the number of elements in the array
+ Type outType; // the data type of the conversion result
+ Type inType; // the data type of the conversion input
+ volatile int barrierCount; // NOTE(review): presumably counts workers still to reach doneBarrier — confirm; volatile alone gives no atomicity
+
+ std::vector<std::unique_ptr<CalcRefValsBase>> calcInfo; // owning list of CalcRefValsBase-derived entries; each entry carries its own vectorSize
+};
+
+// Element order must match the numeric order of the Type enum: IterOverTypes walks this tuple while incrementing a Type value from 0.
+using TypeIter = std::tuple<cl_uchar, cl_char, cl_ushort, cl_short, cl_uint,
+ cl_int, cl_float, cl_double, cl_ulong, cl_long>;
+
+// Test fixture for the conversion tests. It carries the OpenCL device, context
+// and command queue handles, an element count and a TypeIter tuple.
+struct ConversionsTest
+{
+ virtual ~ConversionsTest() = default;
+
+ ConversionsTest(cl_device_id device, cl_context context,
+ cl_command_queue queue);
+
+ cl_int SetUp(int elements); // MakeAndRunTest calls this before Run() and skips Run() if it reports an error
+
+ // Test body returning an OpenCL error code
+ cl_int Run(); // not virtual: CustomConversionsTest::Run hides it, and MakeAndRunTest calls Run on the concrete type
+
+ template <typename InType, typename OutType>
+ int DoTest(Type outType, Type inType, SaturationMode sat,
+ RoundingMode round); // note the order: output type first, then input type
+
+ template <typename InType, typename OutType>
+ void TestTypesConversion(const Type &inType, const Type &outType, int &tn,
+ const int smvs); // IterOverTypes passes its testNumber as tn and its startMinVectorSize as smvs
+
+protected:
+ cl_context context;
+ cl_device_id device;
+ cl_command_queue queue;
+
+ size_t num_elements;
+
+ TypeIter typeIterator; // tuple whose element types enumerate the scalar types under test
+};
+
+struct CustomConversionsTest : ConversionsTest // fixture variant that declares its own Run()
+{
+ CustomConversionsTest(cl_device_id device, cl_context context,
+ cl_command_queue queue)
+ : ConversionsTest(device, context, queue) // construction only forwards to the base fixture
+ {}
+
+ cl_int Run(); // hides (does not override) ConversionsTest::Run, which is non-virtual
+};
+
+template <class T> // T is a fixture type constructible from (device, context, queue) with SetUp(int) and Run()
+int MakeAndRunTest(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ T test_fixture(device, context, queue); // fixture lives on the stack for the duration of this call
+
+ cl_int error = test_fixture.SetUp(num_elements);
+ test_error_ret(error, "Error in test initialization", TEST_FAIL); // harness macro, not shown here; presumably returns TEST_FAIL when error is set — confirm
+
+ return test_fixture.Run(); // Run's cl_int result becomes this function's int return value
+}
+
+struct TestType // maps a runtime Type enumerator to its cl_* scalar type; base of both IterOver* helpers
+{
+ template <typename T> bool testType(Type in) // true only when T is the cl_* type paired with enumerator `in`
+ {
+ switch (in)
+ {
+ default: return false; // any enumerator not listed below never matches
+ case kuchar: return std::is_same<cl_uchar, T>::value;
+ case kchar: return std::is_same<cl_char, T>::value;
+ case kushort: return std::is_same<cl_ushort, T>::value;
+ case kshort: return std::is_same<cl_short, T>::value;
+ case kuint: return std::is_same<cl_uint, T>::value;
+ case kint: return std::is_same<cl_int, T>::value;
+ case kfloat: return std::is_same<cl_float, T>::value;
+ case kdouble: return std::is_same<cl_double, T>::value;
+ case kulong: return std::is_same<cl_ulong, T>::value;
+ case klong: return std::is_same<cl_long, T>::value;
+ }
+ }
+};
+
+// Helper that walks every (output type, input type) pair of a TypeIter tuple and calls ConversionsTest::TestTypesConversion for each pair
+struct IterOverTypes : public TestType
+{
+ IterOverTypes(const TypeIter &typeIter, ConversionsTest &test) // both arguments are kept by reference, so they must outlive this object
+ : inType((Type)0), outType((Type)0), typeIter(typeIter), test(test),
+ testNumber(-1), startMinVectorSize(gMinVectorSize) // gMinVectorSize is captured once, at construction
+ {}
+
+ void Run() { for_each_out_elem(typeIter); } // outer loop over output types; each one runs an inner loop over input types
+
+protected:
+ template <std::size_t Out = 0, typename OutType>
+ void iterate_out_type(const OutType &t) // t only supplies the type; its value is not read
+ {
+ for_each_in_elem<0, Out, OutType>(typeIter); // every input type against this output type
+ outType = (Type)(outType + 1); // keep the enum counter in step with the tuple index
+ inType = (Type)0; // restart the input counter for the next output type
+ }
+
+ template <std::size_t In, std::size_t Out, typename OutType,
+ typename InType>
+ void iterate_in_type(const InType &t) // t only supplies the type; its value is not read
+ {
+ if (!testType<InType>(inType)) vlog_error("Unexpected data type!\n"); // logged only: the conversion below still runs
+
+ if (!testType<OutType>(outType)) vlog_error("Unexpected data type!\n"); // logged only: the conversion below still runs
+
+ // run the conversions
+ test.TestTypesConversion<InType, OutType>(inType, outType, testNumber,
+ startMinVectorSize);
+ inType = (Type)(inType + 1); // advance to the enumerator of the next tuple element
+ }
+
+ template <std::size_t Out = 0, typename... Tp>
+ inline typename std::enable_if<Out == sizeof...(Tp), void>::type // selected once Out has run past the last tuple element: ends the recursion
+ for_each_out_elem(
+ const std::tuple<Tp...> &) // Unused arguments are given no names.
+ {}
+
+ template <std::size_t Out = 0, typename... Tp>
+ inline typename std::enable_if < Out<sizeof...(Tp), void>::type // selected while Out is a valid tuple index
+ for_each_out_elem(const std::tuple<Tp...> &t)
+ {
+ iterate_out_type<Out>(std::get<Out>(t));
+ for_each_out_elem<Out + 1, Tp...>(t); // compile-time recursion to the next output type
+ }
+
+ template <std::size_t In = 0, std::size_t Out, typename OutType,
+ typename... Tp>
+ inline typename std::enable_if<In == sizeof...(Tp), void>::type // selected once In has run past the last tuple element: ends the recursion
+ for_each_in_elem(
+ const std::tuple<Tp...> &) // Unused arguments are given no names.
+ {}
+
+ template <std::size_t In = 0, std::size_t Out, typename OutType,
+ typename... Tp>
+ inline typename std::enable_if < In<sizeof...(Tp), void>::type // selected while In is a valid tuple index
+ for_each_in_elem(const std::tuple<Tp...> &t)
+ {
+ iterate_in_type<In, Out, OutType>(std::get<In>(t));
+ for_each_in_elem<In + 1, Out, OutType, Tp...>(t); // compile-time recursion to the next input type
+ }
+
+protected:
+ Type inType; // enumerator expected to match the tuple element currently used as input
+ Type outType; // enumerator expected to match the tuple element currently used as output
+ const TypeIter &typeIter; // reference member: the tuple is not copied
+ ConversionsTest &test; // fixture whose TestTypesConversion is invoked
+ int testNumber; // starts at -1 and is handed to TestTypesConversion by non-const reference
+ int startMinVectorSize; // value of gMinVectorSize when this object was constructed
+};
+
+
+// Helper structure that runs one selected type-to-type conversion test case
+struct IterOverSelectedTypes : public TestType
+{
+ IterOverSelectedTypes(const TypeIter &typeIter, ConversionsTest &test,
+ const Type in, const Type out,
+ const RoundingMode round, const SaturationMode sat)
+ : inType(in), outType(out), rounding(round), saturation(sat),
+ typeIter(typeIter), test(test), testNumber(-1),
+ startMinVectorSize(gMinVectorSize) // typeIter and test are kept by reference, so they must outlive this object
+ {}
+
+ void Run() { for_each_out_elem(typeIter); } // visits every (out, in) pair; only the pair matching the stored enums does any work
+
+protected:
+ template <std::size_t Out = 0, typename OutType>
+ void iterate_out_type(const OutType &t) // t only supplies the type; inType/outType stay fixed here, unlike in IterOverTypes
+ {
+ for_each_in_elem<0, Out, OutType>(typeIter);
+ }
+
+ template <std::size_t In, std::size_t Out, typename OutType,
+ typename InType>
+ void iterate_in_type(const InType &t) // t only supplies the type; its value is not read
+ {
+ if (testType<InType>(inType) && testType<OutType>(outType)) // true only for the selected pair
+ {
+ // run selected conversion
+ // testing of the result will happen afterwards
+ test.DoTest<InType, OutType>(outType, inType, saturation, rounding); // DoTest's int return value is discarded here
+ }
+ }
+
+ template <std::size_t Out = 0, typename... Tp>
+ inline typename std::enable_if<Out == sizeof...(Tp), void>::type // selected once Out has run past the last tuple element: ends the recursion
+ for_each_out_elem(const std::tuple<Tp...> &)
+ {}
+
+ template <std::size_t Out = 0, typename... Tp>
+ inline typename std::enable_if < Out<sizeof...(Tp), void>::type // selected while Out is a valid tuple index
+ for_each_out_elem(const std::tuple<Tp...> &t)
+ {
+ iterate_out_type<Out>(std::get<Out>(t));
+ for_each_out_elem<Out + 1, Tp...>(t); // compile-time recursion to the next output type
+ }
+
+ template <std::size_t In = 0, std::size_t Out, typename OutType,
+ typename... Tp>
+ inline typename std::enable_if<In == sizeof...(Tp), void>::type // selected once In has run past the last tuple element: ends the recursion
+ for_each_in_elem(const std::tuple<Tp...> &)
+ {}
+
+ template <std::size_t In = 0, std::size_t Out, typename OutType,
+ typename... Tp>
+ inline typename std::enable_if < In<sizeof...(Tp), void>::type // selected while In is a valid tuple index
+ for_each_in_elem(const std::tuple<Tp...> &t)
+ {
+ iterate_in_type<In, Out, OutType>(std::get<In>(t));
+ for_each_in_elem<In + 1, Out, OutType, Tp...>(t); // compile-time recursion to the next input type
+ }
+
+protected:
+ Type inType; // the selected input type; never modified by this struct
+ Type outType; // the selected output type; never modified by this struct
+ RoundingMode rounding; // forwarded to DoTest
+ SaturationMode saturation; // forwarded to DoTest
+
+ const TypeIter &typeIter; // reference member: the tuple is not copied
+ ConversionsTest &test; // fixture whose DoTest is invoked
+ int testNumber; // initialised to -1; not read anywhere in this struct
+ int startMinVectorSize; // initialised from gMinVectorSize; not read anywhere in this struct
+};
+
+
#endif /* BASIC_TEST_CONVERSIONS_H */
diff --git a/test_conformance/conversions/conversions_data_info.h b/test_conformance/conversions/conversions_data_info.h
new file mode 100644
index 00000000..4f46a24e
--- /dev/null
+++ b/test_conformance/conversions/conversions_data_info.h
@@ -0,0 +1,792 @@
+//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#ifndef CONVERSIONS_DATA_INFO_H
+#define CONVERSIONS_DATA_INFO_H
+
+#if defined(__APPLE__)
+#include <OpenCL/opencl.h>
+#else
+#include <CL/opencl.h>
+#endif
+
+#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
+#include "fplib.h"
+extern bool qcom_sat;
+extern roundingMode qcom_rm;
+#endif
+
+#include "harness/mt19937.h"
+#include "harness/rounding_mode.h"
+
+#include <vector>
+
+#if defined(__linux__)
+#include <sys/param.h>
+#include <libgen.h>
+#endif
+
+extern size_t gTypeSizes[kTypeCount];
+extern void *gIn;
+
+
+typedef enum // selects between the plain and the saturated (_sat) form of a conversion
+{
+ kUnsaturated = 0,
+ kSaturated,
+
+ kSaturationModeCount // number of modes; used as the array bound of gSaturationNames
+} SaturationMode;
+
+struct DataInitInfo // plain aggregate of run parameters; DataInitBase copy-constructs its base sub-object from one of these
+{
+ cl_ulong start; // NOTE(review): start/size look like the offset and length of the slice to initialise — confirm against init()
+ cl_uint size;
+ Type outType;
+ Type inType;
+ SaturationMode sat; // kUnsaturated or kSaturated
+ RoundingMode round;
+ cl_uint threads; // NOTE(review): presumably the worker-thread count — confirm against the caller
+
+ static std::vector<uint32_t> specialValuesUInt; // static: a single list shared by every instance; only declared here
+ static std::vector<float> specialValuesFloat; // static: a single list shared by every instance; only declared here
+ static std::vector<double> specialValuesDouble; // static: a single list shared by every instance; only declared here
+};
+
+struct DataInitBase : public DataInitInfo // type-erased interface over DataInfoSpec<InType, OutType>
+{
+ virtual ~DataInitBase() = default;
+
+ explicit DataInitBase(const DataInitInfo &agg): DataInitInfo(agg) {} // copies every field of agg into the base sub-object
+ virtual void conv_array(void *out, void *in, size_t n) {} // no-op by default; DataInfoSpec overrides it
+ virtual void conv_array_sat(void *out, void *in, size_t n) {} // no-op by default; DataInfoSpec overrides it
+ virtual void init(const cl_uint &, const cl_uint &) {} // no-op by default; DataInfoSpec overrides it
+};
+
+template <typename InType, typename OutType> // one instantiation per (input type, output type) pair
+struct DataInfoSpec : public DataInitBase
+{
+ explicit DataInfoSpec(const DataInitInfo &agg); // fills ranges and clamp_ranges according to InType/OutType
+
+ // helpers
+ float round_to_int(float f);
+ long long round_to_int_and_clamp(double d);
+
+ OutType absolute(const OutType &x);
+
+ // actual conversion of reference values
+ void conv(OutType *out, InType *in);
+ void conv_sat(OutType *out, InType *in);
+
+ // (first, second) = (min, max) pair chosen in the constructor from OutType, e.g. (0, CL_UCHAR_MAX) for cl_uchar
+ std::pair<OutType, OutType> ranges;
+
+ // clamping ranges, one (low, high) pair per rounding type; the constructor's initialisers list five pairs
+ std::vector<std::pair<InType, InType>> clamp_ranges;
+
+ std::vector<MTdataHolder> mdv; // constructed empty (mdv(0)) by the constructor's initialiser list
+
+ void conv_array(void *out, void *in, size_t n) override // element-wise conv over n items; out is read as OutType[], in as InType[]
+ {
+ for (size_t i = 0; i < n; i++)
+ conv(&((OutType *)out)[i], &((InType *)in)[i]);
+ }
+
+ void conv_array_sat(void *out, void *in, size_t n) override // same walk as conv_array, but calls conv_sat
+ {
+ for (size_t i = 0; i < n; i++)
+ conv_sat(&((OutType *)out)[i], &((InType *)in)[i]);
+ }
+
+ void init(const cl_uint &, const cl_uint &) override;
+ InType clamp(const InType &);
+ inline float fclamp(float lo, float v, float hi) // clamps v into [lo, hi]; note the (lo, v, hi) argument order
+ {
+ v = v < lo ? lo : v; // raise to the lower bound first
+ return v < hi ? v : hi; // a NaN v compares false both times, so hi is returned
+ }
+
+ inline double dclamp(double lo, double v, double hi) // double-precision twin of fclamp
+ {
+ v = v < lo ? lo : v; // raise to the lower bound first
+ return v < hi ? v : hi; // a NaN v compares false both times, so hi is returned
+ }
+};
+
+template <typename InType, typename OutType>
+DataInfoSpec<InType, OutType>::DataInfoSpec(const DataInitInfo &agg)
+ : DataInitBase(agg), mdv(0)
+{
+ if (std::is_same<cl_float, OutType>::value)
+ ranges = std::make_pair(CL_FLT_MIN, CL_FLT_MAX);
+ else if (std::is_same<cl_double, OutType>::value)
+ ranges = std::make_pair(CL_DBL_MIN, CL_DBL_MAX);
+ else if (std::is_same<cl_uchar, OutType>::value)
+ ranges = std::make_pair(0, CL_UCHAR_MAX);
+ else if (std::is_same<cl_char, OutType>::value)
+ ranges = std::make_pair(CL_CHAR_MIN, CL_CHAR_MAX);
+ else if (std::is_same<cl_ushort, OutType>::value)
+ ranges = std::make_pair(0, CL_USHRT_MAX);
+ else if (std::is_same<cl_short, OutType>::value)
+ ranges = std::make_pair(CL_SHRT_MIN, CL_SHRT_MAX);
+ else if (std::is_same<cl_uint, OutType>::value)
+ ranges = std::make_pair(0, CL_UINT_MAX);
+ else if (std::is_same<cl_int, OutType>::value)
+ ranges = std::make_pair(CL_INT_MIN, CL_INT_MAX);
+ else if (std::is_same<cl_ulong, OutType>::value)
+ ranges = std::make_pair(0, CL_ULONG_MAX);
+ else if (std::is_same<cl_long, OutType>::value)
+ ranges = std::make_pair(CL_LONG_MIN, CL_LONG_MAX);
+
+ // clang-format off
+ // for readability's sake, keep this section unformatted
+ if (std::is_floating_point<InType>::value)
+ { // from float/double
+ InType outMin = static_cast<InType>(ranges.first);
+ InType outMax = static_cast<InType>(ranges.second);
+
+ InType eps = std::is_same<InType, cl_float>::value ? (InType) FLT_EPSILON : (InType) DBL_EPSILON;
+ if (std::is_integral<OutType>::value)
+ { // to char/uchar/short/ushort/int/uint/long/ulong
+ if (sizeof(OutType)<=sizeof(cl_short))
+ { // to char/uchar/short/ushort
+ clamp_ranges=
+ {{outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
+ {outMin-0.5f, outMax + 0.5f - outMax * 0.5f * eps},
+ {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, outMax-1.f},
+ {outMin-0.0f, outMax - outMax * 0.5f * eps },
+ {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, outMax - outMax * 0.5f * eps}};
+ }
+ else if (std::is_same<InType, cl_float>::value)
+ { // from float
+ if (std::is_same<OutType, cl_uint>::value)
+ { // to uint
+ clamp_ranges=
+ { {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)},
+ {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)},
+ {outMin-1.0f+0.5f*eps, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)},
+ {outMin-0.0f, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7) },
+ {outMin-1.0f+0.5f*eps, MAKE_HEX_FLOAT(0x1.fffffep31f, 0x1fffffeL, 7)}};
+ }
+ else if (std::is_same<OutType, cl_int>::value)
+ { // to int
+ clamp_ranges=
+ { {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)},
+ {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)},
+ {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)},
+ {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6) },
+ {outMin, MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffeL, 6)}};
+ }
+ else if (std::is_same<OutType, cl_ulong>::value)
+ { // to ulong
+ clamp_ranges=
+ {{outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)},
+ {outMin-0.5f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)},
+ {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)},
+ {outMin-0.0f, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39) },
+ {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_FLOAT(0x1.fffffep63f, 0x1fffffeL, 39)}};
+ }
+ else if (std::is_same<OutType, cl_long>::value)
+ { // to long
+ clamp_ranges=
+ { {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)},
+ {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)},
+ {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)},
+ {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)},
+ {MAKE_HEX_FLOAT(-0x1.0p63f, -0x1L, 63), MAKE_HEX_FLOAT(0x1.fffffep62f, 0x1fffffeL, 38)}};
+ }
+ }
+ else
+ { // from double
+ if (std::is_same<OutType, cl_uint>::value)
+ { // to uint
+ clamp_ranges=
+ { {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * eps},
+ {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p31, 0x1LL, 31) * eps},
+ {outMin-1.0f+0.5f*eps, outMax},
+ {outMin-0.0f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21) },
+ {outMin-1.0f+0.5f*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp31, 0x1fffffffffffffLL, -21)}};
+ }
+ else if (std::is_same<OutType, cl_int>::value)
+ { // to int
+ clamp_ranges=
+ { {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps},
+ {outMin-0.5f, outMax + 0.5 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps},
+ {outMin-1.0f+outMax*eps, outMax},
+ {outMin-0.0f, outMax + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps },
+ {outMin-1.0f+outMax*eps, outMax + 1.0 - MAKE_HEX_DOUBLE(0x1.0p30, 0x1LL, 30) * eps}};
+ }
+ else if (std::is_same<OutType, cl_ulong>::value)
+ { // to ulong
+ clamp_ranges=
+ {{outMin-0.5f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)},
+ {outMin-0.5f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)},
+ {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)},
+ {outMin-0.0f, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11) },
+ {outMin-1.0f+(std::is_signed<OutType>::value?outMax:0.5f)*eps, MAKE_HEX_DOUBLE(0x1.fffffffffffffp63, 0x1fffffffffffffLL, 11)}};
+ }
+ else if (std::is_same<OutType, cl_long>::value)
+ { // to long
+ clamp_ranges=
+ { {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)},
+ {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)},
+ {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)},
+ {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)},
+ {MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63), MAKE_HEX_DOUBLE(0x1.fffffffffffffp62, 0x1fffffffffffffLL, 10)}};
+ }
+ }
+ }
+ }
+ // clang-format on
+}
+
+// Rounds a float to an integral value using the "magic number" trick: adding
+// and then subtracting 2^23 (with the sign of f) forces the fractional bits to
+// be rounded away by the floating-point add itself. Inputs with |f| >= 2^23
+// are returned unchanged.
+// NOTE(review): a NaN input fails the fabsf() comparison and is returned
+// as-is.
+template <typename InType, typename OutType>
+float DataInfoSpec<InType, OutType>::round_to_int(float f)
+{
+ // magic[0] = +2^23 for non-negative input, magic[1] = -2^23 for negative
+ static const float magic[2] = { MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23),
+ -MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23) };
+
+ // Round fractional values to integer in round towards nearest mode
+ if (fabsf(f) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1, 23))
+ {
+ // volatile keeps the add/sub pair from being folded away by the compiler
+ volatile float x = f;
+ float magicVal = magic[f < 0];
+
+#if defined(__SSE__)
+ // Defeat x87 based arithmetic, which can't do FTZ, and will round this
+ // incorrectly
+ __m128 v = _mm_set_ss(x);
+ __m128 m = _mm_set_ss(magicVal);
+ v = _mm_add_ss(v, m);
+ v = _mm_sub_ss(v, m);
+ _mm_store_ss((float *)&x, v);
+#else
+ x += magicVal;
+ x -= magicVal;
+#endif
+ f = x;
+ }
+ return f;
+}
+
+// Converts a double to long long with saturation: values at or above 2^63
+// yield LLONG_MAX, values at or below -2^63 yield LLONG_MIN. Anything else is
+// first rounded to an integral value with the 2^52 magic-number add/subtract
+// (only needed when |f| < 2^52) and then cast.
+// NOTE(review): a NaN input fails every comparison below and reaches the
+// final cast unchanged - confirm callers never pass NaN.
+template <typename InType, typename OutType>
+long long DataInfoSpec<InType, OutType>::round_to_int_and_clamp(double f)
+{
+ // magic[0] = +2^52 for non-negative input, magic[1] = -2^52 for negative
+ static const double magic[2] = { MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52),
+ MAKE_HEX_DOUBLE(-0x1.0p52, -0x1LL, 52) };
+
+ // -(double)LLONG_MIN is 2^63
+ if (f >= -(double)LLONG_MIN) return LLONG_MAX;
+
+ if (f <= (double)LLONG_MIN) return LLONG_MIN;
+
+ // Round fractional values to integer in round towards nearest mode
+ if (fabs(f) < MAKE_HEX_DOUBLE(0x1.0p52, 0x1LL, 52))
+ {
+ // volatile keeps the add/sub pair from being folded away by the compiler
+ volatile double x = f;
+ double magicVal = magic[f < 0];
+#if defined(__SSE2__) || defined(_MSC_VER)
+ // Defeat x87 based arithmetic, which can't do FTZ, and will round this
+ // incorrectly
+ __m128d v = _mm_set_sd(x);
+ __m128d m = _mm_set_sd(magicVal);
+ v = _mm_add_sd(v, m);
+ v = _mm_sub_sd(v, m);
+ _mm_store_sd((double *)&x, v);
+#else
+ x += magicVal;
+ x -= magicVal;
+#endif
+ f = x;
+ }
+ return (long long)f;
+}
+
+// Returns the absolute value of a float or double by clearing the sign bit of
+// its bit pattern. For any other OutType an error is logged and x is returned
+// unchanged.
+//
+// The union carries both a 32-bit and a 64-bit integer view: masking a double
+// through a 32-bit member would leave its sign bit (bit 63) untouched.
+template <typename InType, typename OutType>
+OutType DataInfoSpec<InType, OutType>::absolute(const OutType &x)
+{
+    union {
+        cl_uint u32; // bit view used when OutType is float
+        cl_ulong u64; // bit view used when OutType is double
+        OutType f;
+    } u;
+    u.f = x;
+    if (std::is_same<OutType, float>::value)
+        u.u32 &= 0x7fffffffU;
+    else if (std::is_same<OutType, double>::value)
+        u.u64 &= 0x7fffffffffffffffULL;
+    else
+        log_error("Unexpected argument type of DataInfoSpec::absolute");
+
+    return u.f;
+}
+
+// Computes the reference result of a non-saturating conversion of *in
+// (InType) and stores it in *out (OutType). The branch is chosen from the
+// InType/OutType pair via type traits:
+//   - float input: cast for floating output, round_to_int() for integer
+//     output (round_to_int_and_clamp() for long, MSVC workaround for ulong);
+//   - double input: cast for float output, rint() otherwise;
+//   - long/ulong input: dedicated paths for double and float output with
+//     compiler/architecture specific workarounds, plain cast otherwise;
+//   - remaining integer inputs: plain conversion, with zero mapped
+//     explicitly to +0.0 for floating output.
+template <typename InType, typename OutType>
+void DataInfoSpec<InType, OutType>::conv(OutType *out, InType *in)
+{
+ if (std::is_same<cl_float, InType>::value)
+ {
+ cl_float inVal = *in;
+
+ if (std::is_floating_point<OutType>::value)
+ {
+ *out = (OutType)inVal;
+ }
+ else if (std::is_same<cl_ulong, OutType>::value)
+ {
+#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
+ // VS2005 (at least) on x86 uses fistp to store the float as a
+ // 64-bit int. However, fistp stores it as a signed int, and some of
+ // the test values won't fit into a signed int. (These test values
+ // are >= 2^63.) The result on VS2005 is that these end up silently
+ // (at least by default settings) clamped to the max lowest ulong.
+ cl_float x = round_to_int(inVal);
+ // For x >= 2^63: convert x - 2^63 (which fits a signed 64-bit int) and
+ // add 2^63 back in integer arithmetic.
+ if (x >= 9223372036854775808.0f)
+ {
+ x -= 9223372036854775808.0f;
+ ((cl_ulong *)out)[0] = x;
+ ((cl_ulong *)out)[0] += 9223372036854775808ULL;
+ }
+ else
+ {
+ ((cl_ulong *)out)[0] = x;
+ }
+#else
+ *out = round_to_int(inVal);
+#endif
+ }
+ else if (std::is_same<cl_long, OutType>::value)
+ {
+ *out = round_to_int_and_clamp(inVal);
+ }
+ else
+ *out = round_to_int(inVal);
+ }
+ else if (std::is_same<cl_double, InType>::value)
+ {
+ if (std::is_same<cl_float, OutType>::value)
+ *out = (OutType)*in;
+ else
+ *out = rint(*in);
+ }
+ else if (std::is_same<cl_ulong, InType>::value
+ || std::is_same<cl_long, InType>::value)
+ {
+ if (std::is_same<cl_double, OutType>::value)
+ {
+#if defined(_MSC_VER)
+ cl_ulong l = ((cl_ulong *)in)[0];
+ double result;
+
+ if (std::is_same<cl_ulong, InType>::value)
+ {
+ // When the top bit is set, halve the value (keeping the dropped low bit
+ // ORed in as a sticky bit) so it fits a signed 64-bit int, convert, then
+ // double the result below.
+ cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1))
+ : (cl_long)l;
+#if defined(_M_X64)
+ _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), sl));
+#else
+ result = sl;
+#endif
+ ((double *)out)[0] =
+ (l == 0 ? 0.0 : (((cl_long)l < 0) ? result * 2.0 : result));
+ }
+ else
+ {
+ _mm_store_sd(&result, _mm_cvtsi64_sd(_mm_setzero_pd(), l));
+ ((double *)out)[0] =
+ (l == 0 ? 0.0 : result); // Per IEEE-754-2008 5.4.1, 0's
+ // always convert to +0.0
+ }
+#else
+ // Use volatile to prevent optimization by Clang compiler
+ volatile InType vi = *in;
+ *out = (vi == 0 ? 0.0 : static_cast<OutType>(vi));
+#endif
+ }
+ else if (std::is_same<cl_float, OutType>::value)
+ {
+ cl_float outVal = 0.f;
+
+#if defined(_MSC_VER) && defined(_M_X64)
+ cl_ulong l = ((cl_ulong *)in)[0];
+ float result;
+ if (std::is_same<cl_ulong, InType>::value)
+ {
+ // Same halve / convert / double scheme as the double path above.
+ cl_long sl = ((cl_long)l < 0) ? (cl_long)((l >> 1) | (l & 1))
+ : (cl_long)l;
+ _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), sl));
+ outVal = (l == 0 ? 0.0f
+ : (((cl_long)l < 0) ? result * 2.0f : result));
+ }
+ else
+ {
+ _mm_store_ss(&result, _mm_cvtsi64_ss(_mm_setzero_ps(), l));
+ outVal = (l == 0 ? 0.0f : result); // Per IEEE-754-2008 5.4.1,
+ // 0's always convert to +0.0
+ }
+#else
+ InType l = ((InType *)in)[0];
+#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
+ /* ARM VFP doesn't have hardware instruction for converting from
+ * 64-bit integer to float types, hence GCC ARM uses the
+ * floating-point emulation code despite which -mfloat-abi setting
+ * it is. But the emulation code in libgcc.a has only one rounding
+ * mode (round to nearest even in this case) and ignores the user
+ * rounding mode setting in hardware. As a result setting rounding
+ * modes in hardware won't give correct rounding results for type
+ * convert from 64-bit integer to float using GCC for ARM compiler so
+ * for testing different rounding modes, we need to use alternative
+ * reference function. ARM64 does have an instruction, however we
+ * cannot guarantee the compiler will use it. On all ARM
+ * architectures use emulation to calculate reference.*/
+ if (std::is_same<cl_ulong, InType>::value)
+ outVal = qcom_u64_2_f32(l, qcom_sat, qcom_rm);
+ else
+ outVal = (l == 0 ? 0.0f : qcom_s64_2_f32(l, qcom_sat, qcom_rm));
+#else
+ outVal = (l == 0 ? 0.0f : (float)l); // Per IEEE-754-2008 5.4.1, 0's
+ // always convert to +0.0
+#endif
+#endif
+
+ *out = outVal;
+ }
+ else
+ {
+ *out = (OutType)*in;
+ }
+ }
+ else
+ {
+ // Remaining input types (neither float, double, long nor ulong)
+ if (std::is_same<cl_float, OutType>::value)
+ {
+ // Use volatile to prevent optimization by Clang compiler
+ volatile InType vi = *in;
+ // Per IEEE-754-2008 5.4.1, 0 always converts to +0.0
+ *out = (vi == 0 ? 0.0f : vi);
+ }
+ else if (std::is_same<cl_double, OutType>::value)
+ {
+ // Per IEEE-754-2008 5.4.1, 0 always converts to +0.0
+ *out = (*in == 0 ? 0.0 : *in);
+ }
+ else
+ {
+ *out = (OutType)*in;
+ }
+ }
+}
+
+// Clamps _x to the closed interval [_lo, _hi].
+// NOTE: being a macro, each argument may be evaluated more than once, so
+// arguments with side effects (or expensive calls) are repeated.
+#define CLAMP(_lo, _x, _hi) \
+ ((_x) < (_lo) ? (_lo) : ((_x) > (_hi) ? (_hi) : (_x)))
+
+// Computes the reference result of a saturating conversion of *in (InType)
+// and stores it in *out (OutType).
+//   - floating input: cast for floating output; for 64-bit integer output
+//     the value is rounded (round_to_int() for float, rint() for double) and
+//     explicitly saturated at the 2^63 / 2^64 boundaries; for narrower
+//     integer output it is rounded with round_to_int_and_clamp() and clamped
+//     to `ranges`;
+//   - integer to integer: plain cast when the value always fits, otherwise
+//     clamped to `ranges` (with 0 as lower bound for mixed signedness);
+//   - integer to floating: plain cast (via absolute() for unsigned input).
+template <typename InType, typename OutType>
+void DataInfoSpec<InType, OutType>::conv_sat(OutType *out, InType *in)
+{
+ if (std::is_floating_point<InType>::value)
+ {
+ if (std::is_floating_point<OutType>::value)
+ { // in float/double, out float/double
+ *out = (OutType)(*in);
+ }
+ else if ((std::is_same<InType, cl_float>::value)
+ && std::is_same<cl_ulong, OutType>::value)
+ {
+ cl_float x = round_to_int(*in);
+
+#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
+ // VS2005 (at least) on x86 uses fistp to store the float as a
+ // 64-bit int. However, fistp stores it as a signed int, and some of
+ // the test values won't fit into a signed int. (These test values
+ // are >= 2^63.) The result on VS2005 is that these end up silently
+ // (at least by default settings) clamped to the max lowest ulong.
+ if (x >= 18446744073709551616.0f)
+ { // 2^64
+ *out = 0xFFFFFFFFFFFFFFFFULL;
+ }
+ else if (x < 0)
+ {
+ *out = 0;
+ }
+ else if (x >= 9223372036854775808.0f)
+ { // 2^63
+ // Convert x - 2^63 (fits a signed 64-bit int), then add 2^63 back as an
+ // integer.
+ x -= 9223372036854775808.0f;
+ *out = x;
+ *out += 9223372036854775808ULL;
+ }
+ else
+ {
+ *out = x;
+ }
+#else
+ *out = x >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64)
+ ? 0xFFFFFFFFFFFFFFFFULL
+ : x < 0 ? 0 : (OutType)x;
+#endif
+ }
+ else if ((std::is_same<InType, cl_float>::value)
+ && std::is_same<cl_long, OutType>::value)
+ {
+ cl_float f = round_to_int(*in);
+ *out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63)
+ ? 0x7FFFFFFFFFFFFFFFULL
+ : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63)
+ ? 0x8000000000000000LL
+ : (OutType)f;
+ }
+ else if (std::is_same<InType, cl_double>::value
+ && std::is_same<cl_ulong, OutType>::value)
+ {
+ InType f = rint(*in);
+ *out = f >= MAKE_HEX_DOUBLE(0x1.0p64, 0x1LL, 64)
+ ? 0xFFFFFFFFFFFFFFFFULL
+ : f < 0 ? 0 : (OutType)f;
+ }
+ else if (std::is_same<InType, cl_double>::value
+ && std::is_same<cl_long, OutType>::value)
+ {
+ InType f = rint(*in);
+ *out = f >= MAKE_HEX_DOUBLE(0x1.0p63, 0x1LL, 63)
+ ? 0x7FFFFFFFFFFFFFFFULL
+ : f < MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63)
+ ? 0x8000000000000000LL
+ : (OutType)f;
+ }
+ else
+ { // in float/double, out char/uchar/short/ushort/int/uint
+ // NOTE: CLAMP is a macro, so round_to_int_and_clamp(*in) is evaluated
+ // more than once here.
+ *out =
+ CLAMP(ranges.first, round_to_int_and_clamp(*in), ranges.second);
+ }
+ }
+ else if (std::is_integral<InType>::value
+ && std::is_integral<OutType>::value)
+ {
+ {
+ // Same signedness on both sides
+ if ((std::is_signed<InType>::value
+ && std::is_signed<OutType>::value)
+ || (!std::is_signed<InType>::value
+ && !std::is_signed<OutType>::value))
+ {
+ if (sizeof(InType) <= sizeof(OutType))
+ {
+ // Widening or same-size: every input value is representable
+ *out = (OutType)*in;
+ }
+ else
+ {
+ *out = CLAMP(ranges.first, *in, ranges.second);
+ }
+ }
+ else
+ { // mixed signed/unsigned types
+ if (sizeof(InType) < sizeof(OutType))
+ {
+ *out = (!std::is_signed<InType>::value)
+ ? (OutType)*in
+ : CLAMP(0, *in, ranges.second); // *in < 0 ? 0 : *in
+ }
+ else
+ { // bigger/equal mixed signed/unsigned types - always clamp
+ *out = CLAMP(0, *in, ranges.second);
+ }
+ }
+ }
+ }
+ else
+ { // InType integral, OutType floating
+ *out = std::is_signed<InType>::value ? (OutType)*in
+ : absolute((OutType)*in);
+ }
+}
+
+// Fills the slice of the global input buffer gIn that belongs to job `job_id`
+// (`size` elements of InType, located at job_id * size * gTypeSizes[inType]
+// bytes) with test input values.
+//   - char/uchar/short/ushort: consecutive values starting at `start`;
+//   - int/uint and float: random 32-bit patterns when gIsEmbedded is set,
+//     otherwise consecutive bit patterns i + start;
+//   - long/ulong: hand-picked bit patterns (only when start == 0) followed
+//     by random 64-bit values;
+//   - double: 64-bit patterns derived from the 32-bit index.
+// When start == 0 the int/uint, float and double paths overwrite the tail of
+// the slice with the matching special-values table. Floating-point inputs of
+// unsaturated conversions are finally passed through clamp().
+// `thread_id` selects the per-thread random generator in mdv.
+template <typename InType, typename OutType>
+void DataInfoSpec<InType, OutType>::init(const cl_uint &job_id,
+ const cl_uint &thread_id)
+{
+ uint64_t ulStart = start;
+ void *pIn = (char *)gIn + job_id * size * gTypeSizes[inType];
+
+ if (std::is_integral<InType>::value)
+ {
+ InType *o = (InType *)pIn;
+ if (sizeof(InType) <= sizeof(cl_short))
+ { // char/uchar/ushort/short
+ for (int i = 0; i < size; i++) o[i] = ulStart++;
+ }
+ else if (sizeof(InType) <= sizeof(cl_int))
+ { // int/uint
+ int i = 0;
+ if (gIsEmbedded)
+ for (i = 0; i < size; i++)
+ o[i] = (InType)genrand_int32(mdv[thread_id]);
+ else
+ for (i = 0; i < size; i++) o[i] = (InType)i + ulStart;
+
+ if (0 == ulStart)
+ {
+ // i == size here, so o + i is the end of the slice: the table (truncated
+ // to the slice size if needed) is copied over the last elements.
+ size_t tableSize = specialValuesUInt.size()
+ * sizeof(decltype(specialValuesUInt)::value_type);
+ if (sizeof(InType) * size < tableSize)
+ tableSize = sizeof(InType) * size;
+ memcpy((char *)(o + i) - tableSize, &specialValuesUInt.front(),
+ tableSize);
+ }
+ }
+ else
+ { // long/ulong
+ // Shadows the outer `o`: the slice is written as raw 64-bit patterns.
+ cl_ulong *o = (cl_ulong *)pIn;
+ cl_ulong i, j, k;
+
+ i = 0;
+ if (ulStart == 0)
+ {
+ // Try various powers of two
+ for (j = 0; j < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
+ o[j] = (cl_ulong)1 << j;
+ i = j;
+
+ // try the complement of those
+ for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
+ o[i++] = ~((cl_ulong)1 << j);
+
+ // Try various negative powers of two
+ for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
+ o[i++] = (cl_ulong)0xFFFFFFFFFFFFFFFEULL << j;
+
+ // try various powers of two plus 1, shifted by various amounts
+ for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
+ for (k = 0;
+ i < (cl_ulong)size && k < 8 * sizeof(cl_ulong) - j;
+ k++)
+ o[i++] = (((cl_ulong)1 << j) + 1) << k;
+
+ // try various powers of two minus 1
+ for (j = 0; i < (cl_ulong)size && j < 8 * sizeof(cl_ulong); j++)
+ for (k = 0;
+ i < (cl_ulong)size && k < 8 * sizeof(cl_ulong) - j;
+ k++)
+ o[i++] = (((cl_ulong)1 << j) - 1) << k;
+
+ // Other patterns
+ cl_ulong pattern[] = {
+ 0x3333333333333333ULL, 0x5555555555555555ULL,
+ 0x9999999999999999ULL, 0x6666666666666666ULL,
+ 0xccccccccccccccccULL, 0xaaaaaaaaaaaaaaaaULL
+ };
+ cl_ulong mask[] = { 0xffffffffffffffffULL,
+ 0xff00ff00ff00ff00ULL,
+ 0xffff0000ffff0000ULL,
+ 0xffffffff00000000ULL };
+ // Two values are written per (pattern, mask) pair, hence the
+ // i + 2 <= size bound on the inner loop.
+ for (j = 0; i < (cl_ulong)size
+ && j < sizeof(pattern) / sizeof(pattern[0]);
+ j++)
+ for (k = 0; i + 2 <= (cl_ulong)size
+ && k < sizeof(mask) / sizeof(mask[0]);
+ k++)
+ {
+ o[i++] = pattern[j] & mask[k];
+ o[i++] = pattern[j] & ~mask[k];
+ }
+ }
+
+ // Fill whatever is left with random 64-bit values built from two
+ // 32-bit draws.
+ auto &md = mdv[thread_id];
+ for (; i < (cl_ulong)size; i++)
+ o[i] = (cl_ulong)genrand_int32(md)
+ | ((cl_ulong)genrand_int32(md) << 32);
+ }
+ } // integrals
+ else if (std::is_same<InType, cl_float>::value)
+ {
+ // Floats are generated through their 32-bit patterns
+ cl_uint *o = (cl_uint *)pIn;
+ int i;
+
+ if (gIsEmbedded)
+ for (i = 0; i < size; i++)
+ o[i] = (cl_uint)genrand_int32(mdv[thread_id]);
+ else
+ for (i = 0; i < size; i++) o[i] = (cl_uint)i + ulStart;
+
+ if (0 == ulStart)
+ {
+ // i == size here: the special values replace the tail of the slice
+ size_t tableSize = specialValuesFloat.size()
+ * sizeof(decltype(specialValuesFloat)::value_type);
+ if (sizeof(InType) * size < tableSize)
+ tableSize = sizeof(InType) * size;
+ memcpy((char *)(o + i) - tableSize, &specialValuesFloat.front(),
+ tableSize);
+ }
+
+ if (kUnsaturated == sat)
+ {
+ InType *f = (InType *)pIn;
+ for (i = 0; i < size; i++) f[i] = clamp(f[i]);
+ }
+ }
+ else if (std::is_same<InType, cl_double>::value)
+ {
+ InType *o = (InType *)pIn;
+ int i = 0;
+
+ union {
+ uint64_t u;
+ InType d;
+ } u;
+
+ for (i = 0; i < size; i++)
+ {
+ uint64_t z = i + ulStart;
+
+ uint32_t bits = ((uint32_t)z ^ (uint32_t)(z >> 32));
+ // split 0x89abcdef to 0x89abc00000000def
+ u.u = bits & 0xfffU;
+ u.u |= (uint64_t)(bits & ~0xfffU) << 32;
+ // sign extend the leading bit of def segment as sign bit so that
+ // the middle region consists of either all 1s or 0s
+ u.u -= (bits & 0x800U) << 1;
+ o[i] = u.d;
+ }
+
+ if (0 == ulStart)
+ {
+ // i == size here: the special values replace the tail of the slice
+ size_t tableSize = specialValuesDouble.size()
+ * sizeof(decltype(specialValuesDouble)::value_type);
+ if (sizeof(InType) * size < tableSize)
+ tableSize = sizeof(InType) * size;
+ memcpy((char *)(o + i) - tableSize, &specialValuesDouble.front(),
+ tableSize);
+ }
+
+ // NOTE(review): the float path above tests `kUnsaturated == sat`; this
+ // presumably means the same thing (kUnsaturated == 0) - confirm and unify.
+ if (0 == sat)
+ for (i = 0; i < size; i++) o[i] = clamp(o[i]);
+ }
+}
+
+// Restricts a floating-point input to the clamp range registered for the
+// current rounding mode when the conversion targets an integer type. Every
+// other InType/OutType combination returns the input untouched.
+template <typename InType, typename OutType>
+InType DataInfoSpec<InType, OutType>::clamp(const InType &in)
+{
+    const bool toInteger = std::is_integral<OutType>::value;
+
+    // float source: single-precision clamp helper
+    if (toInteger && std::is_same<InType, cl_float>::value)
+        return fclamp(clamp_ranges[round].first, in,
+                      clamp_ranges[round].second);
+
+    // double source: double-precision clamp helper
+    if (toInteger && std::is_same<InType, cl_double>::value)
+        return dclamp(clamp_ranges[round].first, in,
+                      clamp_ranges[round].second);
+
+    return in;
+}
+
+#endif /* CONVERSIONS_DATA_INFO_H */
diff --git a/test_conformance/conversions/fplib.h b/test_conformance/conversions/fplib.h
index 534550a3..c69b1e89 100644
--- a/test_conformance/conversions/fplib.h
+++ b/test_conformance/conversions/fplib.h
@@ -13,6 +13,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
+#ifndef CONVERSIONS_FPLIB_H
+#define CONVERSIONS_FPLIB_H
+
#include <stdbool.h>
#include <stdint.h>
@@ -28,3 +31,5 @@ typedef enum
float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd);
float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd);
+
+#endif
diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp
index 2ee05463..dab61dc5 100644
--- a/test_conformance/conversions/test_conversions.cpp
+++ b/test_conformance/conversions/test_conversions.cpp
@@ -13,12 +13,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-#include "harness/compat.h"
-#include "harness/rounding_mode.h"
#include "harness/ThreadPool.h"
#include "harness/testHarness.h"
-#include "harness/kernelHelpers.h"
#include "harness/parseParameters.h"
+#include "harness/mt19937.h"
+
#if defined(__APPLE__)
#include <sys/sysctl.h>
#endif
@@ -33,7 +32,6 @@
#include <libgen.h>
#endif
-#include "mingw_compat.h"
#if defined(__MINGW32__)
#include <sys/param.h>
#endif
@@ -49,283 +47,77 @@
#include <time.h>
#include <algorithm>
+#include <type_traits>
+#include <vector>
#include "Sleep.h"
-#include "basic_test_conversions.h"
-
-#if (defined(_WIN32) && defined(_MSC_VER))
-// need for _controlfp_s and rouinding modes in RoundingMode
-#include "harness/testHarness.h"
-#endif
-
-#pragma mark -
-#pragma mark globals
-
-#define BUFFER_SIZE (1024 * 1024)
-#define kPageSize 4096
-#define EMBEDDED_REDUCTION_FACTOR 16
-#define PERF_LOOP_COUNT 100
-#define kCallStyleCount (kVectorSizeCount + 1 /* for implicit scalar */)
+#include "basic_test_conversions.h"
+#include <climits>
+#include <cstring>
#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
#include "fplib.h"
-extern bool qcom_sat;
-extern roundingMode qcom_rm;
#endif
-const char **argList = NULL;
-int argCount = 0;
-cl_context gContext = NULL;
-cl_command_queue gQueue = NULL;
-char appName[64] = "ctest";
-int gStartTestNumber = -1;
-int gEndTestNumber = 0;
-#if defined(__APPLE__)
-int gTimeResults = 1;
-#else
-int gTimeResults = 0;
+#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
+/* Rounding modes and saturation for use with qcom 64 bit to float conversion
+ * library */
+bool qcom_sat;
+roundingMode qcom_rm;
#endif
-int gReportAverageTimes = 0;
-void *gIn = NULL;
-void *gRef = NULL;
-void *gAllowZ = NULL;
-void *gOut[kCallStyleCount] = { NULL };
-cl_mem gInBuffer;
-cl_mem gOutBuffers[kCallStyleCount];
-size_t gComputeDevices = 0;
-uint32_t gDeviceFrequency = 0;
-int gWimpyMode = 0;
-int gWimpyReductionFactor = 128;
-int gSkipTesting = 0;
-int gForceFTZ = 0;
-int gMultithread = 1;
-int gIsRTZ = 0;
-uint32_t gSimdSize = 1;
-int gHasDouble = 0;
-int gTestDouble = 1;
-const char *sizeNames[] = { "", "", "2", "3", "4", "8", "16" };
-const int vectorSizes[] = { 1, 1, 2, 3, 4, 8, 16 };
-int gMinVectorSize = 0;
-int gMaxVectorSize = sizeof(vectorSizes) / sizeof(vectorSizes[0]);
-static MTdata gMTdata;
-
-#pragma mark -
-#pragma mark Declarations
+
static int ParseArgs(int argc, const char **argv);
static void PrintUsage(void);
test_status InitCL(cl_device_id device);
-static int GetTestCase(const char *name, Type *outType, Type *inType,
- SaturationMode *sat, RoundingMode *round);
-static int DoTest(cl_device_id device, Type outType, Type inType,
- SaturationMode sat, RoundingMode round, MTdata d);
-static cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
- RoundingMode round, int vectorSize,
- cl_kernel *outKernel);
-static int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf,
- size_t blockCount);
-
-void *FlushToZero(void);
-void UnFlushToZero(void *);
-
-// Windows (since long double got deprecated) sets the x87 to 53-bit precision
-// (that's x87 default state). This causes problems with the tests that
-// convert long and ulong to float and double or otherwise deal with values
-// that need more precision than 53-bit. So, set the x87 to 64-bit precision.
-static inline void Force64BitFPUPrecision(void)
-{
-#if __MINGW32__
- // The usual method is to use _controlfp as follows:
- // #include <float.h>
- // _controlfp(_PC_64, _MCW_PC);
- //
- // _controlfp is available on MinGW32 but not on MinGW64. Instead of having
- // divergent code just use inline assembly which works for both.
- unsigned short int orig_cw = 0;
- unsigned short int new_cw = 0;
- __asm__ __volatile__("fstcw %0" : "=m"(orig_cw));
- new_cw = orig_cw | 0x0300; // set precision to 64-bit
- __asm__ __volatile__("fldcw %0" ::"m"(new_cw));
-#else
- /* Implement for other platforms if needed */
-#endif
-}
-int test_conversions(cl_device_id device, cl_context context,
- cl_command_queue queue, int num_elements)
-{
- int error, i, testNumber = -1;
- int startMinVectorSize = gMinVectorSize;
- Type inType, outType;
- RoundingMode round;
- SaturationMode sat;
- if (argCount)
- {
- for (i = 0; i < argCount; i++)
- {
- if (GetTestCase(argList[i], &outType, &inType, &sat, &round))
- {
- vlog_error("\n\t\t**** ERROR: Unable to parse function name "
- "%s. Skipping.... *****\n\n",
- argList[i]);
- continue;
- }
+const char *gTypeNames[kTypeCount] = { "uchar", "char", "ushort", "short",
+ "uint", "int", "float", "double",
+ "ulong", "long" };
- // skip double if we don't have it
- if (!gTestDouble && (inType == kdouble || outType == kdouble))
- {
- if (gHasDouble)
- {
- vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
- gTypeNames[outType], gSaturationNames[sat],
- gRoundingModeNames[round], gTypeNames[inType]);
- vlog("\t\tcl_khr_fp64 enabled, but double testing turned "
- "off.\n");
- }
+const char *gRoundingModeNames[kRoundingModeCount] = { "", "_rte", "_rtp",
+ "_rtn", "_rtz" };
- continue;
- }
+const char *gSaturationNames[2] = { "", "_sat" };
- // skip longs on embedded
- if (!gHasLong
- && (inType == klong || outType == klong || inType == kulong
- || outType == kulong))
- {
- continue;
- }
+size_t gTypeSizes[kTypeCount] = {
+ sizeof(cl_uchar), sizeof(cl_char), sizeof(cl_ushort), sizeof(cl_short),
+ sizeof(cl_uint), sizeof(cl_int), sizeof(cl_float), sizeof(cl_double),
+ sizeof(cl_ulong), sizeof(cl_long),
+};
- // Skip the implicit converts if the rounding mode is not default or
- // test is saturated
- if (0 == startMinVectorSize)
- {
- if (sat || round != kDefaultRoundingMode)
- gMinVectorSize = 1;
- else
- gMinVectorSize = 0;
- }
+char appName[64] = "ctest";
+int gMultithread = 1;
- if ((error = DoTest(device, outType, inType, sat, round, gMTdata)))
- {
- vlog_error("\t *** convert_%sn%s%s( %sn ) FAILED ** \n",
- gTypeNames[outType], gSaturationNames[sat],
- gRoundingModeNames[round], gTypeNames[inType]);
- }
- }
+
+int test_conversions(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ if (argCount)
+ {
+ return MakeAndRunTest<CustomConversionsTest>(device, context, queue,
+ num_elements);
}
else
{
- for (outType = (Type)0; outType < kTypeCount;
- outType = (Type)(outType + 1))
- {
- for (inType = (Type)0; inType < kTypeCount;
- inType = (Type)(inType + 1))
- {
- // skip longs on embedded
- if (!gHasLong
- && (inType == klong || outType == klong || inType == kulong
- || outType == kulong))
- {
- continue;
- }
-
- for (sat = (SaturationMode)0; sat < kSaturationModeCount;
- sat = (SaturationMode)(sat + 1))
- {
- // skip illegal saturated conversions to float type
- if (kSaturated == sat
- && (outType == kfloat || outType == kdouble))
- {
- continue;
- }
-
- for (round = (RoundingMode)0; round < kRoundingModeCount;
- round = (RoundingMode)(round + 1))
- {
- if (++testNumber < gStartTestNumber)
- {
- // vlog( "%d) skipping convert_%sn%s%s( %sn
- // )\n", testNumber, gTypeNames[ outType ],
- // gSaturationNames[ sat ],
- // gRoundingModeNames[round], gTypeNames[inType]
- // );
- continue;
- }
- else
- {
- if (gEndTestNumber > 0
- && testNumber >= gEndTestNumber)
- {
- goto exit;
- }
- }
-
- vlog("%d) Testing convert_%sn%s%s( %sn ):\n",
- testNumber, gTypeNames[outType],
- gSaturationNames[sat], gRoundingModeNames[round],
- gTypeNames[inType]);
-
- // skip double if we don't have it
- if (!gTestDouble
- && (inType == kdouble || outType == kdouble))
- {
- if (gHasDouble)
- {
- vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
- "FAILED ** \n",
- testNumber, gTypeNames[outType],
- gSaturationNames[sat],
- gRoundingModeNames[round],
- gTypeNames[inType]);
- vlog("\t\tcl_khr_fp64 enabled, but double "
- "testing turned off.\n");
- }
- continue;
- }
-
- // Skip the implicit converts if the rounding mode is
- // not default or test is saturated
- if (0 == startMinVectorSize)
- {
- if (sat || round != kDefaultRoundingMode)
- gMinVectorSize = 1;
- else
- gMinVectorSize = 0;
- }
-
- if ((error = DoTest(device, outType, inType, sat, round,
- gMTdata)))
- {
- vlog_error("\t *** %d) convert_%sn%s%s( %sn ) "
- "FAILED ** \n",
- testNumber, gTypeNames[outType],
- gSaturationNames[sat],
- gRoundingModeNames[round],
- gTypeNames[inType]);
- }
- }
- }
- }
- }
+ return MakeAndRunTest<ConversionsTest>(device, context, queue,
+ num_elements);
}
-
-exit:
- return gFailCount;
}
+
test_definition test_list[] = {
ADD_TEST(conversions),
};
const int test_num = ARRAY_SIZE(test_list);
-#pragma mark -
int main(int argc, const char **argv)
{
int error;
- cl_uint seed = (cl_uint)time(NULL);
argc = parseCustomParam(argc, argv);
if (argc == -1)
@@ -352,8 +144,8 @@ int main(int argc, const char **argv)
#endif
vlog("===========================================================\n");
- vlog("Random seed: %u\n", seed);
- gMTdata = init_genrand(seed);
+ vlog("Random seed: %u\n", gRandomSeed);
+ gMTdata = init_genrand(gRandomSeed);
const char *arg[] = { argv[0] };
int ret =
@@ -378,8 +170,6 @@ int main(int argc, const char **argv)
return ret;
}
-#pragma mark -
-#pragma mark setup
static int ParseArgs(int argc, const char **argv)
{
@@ -509,7 +299,7 @@ static int ParseArgs(int argc, const char **argv)
gWimpyMode = 1;
}
- vlog( "\n" );
+ vlog("\n");
PrintArch();
@@ -526,6 +316,7 @@ static int ParseArgs(int argc, const char **argv)
return 0;
}
+
static void PrintUsage(void)
{
int i;
@@ -564,63 +355,6 @@ static void PrintUsage(void)
}
-static int GetTestCase(const char *name, Type *outType, Type *inType,
- SaturationMode *sat, RoundingMode *round)
-{
- int i;
-
- // Find the return type
- for (i = 0; i < kTypeCount; i++)
- if (name == strstr(name, gTypeNames[i]))
- {
- *outType = (Type)i;
- name += strlen(gTypeNames[i]);
-
- break;
- }
-
- if (i == kTypeCount) return -1;
-
- // Check to see if _sat appears next
- *sat = (SaturationMode)0;
- for (i = 1; i < kSaturationModeCount; i++)
- if (name == strstr(name, gSaturationNames[i]))
- {
- *sat = (SaturationMode)i;
- name += strlen(gSaturationNames[i]);
- break;
- }
-
- *round = (RoundingMode)0;
- for (i = 1; i < kRoundingModeCount; i++)
- if (name == strstr(name, gRoundingModeNames[i]))
- {
- *round = (RoundingMode)i;
- name += strlen(gRoundingModeNames[i]);
- break;
- }
-
- if (*name != '_') return -2;
- name++;
-
- for (i = 0; i < kTypeCount; i++)
- if (name == strstr(name, gTypeNames[i]))
- {
- *inType = (Type)i;
- name += strlen(gTypeNames[i]);
-
- break;
- }
-
- if (i == kTypeCount) return -3;
-
- if (*name != '\0') return -4;
-
- return 0;
-}
-
-#pragma mark -
-#pragma mark OpenCL
test_status InitCL(cl_device_id device)
{
@@ -678,6 +412,20 @@ test_status InitCL(cl_device_id device)
}
gTestDouble &= gHasDouble;
+ // detect whether profile of the device is embedded
+ char profile[1024] = "";
+ if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile),
+ profile, NULL)))
+ {
+ vlog_error("clGetDeviceInfo failed. (%d)\n", error);
+ return TEST_FAIL;
+ }
+ else if (strstr(profile, "EMBEDDED_PROFILE"))
+ {
+ gIsEmbedded = 1;
+ if (!is_extension_available(device, "cles_khr_int64")) gHasLong = 0;
+ }
+
gContext = clCreateContext(NULL, 1, &device, notify_callback, NULL, &error);
if (NULL == gContext || error)
{
@@ -726,10 +474,6 @@ test_status InitCL(cl_device_id device)
}
}
-
- gMTdata = init_genrand(gRandomSeed);
-
-
char c[1024];
static const char *no_yes[] = { "NO", "YES" };
vlog("\nCompute Device info:\n");
@@ -760,977 +504,4 @@ test_status InitCL(cl_device_id device)
return TEST_PASS;
}
-static int RunKernel(cl_kernel kernel, void *inBuf, void *outBuf,
- size_t blockCount)
-{
- // The global dimensions are just the blockCount to execute since we haven't
- // set up multiple queues for multiple devices.
- int error;
-
- error = clSetKernelArg(kernel, 0, sizeof(inBuf), &inBuf);
- error |= clSetKernelArg(kernel, 1, sizeof(outBuf), &outBuf);
-
- if (error)
- {
- vlog_error("FAILED -- could not set kernel args (%d)\n", error);
- return error;
- }
-
- if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &blockCount,
- NULL, 0, NULL, NULL)))
- {
- vlog_error("FAILED -- could not execute kernel (%d)\n", error);
- return error;
- }
-
- return 0;
-}
-
-#if defined(__APPLE__)
-#include <mach/mach_time.h>
-#endif
-
-uint64_t GetTime(void);
-uint64_t GetTime(void)
-{
-#if defined(__APPLE__)
- return mach_absolute_time();
-#elif defined(_MSC_VER)
- return ReadTime();
-#else
- // mach_absolute_time is a high precision timer with precision < 1
- // microsecond.
-#warning need accurate clock here. Times are invalid.
- return 0;
-#endif
-}
-
-
-#if defined(_MSC_VER)
-/* function is defined in "compat.h" */
-#else
-double SubtractTime(uint64_t endTime, uint64_t startTime);
-double SubtractTime(uint64_t endTime, uint64_t startTime)
-{
- uint64_t diff = endTime - startTime;
- static double conversion = 0.0;
-
- if (0.0 == conversion)
- {
-#if defined(__APPLE__)
- mach_timebase_info_data_t info = { 0, 0 };
- kern_return_t err = mach_timebase_info(&info);
- if (0 == err)
- conversion = 1e-9 * (double)info.numer / (double)info.denom;
-#else
- // This function consumes output from GetTime() above, and converts the
- // time to secionds.
-#warning need accurate ticks to seconds conversion factor here. Times are invalid.
-#endif
- }
-
- // strictly speaking we should also be subtracting out timer latency here
- return conversion * (double)diff;
-}
-#endif
-
-typedef struct CalcReferenceValuesInfo
-{
- struct WriteInputBufferInfo
- *parent; // pointer back to the parent WriteInputBufferInfo struct
- cl_kernel kernel; // the kernel for this vector size
- cl_program program; // the program for this vector size
- cl_uint vectorSize; // the vector size for this callback chain
- void *p; // the pointer to mapped result data for this vector size
- cl_int result;
-} CalcReferenceValuesInfo;
-
-typedef struct WriteInputBufferInfo
-{
- volatile cl_event
- calcReferenceValues; // user event which signals when main thread is
- // done calculating reference values
- volatile cl_event
- doneBarrier; // user event which signals when worker threads are done
- cl_uint count; // the number of elements in the array
- Type outType; // the data type of the conversion result
- Type inType; // the data type of the conversion input
- volatile int barrierCount;
- CalcReferenceValuesInfo calcInfo[kCallStyleCount];
-} WriteInputBufferInfo;
-
-cl_uint RoundUpToNextPowerOfTwo(cl_uint x);
-cl_uint RoundUpToNextPowerOfTwo(cl_uint x)
-{
- if (0 == (x & (x - 1))) return x;
-
- while (x & (x - 1)) x &= x - 1;
-
- return x + x;
-}
-
-void WriteInputBufferComplete(void *);
-
-typedef struct DataInitInfo
-{
- cl_ulong start;
- cl_uint size;
- Type outType;
- Type inType;
- SaturationMode sat;
- RoundingMode round;
- MTdata *d;
-} DataInitInfo;
-
-cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p);
-cl_int InitData(cl_uint job_id, cl_uint thread_id, void *p)
-{
- DataInitInfo *info = (DataInitInfo *)p;
-
- gInitFunctions[info->inType](
- (char *)gIn + job_id * info->size * gTypeSizes[info->inType], info->sat,
- info->round, info->outType, info->start + job_id * info->size,
- info->size, info->d[thread_id]);
- return CL_SUCCESS;
-}
-
-static void setAllowZ(uint8_t *allow, uint32_t *x, cl_uint count)
-{
- cl_uint i;
- for (i = 0; i < count; ++i)
- allow[i] |= (uint8_t)((x[i] & 0x7f800000U) == 0);
-}
-
-cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p);
-cl_int PrepareReference(cl_uint job_id, cl_uint thread_id, void *p)
-{
- DataInitInfo *info = (DataInitInfo *)p;
- cl_uint count = info->size;
- Type inType = info->inType;
- Type outType = info->outType;
- RoundingMode round = info->round;
- size_t j;
-
- Force64BitFPUPrecision();
-
- void *s = (cl_uchar *)gIn + job_id * count * gTypeSizes[info->inType];
- void *a = (cl_uchar *)gAllowZ + job_id * count;
- void *d = (cl_uchar *)gRef + job_id * count * gTypeSizes[info->outType];
-
- if (outType != inType)
- {
- // create the reference while we wait
- Convert f = gConversions[outType][inType];
- if (info->sat) f = gSaturatedConversions[outType][inType];
-
-#if (defined(__arm__) || defined(__aarch64__)) && defined(__GNUC__)
- /* ARM VFP doesn't have hardware instruction for converting from 64-bit
- * integer to float types, hence GCC ARM uses the floating-point
- * emulation code despite which -mfloat-abi setting it is. But the
- * emulation code in libgcc.a has only one rounding mode (round to
- * nearest even in this case) and ignores the user rounding mode setting
- * in hardware. As a result setting rounding modes in hardware won't
- * give correct rounding results for type covert from 64-bit integer to
- * float using GCC for ARM compiler so for testing different rounding
- * modes, we need to use alternative reference function. ARM64 does have
- * an instruction, however we cannot guarantee the compiler will use it.
- * On all ARM architechures use emulation to calculate reference.*/
- switch (round)
- {
- /* conversions to floating-point type use the current rounding mode.
- * The only default floating-point rounding mode supported is round
- * to nearest even i.e the current rounding mode will be _rte for
- * floating-point types. */
- case kDefaultRoundingMode: qcom_rm = qcomRTE; break;
- case kRoundToNearestEven: qcom_rm = qcomRTE; break;
- case kRoundUp: qcom_rm = qcomRTP; break;
- case kRoundDown: qcom_rm = qcomRTN; break;
- case kRoundTowardZero: qcom_rm = qcomRTZ; break;
- default:
- vlog_error("ERROR: undefined rounding mode %d\n", round);
- break;
- }
- qcom_sat = info->sat;
-#endif
-
- RoundingMode oldRound = set_round(round, outType);
- f(d, s, count);
- set_round(oldRound, outType);
-
- // Decide if we allow a zero result in addition to the correctly rounded
- // one
- memset(a, 0, count);
- if (gForceFTZ)
- {
- if (inType == kfloat) setAllowZ((uint8_t *)a, (uint32_t *)s, count);
- if (outType == kfloat)
- setAllowZ((uint8_t *)a, (uint32_t *)d, count);
- }
- }
- else
- {
- // Copy the input to the reference
- memcpy(d, s, info->size * gTypeSizes[inType]);
- }
-
- // Patch up NaNs conversions to integer to zero -- these can be converted to
- // any integer
- if (info->outType != kfloat && info->outType != kdouble)
- {
- if (inType == kfloat)
- {
- float *inp = (float *)s;
- for (j = 0; j < count; j++)
- {
- if (isnan(inp[j]))
- memset((char *)d + j * gTypeSizes[outType], 0,
- gTypeSizes[outType]);
- }
- }
- if (inType == kdouble)
- {
- double *inp = (double *)s;
- for (j = 0; j < count; j++)
- {
- if (isnan(inp[j]))
- memset((char *)d + j * gTypeSizes[outType], 0,
- gTypeSizes[outType]);
- }
- }
- }
- else if (inType == kfloat || inType == kdouble)
- { // outtype and intype is float or double. NaN conversions for float <->
- // double can be any NaN
- if (inType == kfloat && outType == kdouble)
- {
- float *inp = (float *)s;
- for (j = 0; j < count; j++)
- {
- if (isnan(inp[j])) ((double *)d)[j] = NAN;
- }
- }
- if (inType == kdouble && outType == kfloat)
- {
- double *inp = (double *)s;
- for (j = 0; j < count; j++)
- {
- if (isnan(inp[j])) ((float *)d)[j] = NAN;
- }
- }
- }
-
- return CL_SUCCESS;
-}
-
-static int DoTest(cl_device_id device, Type outType, Type inType,
- SaturationMode sat, RoundingMode round, MTdata d)
-{
-#ifdef __APPLE__
- cl_ulong wall_start = mach_absolute_time();
-#endif
-
- DataInitInfo init_info = { 0, 0, outType, inType, sat, round, NULL };
- WriteInputBufferInfo writeInputBufferInfo;
- int vectorSize;
- int error = 0;
- cl_uint threads = GetThreadCount();
- uint64_t i;
-
- gTestCount++;
- size_t blockCount =
- BUFFER_SIZE / std::max(gTypeSizes[inType], gTypeSizes[outType]);
- size_t step = blockCount;
- uint64_t lastCase = 1ULL << (8 * gTypeSizes[inType]);
-
- memset(&writeInputBufferInfo, 0, sizeof(writeInputBufferInfo));
- init_info.d = (MTdata *)malloc(threads * sizeof(MTdata));
- if (NULL == init_info.d)
- {
- vlog_error(
- "ERROR: Unable to allocate storage for random number generator!\n");
- return -1;
- }
- for (i = 0; i < threads; i++)
- {
- init_info.d[i] = init_genrand(genrand_int32(d));
- if (NULL == init_info.d[i])
- {
- vlog_error("ERROR: Unable to allocate storage for random number "
- "generator!\n");
- return -1;
- }
- }
-
- writeInputBufferInfo.outType = outType;
- writeInputBufferInfo.inType = inType;
-
- for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
- {
- writeInputBufferInfo.calcInfo[vectorSize].program =
- MakeProgram(outType, inType, sat, round, vectorSize,
- &writeInputBufferInfo.calcInfo[vectorSize].kernel);
- if (NULL == writeInputBufferInfo.calcInfo[vectorSize].program)
- {
- gFailCount++;
- return -1;
- }
- if (NULL == writeInputBufferInfo.calcInfo[vectorSize].kernel)
- {
- gFailCount++;
- vlog_error("\t\tFAILED -- Failed to create kernel.\n");
- return -2;
- }
-
- writeInputBufferInfo.calcInfo[vectorSize].parent =
- &writeInputBufferInfo;
- writeInputBufferInfo.calcInfo[vectorSize].vectorSize = vectorSize;
- writeInputBufferInfo.calcInfo[vectorSize].result = -1;
- }
-
- if (gSkipTesting) goto exit;
-
- // Patch up rounding mode if default is RTZ
- // We leave the part above in default rounding mode so that the right kernel
- // is compiled.
- if (round == kDefaultRoundingMode && gIsRTZ && (outType == kfloat))
- init_info.round = round = kRoundTowardZero;
-
- // Figure out how many elements are in a work block
-
- // we handle 64-bit types a bit differently.
- if (8 * gTypeSizes[inType] > 32) lastCase = 0x100000000ULL;
-
- if (!gWimpyMode && gIsEmbedded)
- step = blockCount * EMBEDDED_REDUCTION_FACTOR;
-
- if (gWimpyMode) step = (size_t)blockCount * (size_t)gWimpyReductionFactor;
- vlog("Testing... ");
- fflush(stdout);
- for (i = 0; i < (uint64_t)lastCase; i += step)
- {
-
- if (0 == (i & ((lastCase >> 3) - 1)))
- {
- vlog(".");
- fflush(stdout);
- }
-
- cl_uint count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i);
- writeInputBufferInfo.count = count;
-
- // Crate a user event to represent the status of the reference value
- // computation completion
- writeInputBufferInfo.calcReferenceValues =
- clCreateUserEvent(gContext, &error);
- if (error || NULL == writeInputBufferInfo.calcReferenceValues)
- {
- vlog_error("ERROR: Unable to create user event. (%d)\n", error);
- gFailCount++;
- goto exit;
- }
-
- // retain for consumption by MapOutputBufferComplete
- for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
- vectorSize++)
- {
- if ((error =
- clRetainEvent(writeInputBufferInfo.calcReferenceValues)))
- {
- vlog_error("ERROR: Unable to retain user event. (%d)\n", error);
- gFailCount++;
- goto exit;
- }
- }
-
- // Crate a user event to represent when the callbacks are done verifying
- // correctness
- writeInputBufferInfo.doneBarrier = clCreateUserEvent(gContext, &error);
- if (error || NULL == writeInputBufferInfo.calcReferenceValues)
- {
- vlog_error("ERROR: Unable to create user event for barrier. (%d)\n",
- error);
- gFailCount++;
- goto exit;
- }
-
- // retain for use by the callback that calls this
- if ((error = clRetainEvent(writeInputBufferInfo.doneBarrier)))
- {
- vlog_error("ERROR: Unable to retain user event doneBarrier. (%d)\n",
- error);
- gFailCount++;
- goto exit;
- }
-
- // Call this in a multithreaded manner
- // gInitFunctions[ inType ]( gIn, sat, round, outType, i, count, d
- // );
- cl_uint chunks = RoundUpToNextPowerOfTwo(threads) * 2;
- init_info.start = i;
- init_info.size = count / chunks;
- if (init_info.size < 16384)
- {
- chunks = RoundUpToNextPowerOfTwo(threads);
- init_info.size = count / chunks;
- if (init_info.size < 16384)
- {
- init_info.size = count;
- chunks = 1;
- }
- }
- ThreadPool_Do(InitData, chunks, &init_info);
-
- // Copy the results to the device
- if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_TRUE, 0,
- count * gTypeSizes[inType], gIn, 0,
- NULL, NULL)))
- {
- vlog_error("ERROR: clEnqueueWriteBuffer failed. (%d)\n", error);
- gFailCount++;
- goto exit;
- }
-
- // Call completion callback for the write, which will enqueue the rest
- // of the work.
- WriteInputBufferComplete((void *)&writeInputBufferInfo);
-
- // Make sure the work is actually running, so we don't deadlock
- if ((error = clFlush(gQueue)))
- {
- vlog_error("clFlush failed with error %d\n", error);
- gFailCount++;
- goto exit;
- }
-
- ThreadPool_Do(PrepareReference, chunks, &init_info);
-
- // signal we are done calculating the reference results
- if ((error = clSetUserEventStatus(
- writeInputBufferInfo.calcReferenceValues, CL_COMPLETE)))
- {
- vlog_error(
- "Error: Failed to set user event status to CL_COMPLETE: %d\n",
- error);
- gFailCount++;
- goto exit;
- }
-
- // Wait for the event callbacks to finish verifying correctness.
- if ((error = clWaitForEvents(
- 1, (cl_event *)&writeInputBufferInfo.doneBarrier)))
- {
- vlog_error("Error: Failed to wait for barrier: %d\n", error);
- gFailCount++;
- goto exit;
- }
-
- if ((error = clReleaseEvent(writeInputBufferInfo.calcReferenceValues)))
- {
- vlog_error("Error: Failed to release calcReferenceValues: %d\n",
- error);
- gFailCount++;
- goto exit;
- }
-
- if ((error = clReleaseEvent(writeInputBufferInfo.doneBarrier)))
- {
- vlog_error("Error: Failed to release done barrier: %d\n", error);
- gFailCount++;
- goto exit;
- }
-
-
- for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
- vectorSize++)
- {
- if ((error = writeInputBufferInfo.calcInfo[vectorSize].result))
- {
- switch (inType)
- {
- case kuchar:
- case kchar:
- vlog("Input value: 0x%2.2x ",
- ((unsigned char *)gIn)[error - 1]);
- break;
- case kushort:
- case kshort:
- vlog("Input value: 0x%4.4x ",
- ((unsigned short *)gIn)[error - 1]);
- break;
- case kuint:
- case kint:
- vlog("Input value: 0x%8.8x ",
- ((unsigned int *)gIn)[error - 1]);
- break;
- case kfloat:
- vlog("Input value: %a ", ((float *)gIn)[error - 1]);
- break;
- break;
- case kulong:
- case klong:
- vlog("Input value: 0x%16.16llx ",
- ((unsigned long long *)gIn)[error - 1]);
- break;
- case kdouble:
- vlog("Input value: %a ", ((double *)gIn)[error - 1]);
- break;
- default:
- vlog_error("Internal error at %s: %d\n", __FILE__,
- __LINE__);
- abort();
- break;
- }
-
- // tell the user which conversion it was.
- if (0 == vectorSize)
- vlog(" (implicit scalar conversion from %s to %s)\n",
- gTypeNames[inType], gTypeNames[outType]);
- else
- vlog(" (convert_%s%s%s%s( %s%s ))\n", gTypeNames[outType],
- sizeNames[vectorSize], gSaturationNames[sat],
- gRoundingModeNames[round], gTypeNames[inType],
- sizeNames[vectorSize]);
-
- gFailCount++;
- goto exit;
- }
- }
- }
-
- log_info("done.\n");
-
- if (gTimeResults)
- {
- // Kick off tests for the various vector lengths
- for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize;
- vectorSize++)
- {
- size_t workItemCount = blockCount / vectorSizes[vectorSize];
- if (vectorSizes[vectorSize] * gTypeSizes[outType] < 4)
- workItemCount /=
- 4 / (vectorSizes[vectorSize] * gTypeSizes[outType]);
-
- double sum = 0.0;
- double bestTime = INFINITY;
- cl_uint k;
- for (k = 0; k < PERF_LOOP_COUNT; k++)
- {
- uint64_t startTime = GetTime();
- if ((error = RunKernel(
- writeInputBufferInfo.calcInfo[vectorSize].kernel,
- gInBuffer, gOutBuffers[vectorSize], workItemCount)))
- {
- gFailCount++;
- goto exit;
- }
-
- // Make sure OpenCL is done
- if ((error = clFinish(gQueue)))
- {
- vlog_error("Error %d at clFinish\n", error);
- goto exit;
- }
-
- uint64_t endTime = GetTime();
- double time = SubtractTime(endTime, startTime);
- sum += time;
- if (time < bestTime) bestTime = time;
- }
-
- if (gReportAverageTimes) bestTime = sum / PERF_LOOP_COUNT;
- double clocksPerOp = bestTime * (double)gDeviceFrequency
- * gComputeDevices * gSimdSize * 1e6
- / (workItemCount * vectorSizes[vectorSize]);
- if (0 == vectorSize)
- vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element",
- "implicit convert %s -> %s", gTypeNames[inType],
- gTypeNames[outType]);
- else
- vlog_perf(clocksPerOp, LOWER_IS_BETTER, "clocks / element",
- "convert_%s%s%s%s( %s%s )", gTypeNames[outType],
- sizeNames[vectorSize], gSaturationNames[sat],
- gRoundingModeNames[round], gTypeNames[inType],
- sizeNames[vectorSize]);
- }
- }
-
- if (gWimpyMode)
- vlog("\tWimp pass");
- else
- vlog("\tpassed");
-
-#ifdef __APPLE__
- // record the run time
- vlog("\t(%f s)", 1e-9 * (mach_absolute_time() - wall_start));
-#endif
- vlog("\n\n");
- fflush(stdout);
-
-
-exit:
- // clean up
- for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
- {
- clReleaseProgram(writeInputBufferInfo.calcInfo[vectorSize].program);
- clReleaseKernel(writeInputBufferInfo.calcInfo[vectorSize].kernel);
- }
-
- if (init_info.d)
- {
- for (i = 0; i < threads; i++) free_mtdata(init_info.d[i]);
- free(init_info.d);
- }
-
- return error;
-}
-
-void MapResultValuesComplete(void *data);
-
-// Note: not called reentrantly
-void WriteInputBufferComplete(void *data)
-{
- cl_int status;
- WriteInputBufferInfo *info = (WriteInputBufferInfo *)data;
- cl_uint count = info->count;
- int vectorSize;
-
- info->barrierCount = gMaxVectorSize - gMinVectorSize;
-
- // now that we know that the write buffer is complete, enqueue callbacks to
- // wait for the main thread to finish calculating the reference results.
- for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
- {
- size_t workItemCount =
- (count + vectorSizes[vectorSize] - 1) / (vectorSizes[vectorSize]);
-
- if ((status = RunKernel(info->calcInfo[vectorSize].kernel, gInBuffer,
- gOutBuffers[vectorSize], workItemCount)))
- {
- gFailCount++;
- return;
- }
-
- info->calcInfo[vectorSize].p = clEnqueueMapBuffer(
- gQueue, gOutBuffers[vectorSize], CL_TRUE,
- CL_MAP_READ | CL_MAP_WRITE, 0, count * gTypeSizes[info->outType], 0,
- NULL, NULL, &status);
- {
- if (status)
- {
- vlog_error("ERROR: WriteInputBufferComplete calback failed "
- "with status: %d\n",
- status);
- gFailCount++;
- return;
- }
- }
- }
-
- for (vectorSize = gMinVectorSize; vectorSize < gMaxVectorSize; vectorSize++)
- {
- MapResultValuesComplete(info->calcInfo + vectorSize);
- }
-
- // Make sure the work starts moving -- otherwise we may deadlock
- if ((status = clFlush(gQueue)))
- {
- vlog_error(
- "ERROR: WriteInputBufferComplete calback failed with status: %d\n",
- status);
- gFailCount++;
- return;
- }
-
- // e was already released by the main thread. It should be destroyed
- // automatically soon after we exit.
-}
-void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
- void *data);
-
-// Note: May be called reentrantly
-void MapResultValuesComplete(void *data)
-{
- cl_int status;
- CalcReferenceValuesInfo *info = (CalcReferenceValuesInfo *)data;
- cl_event calcReferenceValues = info->parent->calcReferenceValues;
-
- // we know that the map is done, wait for the main thread to finish
- // calculating the reference values
- if ((status = clSetEventCallback(calcReferenceValues, CL_COMPLETE,
- CalcReferenceValuesComplete, data)))
- {
- vlog_error("ERROR: clSetEventCallback failed in "
- "MapResultValuesComplete with status: %d\n",
- status);
- gFailCount++; // not thread safe -- being lazy here
- }
-
- // this thread no longer needs its reference to info->calcReferenceValues,
- // so release it
- if ((status = clReleaseEvent(calcReferenceValues)))
- {
- vlog_error("ERROR: clReleaseEvent(info->calcReferenceValues) failed "
- "with status: %d\n",
- status);
- gFailCount++; // not thread safe -- being lazy here
- }
-
- // no need to flush since we didn't enqueue anything
-
- // e was already released by WriteInputBufferComplete. It should be
- // destroyed automatically soon after we exit.
-}
-
-
-void CL_CALLBACK CalcReferenceValuesComplete(cl_event e, cl_int status,
- void *data)
-{
- CalcReferenceValuesInfo *info = (CalcReferenceValuesInfo *)data;
- cl_uint vectorSize = info->vectorSize;
- cl_uint count = info->parent->count;
- Type outType =
- info->parent->outType; // the data type of the conversion result
- Type inType = info->parent->inType; // the data type of the conversion input
- size_t j;
- cl_int error;
- cl_event doneBarrier = info->parent->doneBarrier;
-
- // report spurious error condition
- if (CL_SUCCESS != status)
- {
- vlog_error("ERROR: CalcReferenceValuesComplete did not succeed! (%d)\n",
- status);
- gFailCount++; // lazy about thread safety here
- return;
- }
-
- // Now we know that both results have been mapped back from the device, and
- // the main thread is done calculating the reference results. It is now time
- // to check the results.
-
- // verify results
- void *mapped = info->p;
-
- // Patch up NaNs conversions to integer to zero -- these can be converted to
- // any integer
- if (outType != kfloat && outType != kdouble)
- {
- if (inType == kfloat)
- {
- float *inp = (float *)gIn;
- for (j = 0; j < count; j++)
- {
- if (isnan(inp[j]))
- memset((char *)mapped + j * gTypeSizes[outType], 0,
- gTypeSizes[outType]);
- }
- }
- if (inType == kdouble)
- {
- double *inp = (double *)gIn;
- for (j = 0; j < count; j++)
- {
- if (isnan(inp[j]))
- memset((char *)mapped + j * gTypeSizes[outType], 0,
- gTypeSizes[outType]);
- }
- }
- }
- else if (inType == kfloat || inType == kdouble)
- { // outtype and intype is float or double. NaN conversions for float <->
- // double can be any NaN
- if (inType == kfloat && outType == kdouble)
- {
- float *inp = (float *)gIn;
- double *outp = (double *)mapped;
- for (j = 0; j < count; j++)
- {
- if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
- }
- }
- if (inType == kdouble && outType == kfloat)
- {
- double *inp = (double *)gIn;
- float *outp = (float *)mapped;
- for (j = 0; j < count; j++)
- {
- if (isnan(inp[j]) && isnan(outp[j])) outp[j] = NAN;
- }
- }
- }
-
- if (memcmp(mapped, gRef, count * gTypeSizes[outType]))
- info->result = gCheckResults[outType](mapped, gRef, gAllowZ, count,
- vectorSizes[vectorSize]);
- else
- info->result = 0;
-
- // Fill the output buffer with junk and release it
- {
- cl_uint pattern = 0xffffdead;
- memset_pattern4(mapped, &pattern, count * gTypeSizes[outType]);
- if ((error = clEnqueueUnmapMemObject(gQueue, gOutBuffers[vectorSize],
- mapped, 0, NULL, NULL)))
- {
- vlog_error("ERROR: clEnqueueUnmapMemObject failed in "
- "CalcReferenceValuesComplete (%d)\n",
- error);
- gFailCount++;
- }
- }
-
- if (1 == ThreadPool_AtomicAdd(&info->parent->barrierCount, -1))
- {
- if ((status = clSetUserEventStatus(doneBarrier, CL_COMPLETE)))
- {
- vlog_error("ERROR: clSetUserEventStatus failed in "
- "CalcReferenceValuesComplete (err: %d). We're probably "
- "going to deadlock.\n",
- status);
- gFailCount++;
- return;
- }
-
- if ((status = clReleaseEvent(doneBarrier)))
- {
- vlog_error("ERROR: clReleaseEvent failed in "
- "CalcReferenceValuesComplete (err: %d).\n",
- status);
- gFailCount++;
- return;
- }
- }
- // e was already released by WriteInputBufferComplete. It should be
- // destroyed automatically soon after all the calls to
- // CalcReferenceValuesComplete exit.
-}
-
-static cl_program MakeProgram(Type outType, Type inType, SaturationMode sat,
- RoundingMode round, int vectorSize,
- cl_kernel *outKernel)
-{
- cl_program program;
- char testName[256];
- int error = 0;
-
- std::ostringstream source;
- if (outType == kdouble || inType == kdouble)
- source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
-
- // Create the program. This is a bit complicated because we are trying to
- // avoid byte and short stores.
- if (0 == vectorSize)
- {
- // Create the type names.
- char inName[32];
- char outName[32];
- strncpy(inName, gTypeNames[inType], sizeof(inName));
- strncpy(outName, gTypeNames[outType], sizeof(outName));
- sprintf(testName, "test_implicit_%s_%s", outName, inName);
-
- source << "__kernel void " << testName << "( __global " << inName
- << " *src, __global " << outName << " *dest )\n";
- source << "{\n";
- source << " size_t i = get_global_id(0);\n";
- source << " dest[i] = src[i];\n";
- source << "}\n";
-
- vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType],
- gTypeNames[outType]);
- fflush(stdout);
- }
- else
- {
- int vectorSizetmp = vectorSizes[vectorSize];
-
- // Create the type names.
- char convertString[128];
- char inName[32];
- char outName[32];
- switch (vectorSizetmp)
- {
- case 1:
- strncpy(inName, gTypeNames[inType], sizeof(inName));
- strncpy(outName, gTypeNames[outType], sizeof(outName));
- snprintf(convertString, sizeof(convertString), "convert_%s%s%s",
- outName, gSaturationNames[sat],
- gRoundingModeNames[round]);
- snprintf(testName, 256, "test_%s_%s", convertString, inName);
- vlog("Building %s( %s ) test\n", convertString, inName);
- break;
- case 3:
- strncpy(inName, gTypeNames[inType], sizeof(inName));
- strncpy(outName, gTypeNames[outType], sizeof(outName));
- snprintf(convertString, sizeof(convertString),
- "convert_%s3%s%s", outName, gSaturationNames[sat],
- gRoundingModeNames[round]);
- snprintf(testName, 256, "test_%s_%s3", convertString, inName);
- vlog("Building %s( %s3 ) test\n", convertString, inName);
- break;
- default:
- snprintf(inName, sizeof(inName), "%s%d", gTypeNames[inType],
- vectorSizetmp);
- snprintf(outName, sizeof(outName), "%s%d", gTypeNames[outType],
- vectorSizetmp);
- snprintf(convertString, sizeof(convertString), "convert_%s%s%s",
- outName, gSaturationNames[sat],
- gRoundingModeNames[round]);
- snprintf(testName, 256, "test_%s_%s", convertString, inName);
- vlog("Building %s( %s ) test\n", convertString, inName);
- break;
- }
- fflush(stdout);
-
- if (vectorSizetmp == 3)
- {
- source << "__kernel void " << testName << "( __global " << inName
- << " *src, __global " << outName << " *dest )\n";
- source << "{\n";
- source << " size_t i = get_global_id(0);\n";
- source << " if( i + 1 < get_global_size(0))\n";
- source << " vstore3( " << convertString
- << "( vload3( i, src)), i, dest );\n";
- source << " else\n";
- source << " {\n";
- source << " " << inName << "3 in;\n";
- source << " " << outName << "3 out;\n";
- source << " if( 0 == (i & 1) )\n";
- source << " in.y = src[3*i+1];\n";
- source << " in.x = src[3*i];\n";
- source << " out = " << convertString << "( in ); \n";
- source << " dest[3*i] = out.x;\n";
- source << " if( 0 == (i & 1) )\n";
- source << " dest[3*i+1] = out.y;\n";
- source << " }\n";
- source << "}\n";
- }
- else
- {
- source << "__kernel void " << testName << "( __global " << inName
- << " *src, __global " << outName << " *dest )\n";
- source << "{\n";
- source << " size_t i = get_global_id(0);\n";
- source << " dest[i] = " << convertString << "( src[i] );\n";
- source << "}\n";
- }
- }
- *outKernel = NULL;
-
- const char *flags = NULL;
- if (gForceFTZ) flags = "-cl-denorms-are-zero";
-
- // build it
- std::string sourceString = source.str();
- const char *programSource = sourceString.c_str();
- error = create_single_kernel_helper(gContext, &program, outKernel, 1,
- &programSource, testName, flags);
- if (error)
- {
- vlog_error("Failed to build kernel/program (err = %d).\n", error);
- clReleaseProgram(program);
- return NULL;
- }
-
- return program;
-}
diff --git a/test_conformance/d3d10/harness.h b/test_conformance/d3d10/harness.h
index 184e52cb..afeb4966 100644
--- a/test_conformance/d3d10/harness.h
+++ b/test_conformance/d3d10/harness.h
@@ -13,8 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-#ifndef _HARNESS_H_
-#define _HARNESS_H_
+#ifndef HARNESS_H_
+#define HARNESS_H_
#define _CRT_SECURE_NO_WARNINGS
diff --git a/test_conformance/device_execution/CMakeLists.txt b/test_conformance/device_execution/CMakeLists.txt
index 5e9e30e3..275b96c2 100644
--- a/test_conformance/device_execution/CMakeLists.txt
+++ b/test_conformance/device_execution/CMakeLists.txt
@@ -17,6 +17,6 @@ set(DEVICE_EXECUTION_SOURCES
utils.cpp
)
-include(../CMakeCommon.txt)
+set_gnulike_module_compile_flags("-Wno-sign-compare")
-# end of file #
+include(../CMakeCommon.txt)
diff --git a/test_conformance/device_execution/enqueue_block.cpp b/test_conformance/device_execution/enqueue_block.cpp
index 29a6cec1..4ddd1db7 100644
--- a/test_conformance/device_execution/enqueue_block.cpp
+++ b/test_conformance/device_execution/enqueue_block.cpp
@@ -27,561 +27,538 @@
#ifdef CL_VERSION_2_0
extern int gWimpyMode;
-static const char* enqueue_simple_block[] =
-{
- NL, "void block_fn(size_t tid, int mul, __global int* res)"
- NL, "{"
- NL, " res[tid] = mul * 7 - 21;"
- NL, "}"
- NL, ""
- NL, "kernel void enqueue_simple_block(__global int* res)"
- NL, "{"
- NL, " int multiplier = 3;"
- NL, " size_t tid = get_global_id(0);"
- NL, ""
- NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
- NL, ""
- NL, " res[tid] = -1;"
- NL, " queue_t def_q = get_default_queue();"
- NL, " ndrange_t ndrange = ndrange_1D(1);"
- NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
- NL, "}"
- NL
-};
-static const char* enqueue_block_with_local_arg1[] =
-{
- NL, "#define LOCAL_MEM_SIZE 10"
- NL, ""
- NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp)"
- NL, "{"
- NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)"
- NL, " {"
- NL, " tmp[i] = mul * 7 - 21;"
- NL, " res[tid] += tmp[i];"
- NL, " }"
- NL, " res[tid] += 2;"
- NL, "}"
- NL, ""
- NL, "kernel void enqueue_block_with_local_arg1(__global int* res)"
- NL, "{"
- NL, " int multiplier = 3;"
- NL, " size_t tid = get_global_id(0);"
- NL, ""
- NL, " void (^kernelBlock)(__local void*) = ^(__local void* buf){ block_fn_local_arg1(tid, multiplier, res, (local int*)buf); };"
- NL, ""
- NL, " res[tid] = -2;"
- NL, " queue_t def_q = get_default_queue();"
- NL, " ndrange_t ndrange = ndrange_1D(1);"
- NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)));"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
- NL, "}"
- NL
-};
+// clang-format off
+static const char* enqueue_simple_block[] = { R"(
+ void block_fn(size_t tid, int mul, __global int* res)
+ {
+ res[tid] = mul * 7 - 21;
+ }
-static const char* enqueue_block_with_local_arg2[] =
-{
- NL, "#define LOCAL_MEM_SIZE 10"
- NL, ""
- NL, "void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp1, __local float4* tmp2)"
- NL, "{"
- NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)"
- NL, " {"
- NL, " tmp1[i] = mul * 7 - 21;"
- NL, " tmp2[i].x = (float)(mul * 7 - 21);"
- NL, " tmp2[i].y = (float)(mul * 7 - 21);"
- NL, " tmp2[i].z = (float)(mul * 7 - 21);"
- NL, " tmp2[i].w = (float)(mul * 7 - 21);"
- NL, ""
- NL, " res[tid] += tmp1[i];"
- NL, " res[tid] += (int)(tmp2[i].x+tmp2[i].y+tmp2[i].z+tmp2[i].w);"
- NL, " }"
- NL, " res[tid] += 2;"
- NL, "}"
- NL, ""
- NL, "kernel void enqueue_block_with_local_arg2(__global int* res)"
- NL, "{"
- NL, " int multiplier = 3;"
- NL, " size_t tid = get_global_id(0);"
- NL, ""
- NL, " void (^kernelBlock)(__local void*, __local void*) = ^(__local void* buf1, __local void* buf2)"
- NL, " { block_fn_local_arg1(tid, multiplier, res, (local int*)buf1, (local float4*)buf2); };"
- NL, ""
- NL, " res[tid] = -2;"
- NL, " queue_t def_q = get_default_queue();"
- NL, " ndrange_t ndrange = ndrange_1D(1);"
- NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)), (uint)(LOCAL_MEM_SIZE*sizeof(float4)));"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
- NL, "}"
- NL
-};
+ kernel void enqueue_simple_block(__global int* res)
+ {
+ int multiplier = 3;
+ size_t tid = get_global_id(0);
-static const char* enqueue_block_with_wait_list[] =
-{
- NL, "#define BLOCK_SUBMITTED 1"
- NL, "#define BLOCK_COMPLETED 2"
- NL, "#define CHECK_SUCCESS 0"
- NL, ""
- NL, "kernel void enqueue_block_with_wait_list(__global int* res)"
- NL, "{"
- NL, " size_t tid = get_global_id(0);"
- NL, ""
- NL, " clk_event_t user_evt = create_user_event();"
- NL, ""
- NL, " res[tid] = BLOCK_SUBMITTED;"
- NL, " queue_t def_q = get_default_queue();"
- NL, " ndrange_t ndrange = ndrange_1D(1);"
- NL, " clk_event_t block_evt;"
- NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt,"
- NL, " ^{"
- NL, " res[tid] = BLOCK_COMPLETED;"
- NL, " });"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
- NL, ""
- NL, " retain_event(block_evt);"
- NL, " release_event(block_evt);"
- NL, ""
- NL, " //check block is not started"
- NL, " if(res[tid] == BLOCK_SUBMITTED)"
- NL, " {"
- NL, " clk_event_t my_evt;"
- NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, "
- NL, " ^{"
- NL, " //check block is completed"
- NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;"
- NL, " });"
- NL, " release_event(my_evt);"
- NL, " }"
- NL, ""
- NL, " set_user_event_status(user_evt, CL_COMPLETE);"
- NL, ""
- NL, " release_event(user_evt);"
- NL, " release_event(block_evt);"
- NL, "}"
- NL
-};
+ void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };
-static const char* enqueue_block_with_wait_list_and_local_arg[] =
-{
- NL, "#define LOCAL_MEM_SIZE 10"
- NL, "#define BLOCK_COMPLETED 1"
- NL, "#define BLOCK_SUBMITTED 2"
- NL, "#define BLOCK_STARTED 3"
- NL, "#define CHECK_SUCCESS 0"
- NL, ""
- NL, "void block_fn_local_arg(size_t tid, int mul, __global int* res, __local int* tmp)"
- NL, "{"
- NL, " res[tid] = BLOCK_STARTED;"
- NL, " for(int i = 0; i < LOCAL_MEM_SIZE; i++)"
- NL, " {"
- NL, " tmp[i] = mul * 7 - 21;"
- NL, " res[tid] += tmp[i];"
- NL, " }"
- NL, " if(res[tid] == BLOCK_STARTED) res[tid] = BLOCK_COMPLETED;"
- NL, "}"
- NL, ""
- NL, "kernel void enqueue_block_with_wait_list_and_local_arg(__global int* res)"
- NL, "{"
- NL, " int multiplier = 3;"
- NL, " size_t tid = get_global_id(0);"
- NL, " clk_event_t user_evt = create_user_event();"
- NL, ""
- NL, " res[tid] = BLOCK_SUBMITTED;"
- NL, " queue_t def_q = get_default_queue();"
- NL, " ndrange_t ndrange = ndrange_1D(1);"
- NL, " clk_event_t block_evt;"
- NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt, "
- NL, " ^(__local void* buf) {"
- NL, " block_fn_local_arg(tid, multiplier, res, (__local int*)buf);"
- NL, " }, LOCAL_MEM_SIZE*sizeof(int));"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
- NL, ""
- NL, " retain_event(block_evt);"
- NL, " release_event(block_evt);"
- NL, ""
- NL, " //check block is not started"
- NL, " if(res[tid] == BLOCK_SUBMITTED)"
- NL, " {"
- NL, " clk_event_t my_evt;"
- NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt, "
- NL, " ^{"
- NL, " //check block is completed"
- NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;"
- NL, " });"
- NL, " release_event(my_evt);"
- NL, " }"
- NL, ""
- NL, " set_user_event_status(user_evt, CL_COMPLETE);"
- NL, ""
- NL, " release_event(user_evt);"
- NL, " release_event(block_evt);"
- NL, "}"
- NL
-};
+ res[tid] = -1;
+ queue_t def_q = get_default_queue();
+ ndrange_t ndrange = ndrange_1D(1);
+ int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
+ if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
+ }
+)" };
-static const char* enqueue_block_get_kernel_work_group_size[] =
-{
- NL, "void block_fn(size_t tid, int mul, __global int* res)"
- NL, "{"
- NL, " res[tid] = mul * 7 - 21;"
- NL, "}"
- NL, ""
- NL, "kernel void enqueue_block_get_kernel_work_group_size(__global int* res)"
- NL, "{"
- NL, " int multiplier = 3;"
- NL, " size_t tid = get_global_id(0);"
- NL, ""
- NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
- NL, ""
- NL, " size_t local_work_size = get_kernel_work_group_size(kernelBlock);"
- NL, " if (local_work_size <= 0){ res[tid] = -1; return; }"
- NL, " size_t global_work_size = local_work_size * 4;"
- NL, ""
- NL, " res[tid] = -1;"
- NL, " queue_t q1 = get_default_queue();"
- NL, " ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);"
- NL, ""
- NL, " int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
- NL, "}"
-};
+static const char* enqueue_block_with_local_arg1[] = { R"(
+ #define LOCAL_MEM_SIZE 10
-static const char* enqueue_block_get_kernel_preferred_work_group_size_multiple[] =
-{
- NL, "void block_fn(size_t tid, int mul, __global int* res)"
- NL, "{"
- NL, " res[tid] = mul * 7 - 21;"
- NL, "}"
- NL, ""
- NL, "kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res)"
- NL, "{"
- NL, " int multiplier = 3;"
- NL, " size_t tid = get_global_id(0);"
- NL, ""
- NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
- NL, ""
- NL, " size_t local_work_size = get_kernel_preferred_work_group_size_multiple(kernelBlock);"
- NL, " if (local_work_size <= 0){ res[tid] = -1; return; }"
- NL, " size_t global_work_size = local_work_size * 4;"
- NL, ""
- NL, " res[tid] = -1;"
- NL, " queue_t q1 = get_default_queue();"
- NL, " ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);"
- NL, ""
- NL, " int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
- NL, "}"
-};
+ void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp)
+ {
+ for (int i = 0; i < LOCAL_MEM_SIZE; i++)
+ {
+ tmp[i] = mul * 7 - 21;
+ res[tid] += tmp[i];
+ }
+ res[tid] += 2;
+ }
-static const char* enqueue_block_capture_event_profiling_info_after_execution[] =
-{
- NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS)
- NL, ""
- NL, "__global ulong value[MAX_GWS*2] = {0};"
- NL, ""
- NL, "void block_fn(size_t tid, __global int* res)"
- NL, "{"
- NL, " res[tid] = -2;"
- NL, "}"
- NL, ""
- NL, "void check_res(size_t tid, const clk_event_t evt, __global int* res)"
- NL, "{"
- NL, " capture_event_profiling_info (evt, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);"
- NL, ""
- NL, " if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;"
- NL, " else res[tid] = -4;"
- NL, " release_event(evt);"
- NL, "}"
- NL, ""
- NL, "kernel void enqueue_block_capture_event_profiling_info_after_execution(__global int* res)"
- NL, "{"
- NL, " size_t tid = get_global_id(0);"
- NL, ""
- NL, " res[tid] = -1;"
- NL, " queue_t def_q = get_default_queue();"
- NL, " ndrange_t ndrange = ndrange_1D(1);"
- NL, " clk_event_t block_evt1;"
- NL, ""
- NL, " void (^kernelBlock)(void) = ^{ block_fn (tid, res); };"
- NL, ""
- NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 0, NULL, &block_evt1, kernelBlock);"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
- NL, ""
- NL, " void (^checkBlock) (void) = ^{ check_res(tid, block_evt1, res); };"
- NL, ""
- NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, NULL, checkBlock);"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
- NL, "}"
- NL
-};
+ kernel void enqueue_block_with_local_arg1(__global int* res)
+ {
+ int multiplier = 3;
+ size_t tid = get_global_id(0);
-static const char* enqueue_block_capture_event_profiling_info_before_execution[] =
-{
- NL, "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS)
- NL, ""
- NL, "__global ulong value[MAX_GWS*2] = {0};"
- NL, ""
- NL, "void block_fn(size_t tid, __global int* res)"
- NL, "{"
- NL, " res[tid] = -2;"
- NL, "}"
- NL, ""
- NL, "void check_res(size_t tid, const ulong *value, __global int* res)"
- NL, "{"
- NL, " if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;"
- NL, " else res[tid] = -4;"
- NL, "}"
- NL, ""
- NL, "kernel void enqueue_block_capture_event_profiling_info_before_execution(__global int* res)"
- NL, "{"
- NL, " int multiplier = 3;"
- NL, " size_t tid = get_global_id(0);"
- NL, " clk_event_t user_evt = create_user_event();"
- NL, ""
- NL, " res[tid] = -1;"
- NL, " queue_t def_q = get_default_queue();"
- NL, " ndrange_t ndrange = ndrange_1D(1);"
- NL, " clk_event_t block_evt1;"
- NL, " clk_event_t block_evt2;"
- NL, ""
- NL, " void (^kernelBlock)(void) = ^{ block_fn (tid, res); };"
- NL, ""
- NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, kernelBlock);"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
- NL, ""
- NL, " capture_event_profiling_info (block_evt1, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);"
- NL, ""
- NL, " set_user_event_status(user_evt, CL_COMPLETE);"
- NL, ""
- NL, " void (^checkBlock) (void) = ^{ check_res(tid, &value, res); };"
- NL, ""
- NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock);"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
- NL, ""
- NL, " release_event(user_evt);"
- NL, " release_event(block_evt1);"
- NL, " release_event(block_evt2);"
- NL, "}"
- NL
-};
+ void (^kernelBlock)(__local void*) = ^(__local void* buf){ block_fn_local_arg1(tid, multiplier, res, (local int*)buf); };
-static const char* enqueue_block_with_barrier[] =
-{
- NL, "void block_fn(size_t tid, int mul, __global int* res)"
- NL, "{"
- NL, " if(mul > 0) barrier(CLK_GLOBAL_MEM_FENCE);"
- NL, " res[tid] = mul * 7 -21;"
- NL, "}"
- NL, ""
- NL, "void loop_fn(size_t tid, int n, __global int* res)"
- NL, "{"
- NL, " while(n > 0)"
- NL, " {"
- NL, " barrier(CLK_GLOBAL_MEM_FENCE);"
- NL, " res[tid] = 0;"
- NL, " --n;"
- NL, " }"
- NL, "}"
- NL, ""
- NL, "kernel void enqueue_block_with_barrier(__global int* res)"
- NL, "{"
- NL, " int multiplier = 3;"
- NL, " size_t tid = get_global_id(0);"
- NL, " queue_t def_q = get_default_queue();"
- NL, " res[tid] = -1;"
- NL, " size_t n = 256;"
- NL, ""
- NL, " void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };"
- NL, ""
- NL, " ndrange_t ndrange = ndrange_1D(n);"
- NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
- NL, ""
- NL, " void (^loopBlock)(void) = ^{ loop_fn(tid, n, res); };"
- NL, ""
- NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, loopBlock);"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
- NL, "}"
- NL
-};
+ res[tid] = -2;
+ queue_t def_q = get_default_queue();
+ ndrange_t ndrange = ndrange_1D(1);
+ int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)));
+ if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
+ }
+)" };
-static const char* enqueue_marker_with_block_event[] =
-{
- NL, "#define BLOCK_COMPLETED 1"
- NL, "#define BLOCK_SUBMITTED 2"
- NL, "#define CHECK_SUCCESS 0"
- NL, ""
- NL, "kernel void enqueue_marker_with_block_event(__global int* res)"
- NL, "{"
- NL, " size_t tid = get_global_id(0);"
- NL, ""
- NL, " clk_event_t user_evt = create_user_event();"
- NL, ""
- NL, " res[tid] = BLOCK_SUBMITTED;"
- NL, " queue_t def_q = get_default_queue();"
- NL, " ndrange_t ndrange = ndrange_1D(1);"
- NL, ""
- NL, " clk_event_t block_evt1;"
- NL, " clk_event_t marker_evt;"
- NL, ""
- NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1,"
- NL, " ^{"
- NL, " res[tid] = BLOCK_COMPLETED;"
- NL, " });"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -2; return; }"
- NL, ""
- NL, " enq_res = enqueue_marker(def_q, 1, &block_evt1, &marker_evt);"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
- NL, ""
- NL, " retain_event(marker_evt);"
- NL, " release_event(marker_evt);"
- NL, ""
- NL, " //check block is not started"
- NL, " if(res[tid] == BLOCK_SUBMITTED)"
- NL, " {"
- NL, " clk_event_t my_evt;"
- NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, "
- NL, " ^{"
- NL, " //check block is completed"
- NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;"
- NL, " });"
- NL, " release_event(my_evt);"
- NL, " }"
- NL, ""
- NL, " set_user_event_status(user_evt, CL_COMPLETE);"
- NL, ""
- NL, " release_event(block_evt1);"
- NL, " release_event(marker_evt);"
- NL, " release_event(user_evt);"
- NL, "}"
- NL
-};
+static const char* enqueue_block_with_local_arg2[] = { R"(
+ #define LOCAL_MEM_SIZE 10
-static const char* enqueue_marker_with_user_event[] =
-{
- NL, "#define BLOCK_COMPLETED 1"
- NL, "#define BLOCK_SUBMITTED 2"
- NL, "#define CHECK_SUCCESS 0"
- NL, ""
- NL, "kernel void enqueue_marker_with_user_event(__global int* res)"
- NL, "{"
- NL, " size_t tid = get_global_id(0);"
- NL, " uint multiplier = 7;"
- NL, ""
- NL, " clk_event_t user_evt = create_user_event();"
- NL, ""
- NL, " res[tid] = BLOCK_SUBMITTED;"
- NL, " queue_t def_q = get_default_queue();"
- NL, " ndrange_t ndrange = ndrange_1D(1);"
- NL, ""
- NL, " clk_event_t marker_evt;"
- NL, " clk_event_t block_evt;"
- NL, ""
- NL, " int enq_res = enqueue_marker(def_q, 1, &user_evt, &marker_evt);"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
- NL, ""
- NL, " retain_event(marker_evt);"
- NL, " release_event(marker_evt);"
- NL, ""
- NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &block_evt, "
- NL, " ^{"
- NL, " if(res[tid] == BLOCK_SUBMITTED) res[tid] = CHECK_SUCCESS;"
- NL, " });"
- NL, ""
- NL, " //check block is not started"
- NL, " if(res[tid] != BLOCK_SUBMITTED) { res[tid] = -2; return; }"
- NL, ""
- NL, " set_user_event_status(user_evt, CL_COMPLETE);"
- NL, ""
- NL, " release_event(block_evt);"
- NL, " release_event(marker_evt);"
- NL, " release_event(user_evt);"
- NL, "}"
- NL
-};
+ void block_fn_local_arg1(size_t tid, int mul, __global int* res, __local int* tmp1, __local float4* tmp2)
+ {
+ for (int i = 0; i < LOCAL_MEM_SIZE; i++)
+ {
+ tmp1[i] = mul * 7 - 21;
+ tmp2[i].x = (float)(mul * 7 - 21);
+ tmp2[i].y = (float)(mul * 7 - 21);
+ tmp2[i].z = (float)(mul * 7 - 21);
+ tmp2[i].w = (float)(mul * 7 - 21);
+
+ res[tid] += tmp1[i];
+ res[tid] += (int)(tmp2[i].x+tmp2[i].y+tmp2[i].z+tmp2[i].w);
+ }
+ res[tid] += 2;
+ }
-static const char* enqueue_marker_with_mixed_events[] =
-{
- NL, "#define BLOCK_COMPLETED 1"
- NL, "#define BLOCK_SUBMITTED 2"
- NL, "#define CHECK_SUCCESS 0"
- NL, ""
- NL, "kernel void enqueue_marker_with_mixed_events(__global int* res)"
- NL, "{"
- NL, " size_t tid = get_global_id(0);"
- NL, ""
- NL, " clk_event_t mix_ev[2];"
- NL, " mix_ev[0] = create_user_event();"
- NL, ""
- NL, " res[tid] = BLOCK_SUBMITTED;"
- NL, " queue_t def_q = get_default_queue();"
- NL, " ndrange_t ndrange = ndrange_1D(1);"
- NL, ""
- NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1],"
- NL, " ^{"
- NL, " res[tid] = BLOCK_COMPLETED;"
- NL, " });"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -2; return; }"
- NL, ""
- NL, " clk_event_t marker_evt;"
- NL, ""
- NL, " enq_res = enqueue_marker(def_q, 2, mix_ev, &marker_evt);"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
- NL, ""
- NL, " retain_event(marker_evt);"
- NL, " release_event(marker_evt);"
- NL, ""
- NL, " //check block is not started"
- NL, " if(res[tid] == BLOCK_SUBMITTED)"
- NL, " {"
- NL, " clk_event_t my_evt;"
- NL, " enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt, "
- NL, " ^{"
- NL, " //check block is completed"
- NL, " if(res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;"
- NL, " });"
- NL, " release_event(my_evt);"
- NL, " }"
- NL, ""
- NL, " set_user_event_status(mix_ev[0], CL_COMPLETE);"
- NL, ""
- NL, " release_event(mix_ev[1]);"
- NL, " release_event(marker_evt);"
- NL, " release_event(mix_ev[0]);"
- NL, "}"
- NL
-};
+ kernel void enqueue_block_with_local_arg2(__global int* res)
+ {
+ int multiplier = 3;
+ size_t tid = get_global_id(0);
-static const char* enqueue_block_with_mixed_events[] =
-{
- NL, "kernel void enqueue_block_with_mixed_events(__global int* res)"
- NL, "{"
- NL, " int enq_res;"
- NL, " size_t tid = get_global_id(0);"
- NL, " clk_event_t mix_ev[3];"
- NL, " mix_ev[0] = create_user_event();"
- NL, " queue_t def_q = get_default_queue();"
- NL, " ndrange_t ndrange = ndrange_1D(1);"
- NL, " res[tid] = -2;"
- NL, ""
- NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], ^{ res[tid]++; });"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -1; return; }"
- NL, ""
- NL, " enq_res = enqueue_marker(def_q, 1, &mix_ev[1], &mix_ev[2]);"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -3; return; }"
- NL, ""
- NL, " enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, sizeof(mix_ev)/sizeof(mix_ev[0]), mix_ev, NULL, ^{ res[tid]++; });"
- NL, " if(enq_res != CLK_SUCCESS) { res[tid] = -4; return; }"
- NL, ""
- NL, " set_user_event_status(mix_ev[0], CL_COMPLETE);"
- NL, ""
- NL, " release_event(mix_ev[0]);"
- NL, " release_event(mix_ev[1]);"
- NL, " release_event(mix_ev[2]);"
- NL, "}"
- NL
-};
+ void (^kernelBlock)(__local void*, __local void*) = ^(__local void* buf1, __local void* buf2)
+ { block_fn_local_arg1(tid, multiplier, res, (local int*)buf1, (local float4*)buf2); };
+
+ res[tid] = -2;
+ queue_t def_q = get_default_queue();
+ ndrange_t ndrange = ndrange_1D(1);
+ int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock, (uint)(LOCAL_MEM_SIZE*sizeof(int)), (uint)(LOCAL_MEM_SIZE*sizeof(float4)));
+ if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
+ }
+)" };
+
+static const char* enqueue_block_with_wait_list[] = { R"(
+ #define BLOCK_SUBMITTED 1
+ #define BLOCK_COMPLETED 2
+ #define CHECK_SUCCESS 0
+
+ kernel void enqueue_block_with_wait_list(__global int* res)
+ {
+ size_t tid = get_global_id(0);
+
+ clk_event_t user_evt = create_user_event();
+
+ res[tid] = BLOCK_SUBMITTED;
+ queue_t def_q = get_default_queue();
+ ndrange_t ndrange = ndrange_1D(1);
+ clk_event_t block_evt;
+ int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt,
+ ^{
+ res[tid] = BLOCK_COMPLETED;
+ });
+ if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
+
+ retain_event(block_evt);
+ release_event(block_evt);
+
+ //check block is not started
+ if (res[tid] == BLOCK_SUBMITTED)
+ {
+ clk_event_t my_evt;
+ enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt,
+ ^{
+ //check block is completed
+ if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;
+ });
+ release_event(my_evt);
+ }
+
+ set_user_event_status(user_evt, CL_COMPLETE);
+
+ release_event(user_evt);
+ release_event(block_evt);
+ }
+)" };
+
+static const char* enqueue_block_with_wait_list_and_local_arg[] = { R"(
+ #define LOCAL_MEM_SIZE 10
+ #define BLOCK_COMPLETED 1
+ #define BLOCK_SUBMITTED 2
+ #define BLOCK_STARTED 3
+ #define CHECK_SUCCESS 0
+
+ void block_fn_local_arg(size_t tid, int mul, __global int* res, __local int* tmp)
+ {
+ res[tid] = BLOCK_STARTED;
+ for (int i = 0; i < LOCAL_MEM_SIZE; i++)
+ {
+ tmp[i] = mul * 7 - 21;
+ res[tid] += tmp[i];
+ }
+ if (res[tid] == BLOCK_STARTED) res[tid] = BLOCK_COMPLETED;
+ }
+
+ kernel void enqueue_block_with_wait_list_and_local_arg(__global int* res)
+ {
+ int multiplier = 3;
+ size_t tid = get_global_id(0);
+ clk_event_t user_evt = create_user_event();
+
+ res[tid] = BLOCK_SUBMITTED;
+ queue_t def_q = get_default_queue();
+ ndrange_t ndrange = ndrange_1D(1);
+ clk_event_t block_evt;
+ int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt,
+ ^(__local void* buf) {
+ block_fn_local_arg(tid, multiplier, res, (__local int*)buf);
+ }, LOCAL_MEM_SIZE*sizeof(int));
+ if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
+
+ retain_event(block_evt);
+ release_event(block_evt);
+
+ //check block is not started
+ if (res[tid] == BLOCK_SUBMITTED)
+ {
+ clk_event_t my_evt;
+ enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt, &my_evt,
+ ^{
+ //check block is completed
+ if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;
+ });
+ release_event(my_evt);
+ }
+
+ set_user_event_status(user_evt, CL_COMPLETE);
+
+ release_event(user_evt);
+ release_event(block_evt);
+ }
+)" };
+
+static const char* enqueue_block_get_kernel_work_group_size[] = { R"(
+ void block_fn(size_t tid, int mul, __global int* res)
+ {
+ res[tid] = mul * 7 - 21;
+ }
+
+ kernel void enqueue_block_get_kernel_work_group_size(__global int* res)
+ {
+ int multiplier = 3;
+ size_t tid = get_global_id(0);
+
+ void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };
+
+ size_t local_work_size = get_kernel_work_group_size(kernelBlock);
+ if (local_work_size <= 0){ res[tid] = -1; return; }
+ size_t global_work_size = local_work_size * 4;
+
+ res[tid] = -1;
+ queue_t q1 = get_default_queue();
+ ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);
+
+ int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
+ if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
+ }
+)" };
+
+static const char* enqueue_block_get_kernel_preferred_work_group_size_multiple[] = { R"(
+ void block_fn(size_t tid, int mul, __global int* res)
+ {
+ res[tid] = mul * 7 - 21;
+ }
+
+ kernel void enqueue_block_get_kernel_preferred_work_group_size_multiple(__global int* res)
+ {
+ int multiplier = 3;
+ size_t tid = get_global_id(0);
+
+ void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };
+
+ size_t local_work_size = get_kernel_preferred_work_group_size_multiple(kernelBlock);
+ if (local_work_size <= 0){ res[tid] = -1; return; }
+ size_t global_work_size = local_work_size * 4;
+
+ res[tid] = -1;
+ queue_t q1 = get_default_queue();
+ ndrange_t ndrange = ndrange_1D(global_work_size, local_work_size);
+
+ int enq_res = enqueue_kernel(q1, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
+ if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
+ }
+)" };
+
+static const char* enqueue_block_capture_event_profiling_info_after_execution[] = {
+ "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) "\n"
+ , R"(
+ __global ulong value[MAX_GWS*2] = {0};
+
+ void block_fn(size_t tid, __global int* res)
+ {
+ res[tid] = -2;
+ }
+
+ void check_res(size_t tid, const clk_event_t evt, __global int* res)
+ {
+ capture_event_profiling_info (evt, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);
+
+ if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;
+ else res[tid] = -4;
+ release_event(evt);
+ }
+
+ kernel void enqueue_block_capture_event_profiling_info_after_execution(__global int* res)
+ {
+ size_t tid = get_global_id(0);
+
+ res[tid] = -1;
+ queue_t def_q = get_default_queue();
+ ndrange_t ndrange = ndrange_1D(1);
+ clk_event_t block_evt1;
+
+ void (^kernelBlock)(void) = ^{ block_fn (tid, res); };
+
+ int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 0, NULL, &block_evt1, kernelBlock);
+ if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
+
+ void (^checkBlock) (void) = ^{ check_res(tid, block_evt1, res); };
+
+ enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, NULL, checkBlock);
+ if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }
+ }
+)" };
+
+static const char* enqueue_block_capture_event_profiling_info_before_execution[] = {
+ "#define MAX_GWS " STRINGIFY_VALUE(MAX_GWS) "\n"
+ , R"(
+ __global ulong value[MAX_GWS*2] = {0};
+
+ void block_fn(size_t tid, __global int* res)
+ {
+ res[tid] = -2;
+ }
+
+ void check_res(size_t tid, const ulong *value, __global int* res)
+ {
+ if (value[tid*2] > 0 && value[tid*2+1] > 0) res[tid] = 0;
+ else res[tid] = -4;
+ }
+
+ kernel void enqueue_block_capture_event_profiling_info_before_execution(__global int* res)
+ {
+ int multiplier = 3;
+ size_t tid = get_global_id(0);
+ clk_event_t user_evt = create_user_event();
+
+ res[tid] = -1;
+ queue_t def_q = get_default_queue();
+ ndrange_t ndrange = ndrange_1D(1);
+ clk_event_t block_evt1;
+ clk_event_t block_evt2;
+
+ void (^kernelBlock)(void) = ^{ block_fn (tid, res); };
+
+ int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1, kernelBlock);
+ if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
+
+ capture_event_profiling_info (block_evt1, CLK_PROFILING_COMMAND_EXEC_TIME, &value[tid*2]);
+
+ set_user_event_status(user_evt, CL_COMPLETE);
+
+ void (^checkBlock) (void) = ^{ check_res(tid, &value, res); };
+
+ enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &block_evt1, &block_evt2, checkBlock);
+ if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }
+
+ release_event(user_evt);
+ release_event(block_evt1);
+ release_event(block_evt2);
+ }
+)" };
+
+static const char* enqueue_block_with_barrier[] = { R"(
+ void block_fn(size_t tid, int mul, __global int* res)
+ {
+ if (mul > 0) barrier(CLK_GLOBAL_MEM_FENCE);
+ res[tid] = mul * 7 -21;
+ }
+
+ void loop_fn(size_t tid, int n, __global int* res)
+ {
+ while (n > 0)
+ {
+ barrier(CLK_GLOBAL_MEM_FENCE);
+ res[tid] = 0;
+ --n;
+ }
+ }
+
+ kernel void enqueue_block_with_barrier(__global int* res)
+ {
+ int multiplier = 3;
+ size_t tid = get_global_id(0);
+ queue_t def_q = get_default_queue();
+ res[tid] = -1;
+ size_t n = 256;
+
+ void (^kernelBlock)(void) = ^{ block_fn(tid, multiplier, res); };
+
+ ndrange_t ndrange = ndrange_1D(n);
+ int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
+ if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
+
+ void (^loopBlock)(void) = ^{ loop_fn(tid, n, res); };
+
+ enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, loopBlock);
+ if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
+ }
+)" };
+
+static const char* enqueue_marker_with_block_event[] = { R"(
+ #define BLOCK_COMPLETED 1
+ #define BLOCK_SUBMITTED 2
+ #define CHECK_SUCCESS 0
+
+ kernel void enqueue_marker_with_block_event(__global int* res)
+ {
+ size_t tid = get_global_id(0);
+
+ clk_event_t user_evt = create_user_event();
+
+ res[tid] = BLOCK_SUBMITTED;
+ queue_t def_q = get_default_queue();
+ ndrange_t ndrange = ndrange_1D(1);
+
+ clk_event_t block_evt1;
+ clk_event_t marker_evt;
+
+ int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &user_evt, &block_evt1,
+ ^{
+ res[tid] = BLOCK_COMPLETED;
+ });
+ if (enq_res != CLK_SUCCESS) { res[tid] = -2; return; }
+
+ enq_res = enqueue_marker(def_q, 1, &block_evt1, &marker_evt);
+ if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }
+
+ retain_event(marker_evt);
+ release_event(marker_evt);
+
+ //check block is not started
+ if (res[tid] == BLOCK_SUBMITTED)
+ {
+ clk_event_t my_evt;
+ enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt,
+ ^{
+ //check block is completed
+ if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;
+ });
+ release_event(my_evt);
+ }
+
+ set_user_event_status(user_evt, CL_COMPLETE);
+
+ release_event(block_evt1);
+ release_event(marker_evt);
+ release_event(user_evt);
+ }
+)" };
+
+static const char* enqueue_marker_with_user_event[] = { R"(
+ #define BLOCK_COMPLETED 1
+ #define BLOCK_SUBMITTED 2
+ #define CHECK_SUCCESS 0
+
+ kernel void enqueue_marker_with_user_event(__global int* res)
+ {
+ size_t tid = get_global_id(0);
+ uint multiplier = 7;
+
+ clk_event_t user_evt = create_user_event();
+
+ res[tid] = BLOCK_SUBMITTED;
+ queue_t def_q = get_default_queue();
+ ndrange_t ndrange = ndrange_1D(1);
+
+ clk_event_t marker_evt;
+ clk_event_t block_evt;
+
+ int enq_res = enqueue_marker(def_q, 1, &user_evt, &marker_evt);
+ if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
+
+ retain_event(marker_evt);
+ release_event(marker_evt);
+
+ enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &block_evt,
+ ^{
+ if (res[tid] == BLOCK_SUBMITTED) res[tid] = CHECK_SUCCESS;
+ });
+
+ //check block is not started
+ if (res[tid] != BLOCK_SUBMITTED) { res[tid] = -2; return; }
+
+ set_user_event_status(user_evt, CL_COMPLETE);
+
+ release_event(block_evt);
+ release_event(marker_evt);
+ release_event(user_evt);
+ }
+)" };
+
+static const char* enqueue_marker_with_mixed_events[] = { R"(
+ #define BLOCK_COMPLETED 1
+ #define BLOCK_SUBMITTED 2
+ #define CHECK_SUCCESS 0
+
+ kernel void enqueue_marker_with_mixed_events(__global int* res)
+ {
+ size_t tid = get_global_id(0);
+
+ clk_event_t mix_ev[2];
+ mix_ev[0] = create_user_event();
+
+ res[tid] = BLOCK_SUBMITTED;
+ queue_t def_q = get_default_queue();
+ ndrange_t ndrange = ndrange_1D(1);
+
+ int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1],
+ ^{
+ res[tid] = BLOCK_COMPLETED;
+ });
+ if (enq_res != CLK_SUCCESS) { res[tid] = -2; return; }
+
+ clk_event_t marker_evt;
+
+ enq_res = enqueue_marker(def_q, 2, mix_ev, &marker_evt);
+ if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }
+
+ retain_event(marker_evt);
+ release_event(marker_evt);
+
+ //check block is not started
+ if (res[tid] == BLOCK_SUBMITTED)
+ {
+ clk_event_t my_evt;
+ enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &marker_evt, &my_evt,
+ ^{
+ //check block is completed
+ if (res[tid] == BLOCK_COMPLETED) res[tid] = CHECK_SUCCESS;
+ });
+ release_event(my_evt);
+ }
+
+ set_user_event_status(mix_ev[0], CL_COMPLETE);
+
+ release_event(mix_ev[1]);
+ release_event(marker_evt);
+ release_event(mix_ev[0]);
+ }
+)" };
+
+static const char* enqueue_block_with_mixed_events[] = { R"(
+ kernel void enqueue_block_with_mixed_events(__global int* res)
+ {
+ int enq_res;
+ size_t tid = get_global_id(0);
+ clk_event_t mix_ev[3];
+ mix_ev[0] = create_user_event();
+ queue_t def_q = get_default_queue();
+ ndrange_t ndrange = ndrange_1D(1);
+ res[tid] = -2;
+
+ enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, 1, &mix_ev[0], &mix_ev[1], ^{ res[tid]++; });
+ if (enq_res != CLK_SUCCESS) { res[tid] = -1; return; }
+
+ enq_res = enqueue_marker(def_q, 1, &mix_ev[1], &mix_ev[2]);
+ if (enq_res != CLK_SUCCESS) { res[tid] = -3; return; }
+
+ enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_NO_WAIT, ndrange, sizeof(mix_ev)/sizeof(mix_ev[0]), mix_ev, NULL, ^{ res[tid]++; });
+ if (enq_res != CLK_SUCCESS) { res[tid] = -4; return; }
+
+ set_user_event_status(mix_ev[0], CL_COMPLETE);
+
+ release_event(mix_ev[0]);
+ release_event(mix_ev[1]);
+ release_event(mix_ev[2]);
+ }
+)" };
+// clang-format on
static const kernel_src sources_enqueue_block[] =
{
diff --git a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt
index 4b9968c3..8a4a116a 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt
+++ b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt
@@ -3,6 +3,7 @@ set(MODULE_NAME CL_KHR_COMMAND_BUFFER)
set(${MODULE_NAME}_SOURCES
main.cpp
basic_command_buffer.cpp
+ svm_command_basic.cpp
command_buffer_printf.cpp
command_buffer_get_command_buffer_info.cpp
command_buffer_set_kernel_arg.cpp
@@ -14,8 +15,11 @@ set(${MODULE_NAME}_SOURCES
command_buffer_test_copy.cpp
command_buffer_test_barrier.cpp
command_buffer_test_event_info.cpp
+ command_buffer_finalize.cpp
)
+set_gnulike_module_compile_flags("-Wno-sign-compare")
+
include(../../CMakeCommon.txt)
add_subdirectory( cl_khr_command_buffer_mutable_dispatch )
diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp
index 43734da0..6c02f9f7 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp
@@ -201,14 +201,33 @@ struct BasicEnqueueTest : public BasicCommandBufferTest
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
- std::vector<cl_int> output_data(num_elements);
+ std::vector<cl_int> output_data_1(num_elements);
error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
- output_data.data(), 0, nullptr, nullptr);
+ output_data_1.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < num_elements; i++)
{
- CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+ CHECK_VERIFICATION_ERROR(pattern, output_data_1[i], i);
+ }
+
+ const cl_int new_pattern = 12;
+ error = clEnqueueFillBuffer(queue, in_mem, &new_pattern, sizeof(cl_int),
+ 0, data_size(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillBuffer failed");
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_int> output_data_2(num_elements);
+ error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
+ output_data_2.data(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ for (size_t i = 0; i < num_elements; i++)
+ {
+ CHECK_VERIFICATION_ERROR(new_pattern, output_data_2[i], i);
}
return CL_SUCCESS;
diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h
index a20229e0..d08a11af 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.h
@@ -13,8 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef _CL_KHR_BASIC_COMMAND_BUFFER_H
-#define _CL_KHR_BASIC_COMMAND_BUFFER_H
+#ifndef CL_KHR_BASIC_COMMAND_BUFFER_H
+#define CL_KHR_BASIC_COMMAND_BUFFER_H
#include "command_buffer_test_base.h"
#include "harness/typeWrappers.h"
@@ -28,12 +28,24 @@
{ \
if (reference != result) \
{ \
- log_error("Expected %d was %d at index %u\n", reference, result, \
+ log_error("Expected %d was %d at index %zu\n", reference, result, \
index); \
return TEST_FAIL; \
} \
}
+// Resolves the extension entry point named FUNC through
+// clGetExtensionFunctionAddressForPlatform and stores it in the variable
+// FUNC (cast to FUNC##_fn). A variable named `platform` must be in scope at
+// the point of use. If the lookup yields nullptr an error is logged and the
+// *enclosing function* returns TEST_FAIL.
+// The body is wrapped in do { } while (0) so that the expansion is a single
+// statement and stays correct under an unbraced if/else; invoke it with a
+// trailing semicolon.
+#define GET_EXTENSION_ADDRESS(FUNC)                                            \
+    do                                                                         \
+    {                                                                          \
+        FUNC = reinterpret_cast<FUNC##_fn>(                                    \
+            clGetExtensionFunctionAddressForPlatform(platform, #FUNC));        \
+        if (FUNC == nullptr)                                                   \
+        {                                                                      \
+            log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \
+                      " with " #FUNC "\n");                                    \
+            return TEST_FAIL;                                                  \
+        }                                                                      \
+    } while (0)
+
+
// Helper test fixture for constructing OpenCL objects used in testing
// a variety of simple command-buffer enqueue scenarios.
struct BasicCommandBufferTest : CommandBufferTestBase
@@ -70,6 +82,7 @@ protected:
clCommandBufferWrapper command_buffer;
};
+
template <class T>
int MakeAndRunTest(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements)
@@ -99,4 +112,4 @@ int MakeAndRunTest(cl_device_id device, cl_context context,
return TEST_PASS;
}
-#endif // _CL_KHR_BASIC_COMMAND_BUFFER_H
+#endif // CL_KHR_BASIC_COMMAND_BUFFER_H
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt
index e0625833..9b598d8b 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/CMakeLists.txt
@@ -3,7 +3,15 @@ set(MODULE_NAME CL_KHR_MUTABLE_DISPATCH)
set(${MODULE_NAME}_SOURCES
main.cpp
mutable_command_info.cpp
+ mutable_command_image_arguments.cpp
+ mutable_command_arguments.cpp
+ mutable_command_out_of_order.cpp
+ mutable_command_global_size.cpp
+ mutable_command_local_size.cpp
+ mutable_command_global_offset.cpp
../basic_command_buffer.cpp
)
+set_gnulike_module_compile_flags("-Wno-sign-compare")
+
include(../../../CMakeCommon.txt)
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp
index 97075792..a2fae497 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/main.cpp
@@ -26,6 +26,18 @@ test_definition test_list[] = {
ADD_TEST(mutable_command_info_global_work_offset),
ADD_TEST(mutable_command_info_local_work_size),
ADD_TEST(mutable_command_info_global_work_size),
+ ADD_TEST(mutable_dispatch_image_1d_arguments),
+ ADD_TEST(mutable_dispatch_image_2d_arguments),
+ ADD_TEST(mutable_dispatch_out_of_order),
+ ADD_TEST(mutable_dispatch_simultaneous_out_of_order),
+ ADD_TEST(mutable_dispatch_global_size),
+ ADD_TEST(mutable_dispatch_local_size),
+ ADD_TEST(mutable_dispatch_global_offset),
+ ADD_TEST(mutable_dispatch_svm_arguments),
+ ADD_TEST(mutable_dispatch_local_arguments),
+ ADD_TEST(mutable_dispatch_global_arguments),
+ ADD_TEST(mutable_dispatch_pod_arguments),
+ ADD_TEST(mutable_dispatch_null_arguments),
};
int main(int argc, const char *argv[])
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_arguments.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_arguments.cpp
new file mode 100644
index 00000000..5c8291f0
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_arguments.cpp
@@ -0,0 +1,847 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "testHarness.h"
+#include "imageHelpers.h"
+#include "mutable_command_basic.h"
+
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+////////////////////////////////////////////////////////////////////////////////
+// Mutable dispatch tests which handle the following cases for
+// CL_MUTABLE_DISPATCH_ARGUMENTS_KHR:
+// - __global arguments
+// - __local arguments
+// - plain-old-data arguments
+// - NULL arguments
+// - SVM arguments
+
+// Checks that a __global buffer argument of a recorded ND-range kernel
+// command can be replaced with clUpdateMutableCommandsKHR.
+// The kernel copies src to dst. It is recorded with dstBuf0 as argument 1,
+// executed and verified; argument 1 is then switched to dstBuf1, the command
+// buffer is executed again and dstBuf1 is verified against the same source.
+struct MutableDispatchGlobalArguments : public BasicMutableCommandBufferTest
+{
+    using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+
+    MutableDispatchGlobalArguments(cl_device_id device, cl_context context,
+                                   cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    // NOTE(review): the value returned by the base-class SetUp is discarded
+    // and 0 is always reported - confirm this is intentional.
+    virtual cl_int SetUp(int elements) override
+    {
+        BasicMutableCommandBufferTest::SetUp(elements);
+
+        return 0;
+    }
+
+    // Returns TEST_PASS when both the initial and the modified execution
+    // reproduce the source data, TEST_FAIL on a mismatch, or the failing
+    // OpenCL error code via test_error.
+    cl_int Run() override
+    {
+        cl_int error;
+
+        // Create kernel
+
+        const char *sample_const_arg_kernel =
+            R"(
+ __kernel void sample_test(__constant int *src, __global int *dst)
+ {
+ size_t tid = get_global_id(0);
+ dst[tid] = src[tid];
+ })";
+
+        error = create_single_kernel_helper(context, &program, &kernel, 1,
+                                            &sample_const_arg_kernel,
+                                            "sample_test");
+        test_error(error, "Creating kernel failed");
+
+        // Create and initialize buffers
+
+        MTdataHolder d(gRandomSeed);
+
+        std::vector<cl_int> srcData(num_elements);
+        for (size_t i = 0; i < num_elements; i++)
+            srcData[i] = (cl_int)genrand_int32(d);
+
+        clMemWrapper srcBuf = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
+                                             num_elements * sizeof(cl_int),
+                                             srcData.data(), &error);
+        test_error(error, "Creating src buffer");
+
+        clMemWrapper dstBuf0 =
+            clCreateBuffer(context, CL_MEM_READ_WRITE,
+                           num_elements * sizeof(cl_int), NULL, &error);
+        test_error(error, "Creating initial dst buffer failed");
+
+        clMemWrapper dstBuf1 =
+            clCreateBuffer(context, CL_MEM_READ_WRITE,
+                           num_elements * sizeof(cl_int), NULL, &error);
+        test_error(error, "Creating updated dst buffer failed");
+
+        // Build and execute the command buffer for the initial execution
+
+        error = clSetKernelArg(kernel, 0, sizeof(srcBuf), &srcBuf);
+        test_error(error, "Unable to set src kernel arguments");
+
+        error = clSetKernelArg(kernel, 1, sizeof(dstBuf0), &dstBuf0);
+        test_error(error, "Unable to set initial dst kernel argument");
+
+        // Mark the kernel arguments of this command as updatable.
+        cl_ndrange_kernel_command_properties_khr props[] = {
+            CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
+            CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
+        };
+
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        // Check the results of the initial execution
+
+        std::vector<cl_int> dstData0(num_elements);
+        error = clEnqueueReadBuffer(queue, dstBuf0, CL_TRUE, 0,
+                                    num_elements * sizeof(cl_int),
+                                    dstData0.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer for initial dst failed");
+
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            if (srcData[i] != dstData0[i])
+            {
+                log_error("Initial data failed to verify: src[%zu]=%d != "
+                          "dst[%zu]=%d\n",
+                          i, srcData[i], i, dstData0[i]);
+                return TEST_FAIL;
+            }
+        }
+
+        // Modify and execute the command buffer
+
+        // Redirect kernel argument 1 (dst) to the second buffer.
+        cl_mutable_dispatch_arg_khr arg{ 1, sizeof(dstBuf1), &dstBuf1 };
+
+        cl_mutable_dispatch_config_khr dispatch_config{
+            CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR,
+            nullptr,
+            command,
+            1 /* num_args */,
+            0 /* num_svm_arg */,
+            0 /* num_exec_infos */,
+            0 /* work_dim - 0 means no change to dimensions */,
+            &arg /* arg_list */,
+            nullptr /* arg_svm_list - nullptr means no change*/,
+            nullptr /* exec_info_list */,
+            nullptr /* global_work_offset */,
+            nullptr /* global_work_size */,
+            nullptr /* local_work_size */
+        };
+
+        cl_mutable_base_config_khr mutable_config{
+            CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1,
+            &dispatch_config
+        };
+
+        error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config);
+        test_error(error, "clUpdateMutableCommandsKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        // Check the results of the modified execution
+
+        std::vector<cl_int> dstData1(num_elements);
+        error = clEnqueueReadBuffer(queue, dstBuf1, CL_TRUE, 0,
+                                    num_elements * sizeof(cl_int),
+                                    dstData1.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer for modified dst failed");
+
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            if (srcData[i] != dstData1[i])
+            {
+                // Label the failure as belonging to the modified pass so it
+                // can be told apart from the initial verification above.
+                log_error("Modified data failed to verify: src[%zu]=%d != "
+                          "dst[%zu]=%d\n",
+                          i, srcData[i], i, dstData1[i]);
+                return TEST_FAIL;
+            }
+        }
+
+        return TEST_PASS;
+    }
+
+    // Handle of the recorded ND-range command; target of the update.
+    cl_mutable_command_khr command = nullptr;
+};
+
+// Exercises updating a __local kernel argument of a recorded ND-range
+// command with clUpdateMutableCommandsKHR.
+// The kernel stages src1[tid] through the __local array and writes it to
+// dst; the host then compares dst with the data used to create src1.
+struct MutableDispatchLocalArguments : public BasicMutableCommandBufferTest
+{
+    using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+
+    MutableDispatchLocalArguments(cl_device_id device, cl_context context,
+                                  cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    // NOTE(review): the value returned by the base-class SetUp is discarded
+    // and 0 is always reported - confirm this is intentional.
+    virtual cl_int SetUp(int elements) override
+    {
+        BasicMutableCommandBufferTest::SetUp(elements);
+
+        return 0;
+    }
+
+    // Returns TEST_PASS when the read-back data equals the source data,
+    // TEST_FAIL on a mismatch, or the failing OpenCL error code via
+    // test_error.
+    cl_int Run() override
+    {
+        const char *sample_const_arg_kernel =
+            R"(
+ __kernel void sample_test(__constant int *src1, __local int
+ *src, __global int *dst)
+ {
+ size_t tid = get_global_id(0);
+ src[tid] = src1[tid];
+ dst[tid] = src[tid];
+ })";
+
+        cl_int error;
+        clProgramWrapper program;
+        clKernelWrapper kernel;
+        size_t threads[1], localThreads[1];
+        std::vector<cl_int> constantData;
+        std::vector<cl_int> resultData;
+
+        error = create_single_kernel_helper(context, &program, &kernel, 1,
+                                            &sample_const_arg_kernel,
+                                            "sample_test");
+        test_error(error, "Creating kernel failed");
+
+        MTdataHolder d(gRandomSeed);
+
+        // Round max_size down to a whole number of cl_int elements.
+        size_t sizeToAllocate =
+            ((size_t)max_size / sizeof(cl_int)) * sizeof(cl_int);
+        size_t numberOfInts = sizeToAllocate / sizeof(cl_int);
+        constantData.resize(sizeToAllocate / sizeof(cl_int));
+        resultData.resize(sizeToAllocate / sizeof(cl_int));
+
+        for (size_t i = 0; i < numberOfInts; i++)
+            constantData[i] = (cl_int)genrand_int32(d);
+
+        clMemWrapper streams[2];
+        streams[0] =
+            clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate,
+                           constantData.data(), &error);
+        test_error(error, "Creating test array failed");
+        streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate,
+                                    nullptr, &error);
+        test_error(error, "Creating test array failed");
+
+        /* Set the arguments */
+        error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &streams[0]);
+        test_error(error, "Unable to set indexed kernel arguments");
+        // Argument 1 is the __local array: a size with a null value pointer.
+        error =
+            clSetKernelArg(kernel, 1, numberOfInts * sizeof(cl_int), nullptr);
+        test_error(error, "Unable to set indexed kernel arguments");
+        error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &streams[1]);
+        test_error(error, "Unable to set indexed kernel arguments");
+
+        threads[0] = numberOfInts;
+        localThreads[0] = 1;
+
+        // Mark the kernel arguments of this command as updatable.
+        cl_ndrange_kernel_command_properties_khr props[] = {
+            CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
+            CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
+        };
+
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, props, kernel, 1, nullptr, threads,
+            localThreads, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        // NOTE(review): the updated size for the __local argument is
+        // sizeof(cl_mem), not numberOfInts * sizeof(cl_int) as used for the
+        // initial clSetKernelArg - confirm this is the intended size.
+        cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(cl_mem), nullptr };
+        cl_mutable_dispatch_arg_khr args[] = { arg_1 };
+
+        cl_mutable_dispatch_config_khr dispatch_config{
+            CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR,
+            nullptr,
+            command,
+            1 /* num_args */,
+            0 /* num_svm_arg */,
+            0 /* num_exec_infos */,
+            0 /* work_dim - 0 means no change to dimensions */,
+            args /* arg_list */,
+            nullptr /* arg_svm_list - nullptr means no change*/,
+            nullptr /* exec_info_list */,
+            nullptr /* global_work_offset */,
+            nullptr /* global_work_size */,
+            nullptr /* local_work_size */
+        };
+        cl_mutable_base_config_khr mutable_config{
+            CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1,
+            &dispatch_config
+        };
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed.");
+
+        error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config);
+        test_error(error, "clUpdateMutableCommandsKHR failed");
+
+        // NOTE(review): the command buffer is not enqueued again after the
+        // update, so the data read back here was produced by the first
+        // execution - confirm this is intended.
+        error =
+            clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate,
+                                resultData.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        for (size_t i = 0; i < numberOfInts; i++)
+            if (constantData[i] != resultData[i])
+            {
+                // The index is a size_t, so it must be printed with %zu.
+                log_error("Data failed to verify: constantData[%zu]=%d != "
+                          "resultData[%zu]=%d\n",
+                          i, constantData[i], i, resultData[i]);
+                return TEST_FAIL;
+            }
+
+        return TEST_PASS;
+    }
+
+    // Handle of the recorded ND-range command; target of the update.
+    cl_mutable_command_khr command = nullptr;
+    // Byte budget used to size the test buffers and the __local array.
+    const cl_ulong max_size = 16;
+};
+
+// Exercises updating a plain-old-data (by-value int) kernel argument of a
+// recorded ND-range command with clUpdateMutableCommandsKHR: the argument is
+// recorded as 10 and updated to 20.
+// NOTE(review): the kernel only assigns to its by-value parameter `dst`, and
+// the buffer read back at the end is the source buffer itself, so the final
+// comparison does not observe the POD value - confirm this is intended.
+struct MutableDispatchPODArguments : public BasicMutableCommandBufferTest
+{
+    using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+
+    MutableDispatchPODArguments(cl_device_id device, cl_context context,
+                                cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    // Returns TEST_PASS when the read-back data equals the source data,
+    // TEST_FAIL on a mismatch, or the failing OpenCL error code via
+    // test_error.
+    cl_int Run() override
+    {
+        const char *sample_const_arg_kernel =
+            R"(
+ __kernel void sample_test(__constant int *src, int dst)
+ {
+ size_t tid = get_global_id(0);
+ dst = src[tid];
+ })";
+
+        cl_int error;
+        clProgramWrapper program;
+        clKernelWrapper kernel;
+        size_t threads[1], localThreads[1];
+        std::vector<cl_int> constantData;
+        std::vector<cl_int> resultData;
+
+        error = create_single_kernel_helper(context, &program, &kernel, 1,
+                                            &sample_const_arg_kernel,
+                                            "sample_test");
+        test_error(error, "Creating kernel failed");
+
+        MTdataHolder d(gRandomSeed);
+
+        // Round max_size down to a whole number of cl_int elements.
+        size_t sizeToAllocate =
+            ((size_t)max_size / sizeof(cl_int)) * sizeof(cl_int);
+        size_t numberOfInts = sizeToAllocate / sizeof(cl_int);
+        constantData.resize(sizeToAllocate / sizeof(cl_int));
+        resultData.resize(sizeToAllocate / sizeof(cl_int));
+
+        for (size_t i = 0; i < numberOfInts; i++)
+            constantData[i] = (cl_int)genrand_int32(d);
+
+        clMemWrapper stream;
+        stream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate,
+                                constantData.data(), &error);
+        test_error(error, "Creating test array failed");
+
+
+        /* Set the arguments */
+        error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream);
+        test_error(error, "Unable to set indexed kernel arguments");
+        cl_int intarg = 10;
+        error = clSetKernelArg(kernel, 1, sizeof(cl_int), &intarg);
+        test_error(error, "Unable to set indexed kernel arguments");
+
+        threads[0] = numberOfInts;
+        localThreads[0] = 1;
+
+        // Mark the kernel arguments of this command as updatable.
+        cl_ndrange_kernel_command_properties_khr props[] = {
+            CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
+            CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
+        };
+
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, props, kernel, 1, nullptr, threads,
+            localThreads, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        // New value for the by-value argument at index 1.
+        intarg = 20;
+        cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(cl_int), &intarg };
+        cl_mutable_dispatch_arg_khr args[] = { arg_1 };
+
+        cl_mutable_dispatch_config_khr dispatch_config{
+            CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR,
+            nullptr,
+            command,
+            1 /* num_args */,
+            0 /* num_svm_arg */,
+            0 /* num_exec_infos */,
+            0 /* work_dim - 0 means no change to dimensions */,
+            args /* arg_list */,
+            nullptr /* arg_svm_list - nullptr means no change*/,
+            nullptr /* exec_info_list */,
+            nullptr /* global_work_offset */,
+            nullptr /* global_work_size */,
+            nullptr /* local_work_size */
+        };
+        cl_mutable_base_config_khr mutable_config{
+            CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1,
+            &dispatch_config
+        };
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed.");
+
+        error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config);
+        test_error(error, "clUpdateMutableCommandsKHR failed");
+
+        // NOTE(review): the command buffer is not enqueued again after the
+        // update - confirm this is intended.
+        error = clEnqueueReadBuffer(queue, stream, CL_TRUE, 0, sizeToAllocate,
+                                    resultData.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        for (size_t i = 0; i < numberOfInts; i++)
+            if (constantData[i] != resultData[i])
+            {
+                // The index is a size_t, so it must be printed with %zu.
+                log_error("Data failed to verify: constantData[%zu]=%d != "
+                          "resultData[%zu]=%d\n",
+                          i, constantData[i], i, resultData[i]);
+                return TEST_FAIL;
+            }
+
+        return TEST_PASS;
+    }
+
+    // Handle of the recorded ND-range command; target of the update.
+    cl_mutable_command_khr command = nullptr;
+    // Byte budget used to size the test buffer.
+    const cl_ulong max_size = 16;
+};
+
+// Exercises replacing a buffer kernel argument with a NULL argument through
+// clUpdateMutableCommandsKHR.
+// The kernel stores src[tid] in dst when src is non-null and 12345 otherwise.
+// The first execution runs with a real source buffer and must reproduce it;
+// argument 0 is then updated to a null value and the second execution must
+// fill dst with 12345.
+struct MutableDispatchNullArguments : public BasicMutableCommandBufferTest
+{
+    using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+
+    MutableDispatchNullArguments(cl_device_id device, cl_context context,
+                                 cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    // Returns TEST_PASS when both executions verify, TEST_FAIL on a data
+    // mismatch, or the failing OpenCL error code via test_error.
+    cl_int Run() override
+    {
+        // Kernel under test.
+        const char *sample_const_arg_kernel =
+            R"(
+ __kernel void sample_test(__constant int *src, __global int *dst)
+ {
+ size_t tid = get_global_id(0);
+ dst[tid] = src ? src[tid] : 12345;
+ })";
+
+        cl_int error = create_single_kernel_helper(
+            context, &program, &kernel, 1, &sample_const_arg_kernel,
+            "sample_test");
+        test_error(error, "Creating kernel failed");
+
+        // Host reference data and device buffers.
+        const size_t buffer_bytes = num_elements * sizeof(cl_int);
+
+        MTdataHolder seed(gRandomSeed);
+
+        std::vector<cl_int> input(num_elements);
+        for (cl_int &value : input)
+        {
+            value = (cl_int)genrand_int32(seed);
+        }
+
+        clMemWrapper srcBuf = clCreateBuffer(
+            context, CL_MEM_COPY_HOST_PTR, buffer_bytes, input.data(), &error);
+        test_error(error, "Creating src buffer");
+
+        clMemWrapper dstBuf = clCreateBuffer(context, CL_MEM_READ_WRITE,
+                                             buffer_bytes, NULL, &error);
+        test_error(error, "Creating dst buffer failed");
+
+        // Record the kernel with both real buffers bound.
+        error = clSetKernelArg(kernel, 0, sizeof(srcBuf), &srcBuf);
+        test_error(error, "Unable to set src kernel arguments");
+
+        error = clSetKernelArg(kernel, 1, sizeof(dstBuf), &dstBuf);
+        test_error(error, "Unable to set initial dst kernel argument");
+
+        // Mark the kernel arguments of this command as updatable.
+        cl_ndrange_kernel_command_properties_khr props[] = {
+            CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
+            CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
+        };
+
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        // First pass: dst must mirror the source data.
+        std::vector<cl_int> firstPass(num_elements);
+        error =
+            clEnqueueReadBuffer(queue, dstBuf, CL_TRUE, 0, buffer_bytes,
+                                firstPass.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer for initial dst failed");
+
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            if (input[i] == firstPass[i]) continue;
+
+            log_error("Initial data failed to verify: src[%zu]=%d != "
+                      "dst[%zu]=%d\n",
+                      i, input[i], i, firstPass[i]);
+            return TEST_FAIL;
+        }
+
+        // Rebind argument 0 (src) to a null value and run again.
+        cl_mutable_dispatch_arg_khr null_src_arg{ 0, sizeof(cl_mem), nullptr };
+
+        // Value-initialised: every field not assigned below stays zero /
+        // nullptr, i.e. "no change".
+        cl_mutable_dispatch_config_khr dispatch_config{};
+        dispatch_config.type = CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR;
+        dispatch_config.command = command;
+        dispatch_config.num_args = 1;
+        dispatch_config.arg_list = &null_src_arg;
+
+        cl_mutable_base_config_khr mutable_config{};
+        mutable_config.type = CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR;
+        mutable_config.num_mutable_dispatch = 1;
+        mutable_config.mutable_dispatch_list = &dispatch_config;
+
+        error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config);
+        test_error(error, "clUpdateMutableCommandsKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        // Second pass: every element must hold the kernel's fallback value.
+        const cl_int fallback = 12345;
+
+        std::vector<cl_int> secondPass(num_elements);
+        error =
+            clEnqueueReadBuffer(queue, dstBuf, CL_TRUE, 0, buffer_bytes,
+                                secondPass.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer for modified dst failed");
+
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            if (fallback == secondPass[i]) continue;
+
+            log_error("Modified data failed to verify: %d != dst[%zu]=%d\n",
+                      fallback, i, secondPass[i]);
+            return TEST_FAIL;
+        }
+
+        return TEST_PASS;
+    }
+
+    // Handle of the recorded ND-range command; target of the update.
+    cl_mutable_command_khr command = nullptr;
+    const cl_ulong max_size = 16;
+};
+
+// Checks that an SVM pointer kernel argument, and the associated
+// CL_KERNEL_EXEC_INFO_SVM_PTRS exec info, of a recorded ND-range command can
+// be replaced with clUpdateMutableCommandsKHR.
+// The kernel receives a wrapper struct holding one pointer and increments
+// the int array it points to. The first execution uses initWrapper and
+// initBuffer, the second (after the update) newWrapper and newBuffer; each
+// buffer starts zero-filled and must contain 1 in every element afterwards.
+struct MutableDispatchSVMArguments : public BasicMutableCommandBufferTest
+{
+    using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+
+    MutableDispatchSVMArguments(cl_device_id device, cl_context context,
+                                cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    // Skips when the CL_DEVICE_SVM_CAPABILITIES query fails or reports no
+    // capabilities, or when the base fixture asks to skip.
+    bool Skip() override
+    {
+        cl_device_svm_capabilities svm_caps;
+        bool svm_capabilities =
+            !clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES,
+                             sizeof(svm_caps), &svm_caps, NULL)
+            && svm_caps != 0;
+
+        return !svm_capabilities || BasicMutableCommandBufferTest::Skip();
+    }
+
+    // Runs the base set-up and builds the test kernel.
+    // NOTE(review): the results of the base-class SetUp and of
+    // create_single_kernel_helper are discarded and 0 is always reported -
+    // confirm this is intentional.
+    virtual cl_int SetUp(int elements) override
+    {
+        BasicMutableCommandBufferTest::SetUp(elements);
+
+        const char *svm_arguments_kernel =
+            R"(
+ typedef struct {
+ global int* ptr;
+ } wrapper;
+ __kernel void test_svm_arguments(__global wrapper* pWrapper)
+ {
+ size_t i = get_global_id(0);
+ pWrapper->ptr[i]++;
+ })";
+
+        create_single_kernel_helper(context, &program, &kernel, 1,
+                                    &svm_arguments_kernel,
+                                    "test_svm_arguments");
+
+        return 0;
+    }
+
+    // Returns TEST_PASS when both executions verify, TEST_FAIL on a data
+    // mismatch, or the failing OpenCL error code via test_error.
+    // NOTE(review): the SVM allocations are only freed on the success path;
+    // the early TEST_FAIL returns below leave them allocated.
+    cl_int Run() override
+    {
+        const cl_int zero = 0;
+        cl_int error;
+
+        // Allocate and initialize SVM for initial execution
+
+        cl_int *initWrapper = (cl_int *)clSVMAlloc(context, CL_MEM_READ_WRITE,
+                                                   sizeof(cl_int *), 0);
+        cl_int *initBuffer = (cl_int *)clSVMAlloc(
+            context, CL_MEM_READ_WRITE, num_elements * sizeof(cl_int), 0);
+        test_assert_error(initWrapper != nullptr && initBuffer != nullptr,
+                          "clSVMAlloc failed for initial execution");
+
+        // Store the address of initBuffer inside the wrapper.
+        error = clEnqueueSVMMemcpy(queue, CL_TRUE, initWrapper, &initBuffer,
+                                   sizeof(cl_int *), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueSVMMemcpy failed for initWrapper");
+
+        error = clEnqueueSVMMemFill(queue, initBuffer, &zero, sizeof(zero),
+                                    num_elements * sizeof(cl_int), 0, nullptr,
+                                    nullptr);
+        test_error(error, "clEnqueueSVMMemFill failed for initBuffer");
+
+        // Allocate and initialize SVM for modified execution
+
+        // The wrapper holds a pointer, so it must be pointer-sized: the
+        // memcpy below writes sizeof(cl_int *) bytes into it.
+        cl_int *newWrapper = (cl_int *)clSVMAlloc(context, CL_MEM_READ_WRITE,
+                                                  sizeof(cl_int *), 0);
+        cl_int *newBuffer = (cl_int *)clSVMAlloc(
+            context, CL_MEM_READ_WRITE, num_elements * sizeof(cl_int), 0);
+        test_assert_error(newWrapper != nullptr && newBuffer != nullptr,
+                          "clSVMAlloc failed for modified execution");
+
+        // Store the address of newBuffer inside the wrapper.
+        error = clEnqueueSVMMemcpy(queue, CL_TRUE, newWrapper, &newBuffer,
+                                   sizeof(cl_int *), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueSVMMemcpy failed for newWrapper");
+
+        error = clEnqueueSVMMemFill(queue, newBuffer, &zero, sizeof(zero),
+                                    num_elements * sizeof(cl_int), 0, nullptr,
+                                    nullptr);
+        test_error(error, "clEnqueueSVMMemFill failed for newBuffer");
+
+        // Build and execute the command buffer for the initial execution
+
+        error = clSetKernelArgSVMPointer(kernel, 0, initWrapper);
+        test_error(error, "clSetKernelArg failed for initWrapper");
+
+        // initBuffer is only reached through the wrapper, so it is declared
+        // to the kernel via exec info.
+        error = clSetKernelExecInfo(kernel, CL_KERNEL_EXEC_INFO_SVM_PTRS,
+                                    sizeof(initBuffer), &initBuffer);
+        test_error(error, "clSetKernelExecInfo failed for initBuffer");
+
+        // Both the arguments and the exec info of this command are updatable.
+        cl_ndrange_kernel_command_properties_khr props[] = {
+            CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
+            CL_MUTABLE_DISPATCH_ARGUMENTS_KHR
+                | CL_MUTABLE_DISPATCH_EXEC_INFO_KHR,
+            0
+        };
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, props, kernel, 1, nullptr, &num_elements,
+            nullptr, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+
+        // Check the results of the initial execution
+
+        error =
+            clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_READ, initBuffer,
+                            num_elements * sizeof(cl_int), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueSVMMap failed for initBuffer");
+
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            if (initBuffer[i] != 1)
+            {
+                log_error("Initial verification failed at index %zu: Got %d, "
+                          "wanted 1\n",
+                          i, initBuffer[i]);
+                return TEST_FAIL;
+            }
+        }
+
+        error = clEnqueueSVMUnmap(queue, initBuffer, 0, nullptr, nullptr);
+        test_error(error, "clEnqueueSVMUnmap failed for initBuffer");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+
+        // Modify and execute the command buffer
+
+        cl_mutable_dispatch_arg_khr arg_svm{};
+        arg_svm.arg_index = 0;
+        arg_svm.arg_value = newWrapper;
+
+        cl_mutable_dispatch_exec_info_khr exec_info{};
+        exec_info.param_name = CL_KERNEL_EXEC_INFO_SVM_PTRS;
+        exec_info.param_value_size = sizeof(newBuffer);
+        exec_info.param_value = &newBuffer;
+
+        cl_mutable_dispatch_config_khr dispatch_config{};
+        dispatch_config.type = CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR;
+        dispatch_config.command = command;
+        dispatch_config.num_svm_args = 1;
+        dispatch_config.arg_svm_list = &arg_svm;
+        dispatch_config.num_exec_infos = 1;
+        dispatch_config.exec_info_list = &exec_info;
+
+        cl_mutable_base_config_khr mutable_config{};
+        mutable_config.type = CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR;
+        mutable_config.num_mutable_dispatch = 1;
+        mutable_config.mutable_dispatch_list = &dispatch_config;
+
+        error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config);
+        test_error(error, "clUpdateMutableCommandsKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        // Check the results of the modified execution
+
+        error =
+            clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_READ, newBuffer,
+                            num_elements * sizeof(cl_int), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueSVMMap failed for newBuffer");
+
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            if (newBuffer[i] != 1)
+            {
+                log_error("Modified verification failed at index %zu: Got %d, "
+                          "wanted 1\n",
+                          i, newBuffer[i]);
+                return TEST_FAIL;
+            }
+        }
+
+        error = clEnqueueSVMUnmap(queue, newBuffer, 0, nullptr, nullptr);
+        test_error(error, "clEnqueueSVMUnmap failed for newBuffer");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed");
+
+        // Clean up
+
+        clSVMFree(context, initWrapper);
+        clSVMFree(context, initBuffer);
+        clSVMFree(context, newWrapper);
+        clSVMFree(context, newBuffer);
+
+        return TEST_PASS;
+    }
+
+    // Handle of the recorded ND-range command; target of the update.
+    cl_mutable_command_khr command = nullptr;
+};
+
+
+// Harness entry point: runs the MutableDispatchLocalArguments fixture through
+// MakeAndRunTest and forwards its result.
+int test_mutable_dispatch_local_arguments(cl_device_id device,
+                                          cl_context context,
+                                          cl_command_queue queue,
+                                          int num_elements)
+{
+    using Fixture = MutableDispatchLocalArguments;
+    const int status =
+        MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+    return status;
+}
+
+// Harness entry point: runs the MutableDispatchGlobalArguments fixture
+// through MakeAndRunTest and forwards its result.
+int test_mutable_dispatch_global_arguments(cl_device_id device,
+                                           cl_context context,
+                                           cl_command_queue queue,
+                                           int num_elements)
+{
+    using Fixture = MutableDispatchGlobalArguments;
+    const int status =
+        MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+    return status;
+}
+
+// Harness entry point: runs the MutableDispatchPODArguments fixture through
+// MakeAndRunTest and forwards its result.
+int test_mutable_dispatch_pod_arguments(cl_device_id device, cl_context context,
+                                        cl_command_queue queue,
+                                        int num_elements)
+{
+    using Fixture = MutableDispatchPODArguments;
+    const int status =
+        MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+    return status;
+}
+
+// Harness entry point: runs the MutableDispatchNullArguments fixture through
+// MakeAndRunTest and forwards its result.
+int test_mutable_dispatch_null_arguments(cl_device_id device,
+                                         cl_context context,
+                                         cl_command_queue queue,
+                                         int num_elements)
+{
+    using Fixture = MutableDispatchNullArguments;
+    const int status =
+        MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+    return status;
+}
+
+// Harness entry point: runs the MutableDispatchSVMArguments fixture through
+// MakeAndRunTest and forwards its result.
+int test_mutable_dispatch_svm_arguments(cl_device_id device, cl_context context,
+                                        cl_command_queue queue,
+                                        int num_elements)
+{
+    using Fixture = MutableDispatchSVMArguments;
+    const int status =
+        MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+    return status;
+} \ No newline at end of file
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h
index 9056a00d..19147556 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_basic.h
@@ -13,8 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef _CL_KHR_MUTABLE_COMMAND_BASIC_H
-#define _CL_KHR_MUTABLE_COMMAND_BASIC_H
+#ifndef CL_KHR_MUTABLE_COMMAND_BASIC_H
+#define CL_KHR_MUTABLE_COMMAND_BASIC_H
#include "../basic_command_buffer.h"
#include "../command_buffer_test_base.h"
@@ -84,24 +84,52 @@ struct BasicMutableCommandBufferTest : BasicCommandBufferTest
&platform, nullptr);
test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
- // If it is supported get the addresses of all the APIs here.
-#define GET_EXTENSION_ADDRESS(FUNC) \
- FUNC = reinterpret_cast<FUNC##_fn>( \
- clGetExtensionFunctionAddressForPlatform(platform, #FUNC)); \
- if (FUNC == nullptr) \
- { \
- log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \
- " with " #FUNC "\n"); \
- return TEST_FAIL; \
+ GET_EXTENSION_ADDRESS(clUpdateMutableCommandsKHR);
+
+ return CL_SUCCESS;
}
+
+ clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr;
+
+ const char* kernelString = "__kernel void empty() {}";
+ const size_t global_work_size = 4 * 16;
+};
+
+// Fixture layered on BasicMutableCommandBufferTest that additionally looks up
+// the clGetMutableCommandInfoKHR entry point and stores it in a member of the
+// same name, for tests that query mutable-command info.
+struct InfoMutableCommandBufferTest : BasicMutableCommandBufferTest
+{
+ InfoMutableCommandBufferTest(cl_device_id device, cl_context context,
+ cl_command_queue queue)
+ : BasicMutableCommandBufferTest(device, context, queue)
+ {}
+
+ // Runs the base-class SetUp, then this fixture's init_extension_functions().
+ // Only the latter's status is checked; the base SetUp return value is
+ // discarded.
+ // NOTE(review): confirm whether discarding the base status is intended
+ // before changing it - derived Skip() overrides may depend on SetUp
+ // succeeding on devices that lack the extension.
+ virtual cl_int SetUp(int elements) override
+ {
+ BasicMutableCommandBufferTest::SetUp(elements);
+
+ cl_int error = init_extension_functions();
+ test_error(error, "Unable to initialise extension functions");
+
+ return CL_SUCCESS;
+ }
+
+ // Calls BasicCommandBufferTest::init_extension_functions() (status ignored;
+ // note this names BasicCommandBufferTest, not the direct base), queries the
+ // device's platform, and resolves clGetMutableCommandInfoKHR through
+ // GET_EXTENSION_ADDRESS.
+ // NOTE(review): GET_EXTENSION_ADDRESS is defined outside this hunk;
+ // presumably it returns a failure code when the lookup yields nullptr.
+ cl_int init_extension_functions()
+ {
+ BasicCommandBufferTest::init_extension_functions();
+
+ cl_platform_id platform;
+ cl_int error =
+ clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id),
+ &platform, nullptr);
+ test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
+
GET_EXTENSION_ADDRESS(clGetMutableCommandInfoKHR);
return CL_SUCCESS;
}
clGetMutableCommandInfoKHR_fn clGetMutableCommandInfoKHR = nullptr;
- const char* kernelString = "__kernel void empty() {}";
- const size_t global_work_size = 4 * sizeof(cl_int);
};
-#endif //_CL_KHR_MUTABLE_COMMAND_BASIC_H \ No newline at end of file
+#undef GET_EXTENSION_ADDRESS
+
+#endif // CL_KHR_MUTABLE_COMMAND_BASIC_H
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp
new file mode 100644
index 00000000..80bc015a
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_offset.cpp
@@ -0,0 +1,170 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <extensionHelpers.h>
+#include "imageHelpers.h"
+#include "mutable_command_basic.h"
+
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+
+////////////////////////////////////////////////////////////////////////////////
+// mutable dispatch tests which handle following cases:
+//
+// CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR
+
+// Checks that the global work offset of a recorded ND-range command can be
+// changed through clUpdateMutableCommandsKHR, that the new offset is reported
+// by clGetMutableCommandInfoKHR, and that the kernel observes it when the
+// command buffer is enqueued again.
+struct MutableDispatchGlobalOffset : InfoMutableCommandBufferTest
+{
+    using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
+
+    MutableDispatchGlobalOffset(cl_device_id device, cl_context context,
+                                cl_command_queue queue)
+        : InfoMutableCommandBufferTest(device, context, queue)
+    {}
+
+    // Skips when the device does not report
+    // CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR in its mutable-dispatch
+    // capabilities (a failing capability query also skips), or when the base
+    // fixture requests a skip.
+    bool Skip() override
+    {
+        cl_mutable_dispatch_fields_khr mutable_capabilities;
+
+        bool mutable_support =
+            !clGetDeviceInfo(
+                device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
+                sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
+            && mutable_capabilities & CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR;
+
+        return !mutable_support || InfoMutableCommandBufferTest::Skip();
+    }
+
+    // Records the kernel without an offset, runs it, updates the offset to
+    // update_global_offset, runs it again and validates both the queried
+    // offset and the buffer contents.
+    // Returns CL_SUCCESS on success, TEST_FAIL or an OpenCL error otherwise.
+    cl_int Run() override
+    {
+        // Each work-item stores the dispatch's global offset at its global id.
+        const char *global_offset_kernel =
+            R"(
+                __kernel void sample_test(__global int *dst)
+                {
+                    size_t tid = get_global_id(0);
+                    dst[tid] = get_global_offset(0);
+                })";
+
+        cl_int error =
+            create_single_kernel_helper(context, &program, &kernel, 1,
+                                        &global_offset_kernel, "sample_test");
+        test_error(error, "Creating kernel failed");
+
+        clMemWrapper stream;
+        stream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate,
+                                nullptr, &error);
+        test_error(error, "Creating test array failed");
+
+        /* Set the arguments */
+        error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream);
+        test_error(error, "Unable to set indexed kernel arguments");
+
+        // Initial recording: no global offset (nullptr).
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &global_work_size, nullptr, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed.");
+
+        // Only the global work offset is modified; every other field is left
+        // at "no change".
+        cl_mutable_dispatch_config_khr dispatch_config{
+            CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR,
+            nullptr,
+            command,
+            0 /* num_args */,
+            0 /* num_svm_arg */,
+            0 /* num_exec_infos */,
+            0 /* work_dim - 0 means no change to dimensions */,
+            nullptr /* arg_list */,
+            nullptr /* arg_svm_list - nullptr means no change*/,
+            nullptr /* exec_info_list */,
+            &update_global_offset /* global_work_offset */,
+            nullptr /* global_work_size */,
+            nullptr /* local_work_size */
+        };
+        cl_mutable_base_config_khr mutable_config{
+            CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1,
+            &dispatch_config
+        };
+
+        error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config);
+        test_error(error, "clUpdateMutableCommandsKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clGetMutableCommandInfoKHR(
+            command, CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR,
+            sizeof(info_global_offset), &info_global_offset, nullptr);
+        test_error(error, "clGetMutableCommandInfoKHR failed");
+
+        if (info_global_offset != update_global_offset)
+        {
+            log_error("ERROR: Wrong global work offset returned from "
+                      "clGetMutableCommandInfoKHR.\n");
+            return TEST_FAIL;
+        }
+
+        std::vector<cl_int> resultData;
+        resultData.resize(num_elements);
+
+        error = clEnqueueReadBuffer(queue, stream, CL_TRUE, 0, sizeToAllocate,
+                                    resultData.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        // The first run (offset 0) wrote 0 to [0, global_work_size); the
+        // second run wrote the offset to [offset, offset + global_work_size).
+        // So elements below the offset must still hold 0 and all remaining
+        // elements must hold the updated offset.
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            const size_t expected =
+                (i < update_global_offset) ? 0 : update_global_offset;
+            if (expected != static_cast<size_t>(resultData[i]))
+            {
+                log_error("Data failed to verify: expected %zu != "
+                          "resultData[%zu]=%d\n",
+                          expected, i, resultData[i]);
+                return TEST_FAIL;
+            }
+        }
+        return CL_SUCCESS;
+    }
+
+    // Offset read back through clGetMutableCommandInfoKHR.
+    size_t info_global_offset = 0;
+    // Offset applied by the mutable update.
+    const size_t update_global_offset = 3;
+    // Buffer covers the original range plus the shifted range of the re-run.
+    const size_t sizeToAllocate =
+        (global_work_size + update_global_offset) * sizeof(cl_int);
+    const size_t num_elements = sizeToAllocate / sizeof(cl_int);
+    // Handle of the recorded ND-range command that gets updated.
+    cl_mutable_command_khr command = nullptr;
+};
+
+// Harness entry point: runs the MutableDispatchGlobalOffset fixture through
+// MakeAndRunTest and returns that call's result unchanged.
+int test_mutable_dispatch_global_offset(cl_device_id device, cl_context context,
+                                        cl_command_queue queue,
+                                        int num_elements)
+{
+    using Fixture = MutableDispatchGlobalOffset;
+    const int status =
+        MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+    return status;
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp
new file mode 100644
index 00000000..091f0c8d
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_global_size.cpp
@@ -0,0 +1,167 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <extensionHelpers.h>
+#include "imageHelpers.h"
+#include "mutable_command_basic.h"
+
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+
+////////////////////////////////////////////////////////////////////////////////
+// mutable dispatch tests which handle following cases:
+//
+// CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR
+
+// Checks that the global work size of a recorded ND-range command can be
+// changed through clUpdateMutableCommandsKHR, that the new size is reported by
+// clGetMutableCommandInfoKHR, and that the kernel observes it when the command
+// buffer is enqueued again.
+struct MutableDispatchGlobalSize : public InfoMutableCommandBufferTest
+{
+    using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
+
+    MutableDispatchGlobalSize(cl_device_id device, cl_context context,
+                              cl_command_queue queue)
+        : InfoMutableCommandBufferTest(device, context, queue)
+    {}
+
+    // Skips when the device does not report
+    // CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR in its mutable-dispatch capabilities
+    // (a failing capability query also skips), or when the base fixture
+    // requests a skip.
+    bool Skip() override
+    {
+        cl_mutable_dispatch_fields_khr mutable_capabilities;
+
+        bool mutable_support =
+            !clGetDeviceInfo(
+                device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
+                sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
+            && mutable_capabilities & CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR;
+
+        return !mutable_support || InfoMutableCommandBufferTest::Skip();
+    }
+
+    // Records the kernel with global_work_size work-items, runs it, shrinks
+    // the global size to update_global_size, runs it again and validates both
+    // the queried size and the buffer contents.
+    // Returns CL_SUCCESS on success, TEST_FAIL or an OpenCL error otherwise.
+    cl_int Run() override
+    {
+        // Each work-item stores the dispatch's global size at its global id.
+        const char *global_size_kernel =
+            R"(
+                __kernel void sample_test(__global int *dst)
+                {
+                    size_t tid = get_global_id(0);
+                    dst[tid] = get_global_size(0);
+                })";
+
+        cl_int error = create_single_kernel_helper(
+            context, &program, &kernel, 1, &global_size_kernel, "sample_test");
+        test_error(error, "Creating kernel failed");
+
+        clMemWrapper stream;
+        stream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate,
+                                nullptr, &error);
+        test_error(error, "Creating test array failed");
+
+        /* Set the arguments */
+        error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream);
+        test_error(error, "Unable to set indexed kernel arguments");
+
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &global_work_size, nullptr, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed.");
+
+        // Only the global work size is modified; every other field is left at
+        // "no change".
+        cl_mutable_dispatch_config_khr dispatch_config{
+            CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR,
+            nullptr,
+            command,
+            0 /* num_args */,
+            0 /* num_svm_arg */,
+            0 /* num_exec_infos */,
+            0 /* work_dim - 0 means no change to dimensions */,
+            nullptr /* arg_list */,
+            nullptr /* arg_svm_list - nullptr means no change*/,
+            nullptr /* exec_info_list */,
+            nullptr /* global_work_offset */,
+            &update_global_size /* global_work_size */,
+            nullptr /* local_work_size */
+        };
+        cl_mutable_base_config_khr mutable_config{
+            CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1,
+            &dispatch_config
+        };
+
+        error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config);
+        test_error(error, "clUpdateMutableCommandsKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clGetMutableCommandInfoKHR(
+            command, CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR,
+            sizeof(info_global_size), &info_global_size, nullptr);
+        test_error(error, "clGetMutableCommandInfoKHR failed");
+
+        if (info_global_size != update_global_size)
+        {
+            log_error("ERROR: Wrong size returned from "
+                      "clGetMutableCommandInfoKHR.\n");
+            return TEST_FAIL;
+        }
+
+        std::vector<cl_int> resultData;
+        resultData.resize(num_elements);
+
+        error = clEnqueueReadBuffer(queue, stream, CL_TRUE, 0, sizeToAllocate,
+                                    resultData.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        // The first run wrote global_work_size to every element; the second
+        // run only covered the first update_global_size elements and wrote
+        // the updated size there.
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            const size_t expected = (i < update_global_size)
+                ? update_global_size
+                : global_work_size;
+            if (expected != static_cast<size_t>(resultData[i]))
+            {
+                log_error("Data failed to verify: expected %zu != "
+                          "resultData[%zu]=%d\n",
+                          expected, i, resultData[i]);
+                return TEST_FAIL;
+            }
+        }
+
+        return CL_SUCCESS;
+    }
+
+    // Size read back through clGetMutableCommandInfoKHR.
+    size_t info_global_size = 0;
+    // Global size applied by the mutable update.
+    const size_t update_global_size = 3;
+    // One cl_int per work-item of the initial dispatch; sizing the buffer in
+    // bytes as global_work_size alone would let the kernel write past its end.
+    const size_t sizeToAllocate = global_work_size * sizeof(cl_int);
+    const size_t num_elements = sizeToAllocate / sizeof(cl_int);
+    // Handle of the recorded ND-range command that gets updated.
+    cl_mutable_command_khr command = nullptr;
+};
+
+// Harness entry point: runs the MutableDispatchGlobalSize fixture through
+// MakeAndRunTest and returns that call's result unchanged.
+int test_mutable_dispatch_global_size(cl_device_id device, cl_context context,
+                                      cl_command_queue queue, int num_elements)
+{
+    using Fixture = MutableDispatchGlobalSize;
+    const int status =
+        MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+    return status;
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_image_arguments.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_image_arguments.cpp
new file mode 100644
index 00000000..b1ce25ec
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_image_arguments.cpp
@@ -0,0 +1,427 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <vector>
+#include "imageHelpers.h"
+#include "mutable_command_basic.h"
+
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+////////////////////////////////////////////////////////////////////////////////
+// mutable dispatch tests which handle following cases for
+// CL_MUTABLE_DISPATCH_ARGUMENTS_KHR:
+// - image arguments
+
+// Checks that a 1D image kernel argument of a recorded ND-range command can be
+// replaced through clUpdateMutableCommandsKHR: the destination image argument
+// is swapped for a new image, the command buffer is re-run, and the new image
+// is compared against the source data.
+struct MutableDispatchImage1DArguments : public BasicMutableCommandBufferTest
+{
+    using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+
+    MutableDispatchImage1DArguments(cl_device_id device, cl_context context,
+                                    cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    // Runs the base SetUp and always reports CL_SUCCESS.
+    // NOTE(review): the base status is discarded; Skip() is what filters out
+    // devices without mutable-dispatch support - confirm before propagating.
+    virtual cl_int SetUp(int elements) override
+    {
+        BasicMutableCommandBufferTest::SetUp(elements);
+
+        return CL_SUCCESS;
+    }
+
+    // Skips when images are unsupported, when the device does not report
+    // CL_MUTABLE_DISPATCH_ARGUMENTS_KHR (a failing capability query also
+    // skips), or when the base fixture requests a skip.
+    bool Skip() override
+    {
+        cl_bool image_support;
+
+        cl_int error =
+            clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
+                            sizeof(image_support), &image_support, nullptr);
+        // NOTE(review): test_error presumably returns the error code, which
+        // converts to true (skip) in this bool function - confirm.
+        test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed");
+
+        cl_mutable_dispatch_fields_khr mutable_capabilities;
+
+        bool mutable_support =
+            !clGetDeviceInfo(
+                device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
+                sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
+            && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
+
+        return (!mutable_support || !image_support)
+            || BasicMutableCommandBufferTest::Skip();
+    }
+
+    // Returns TEST_PASS on success, TEST_FAIL or an OpenCL error otherwise.
+    cl_int Run() override
+    {
+        // Copies each pixel of the source image to the destination image.
+        const char *sample_const_arg_kernel =
+            R"(__kernel void sample_test( read_only image1d_t source, sampler_t
+            sampler, write_only image1d_t dest)
+            {
+                int offset = get_global_id(0);
+
+                int4 color = read_imagei( source, sampler, offset );
+
+                write_imagei( dest, offset, color );
+            })";
+
+        cl_int error;
+        clProgramWrapper program;
+        clKernelWrapper kernel;
+
+        cl_image_desc image_desc;
+        memset(&image_desc, 0x0, sizeof(cl_image_desc));
+        image_desc.image_type = CL_MEM_OBJECT_IMAGE1D;
+        image_desc.image_width = 4;
+        image_desc.image_row_pitch = 0;
+        image_desc.num_mip_levels = 0;
+
+        // read_imagei/write_imagei are only defined for signed integer
+        // channel types, so the image must not use CL_UNSIGNED_INT8.
+        const cl_image_format formats = { CL_RGBA, CL_SIGNED_INT8 };
+
+        // NOTE(review): rowPitch is left at 0 here; confirm that
+        // generate_random_image_data sizes its allocation from width/format
+        // rather than from rowPitch.
+        image_descriptor imageInfo = { 0 };
+        imageInfo.type = CL_MEM_OBJECT_IMAGE1D;
+        imageInfo.format = &formats;
+        imageInfo.width = 4;
+
+        BufferOwningPtr<char> imageValues_input, imageValues_output, outputData;
+        MTdataHolder d(gRandomSeed);
+        generate_random_image_data(&imageInfo, imageValues_input, d);
+        generate_random_image_data(&imageInfo, imageValues_output, d);
+        generate_random_image_data(&imageInfo, outputData, d);
+
+        char *host_ptr_input = (char *)imageValues_input;
+        char *host_ptr_output = (char *)imageValues_output;
+
+        clMemWrapper src_image = create_image_1d(
+            context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats,
+            image_desc.image_width, 0, host_ptr_input, nullptr, &error);
+        test_error(error, "create_image_1d failed");
+
+        clMemWrapper dst_image = create_image_1d(
+            context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats,
+            image_desc.image_width, 0, host_ptr_output, nullptr, &error);
+        test_error(error, "create_image_1d failed");
+
+        error = create_single_kernel_helper(context, &program, &kernel, 1,
+                                            &sample_const_arg_kernel,
+                                            "sample_test");
+        test_error(error, "Creating kernel failed");
+
+        clSamplerWrapper sampler = clCreateSampler(
+            context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error);
+        test_error(error, "Unable to create sampler");
+
+        error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &src_image);
+        test_error(error, "Unable to set indexed kernel arguments");
+
+        error = clSetKernelArg(kernel, 1, sizeof(cl_sampler), &sampler);
+        test_error(error, "Unable to set indexed kernel arguments");
+
+        error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dst_image);
+        test_error(error, "Unable to set indexed kernel arguments");
+
+        // Declare the kernel arguments as the updatable field of the command.
+        cl_ndrange_kernel_command_properties_khr props[] = {
+            CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
+            CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
+        };
+
+        size_t globalDim[3] = { 4, 1, 1 }, localDim[3] = { 1, 1, 1 };
+
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, props, kernel, 1, nullptr, globalDim,
+            localDim, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed.");
+
+        // Replacement destination image, initialised with the same host data
+        // as dst_image so it differs from the source until the kernel runs.
+        clMemWrapper new_image = create_image_1d(
+            context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats,
+            image_desc.image_width, 0, host_ptr_output, nullptr, &error);
+        test_error(error, "create_image_1d failed");
+
+        // Swap kernel argument 2 (dest) for new_image.
+        cl_mutable_dispatch_arg_khr arg_2{ 2, sizeof(cl_mem), &new_image };
+        cl_mutable_dispatch_arg_khr args[] = { arg_2 };
+
+        cl_mutable_dispatch_config_khr dispatch_config{
+            CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR,
+            nullptr,
+            command,
+            1 /* num_args */,
+            0 /* num_svm_arg */,
+            0 /* num_exec_infos */,
+            0 /* work_dim - 0 means no change to dimensions */,
+            args /* arg_list */,
+            nullptr /* arg_svm_list - nullptr means no change*/,
+            nullptr /* exec_info_list */,
+            nullptr /* global_work_offset */,
+            nullptr /* global_work_size */,
+            nullptr /* local_work_size */
+        };
+        cl_mutable_base_config_khr mutable_config{
+            CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1,
+            &dispatch_config
+        };
+        error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config);
+        test_error(error, "clUpdateMutableCommandsKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        size_t origin[3] = { 0, 0, 0 };
+        size_t region[3] = { image_desc.image_width, 1, 1 };
+
+        error = clEnqueueReadImage(queue, new_image, CL_TRUE, origin, region, 0,
+                                   0, outputData, 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadImage failed");
+
+        // Compares the first imageInfo.width bytes of the read-back data with
+        // the source data.
+        for (size_t i = 0; i < imageInfo.width; ++i)
+        {
+            if (imageValues_input[i] != outputData[i])
+            {
+                log_error("Data failed to verify: imageValues[%zu]=%d != "
+                          "outputData[%zu]=%d\n",
+                          i, imageValues_input[i], i, outputData[i]);
+
+                return TEST_FAIL;
+            }
+        }
+
+        return TEST_PASS;
+    }
+
+    // Handle of the recorded ND-range command that gets updated.
+    cl_mutable_command_khr command = nullptr;
+};
+
+// Checks that a 2D image kernel argument of a recorded ND-range command can be
+// replaced through clUpdateMutableCommandsKHR: the destination image argument
+// is swapped for a new image, the command buffer is re-run, and the new image
+// is compared against the source data.
+struct MutableDispatchImage2DArguments : public BasicMutableCommandBufferTest
+{
+    using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+
+    MutableDispatchImage2DArguments(cl_device_id device, cl_context context,
+                                    cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue)
+    {}
+
+    // Runs the base SetUp and always reports CL_SUCCESS.
+    // NOTE(review): the base status is discarded; Skip() is what filters out
+    // devices without mutable-dispatch support - confirm before propagating.
+    virtual cl_int SetUp(int elements) override
+    {
+        BasicMutableCommandBufferTest::SetUp(elements);
+
+        return CL_SUCCESS;
+    }
+
+    // Skips when images are unsupported, when the device does not report
+    // CL_MUTABLE_DISPATCH_ARGUMENTS_KHR (a failing capability query also
+    // skips), or when the base fixture requests a skip.
+    bool Skip() override
+    {
+        cl_bool image_support;
+
+        cl_int error =
+            clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
+                            sizeof(image_support), &image_support, nullptr);
+        // NOTE(review): test_error presumably returns the error code, which
+        // converts to true (skip) in this bool function - confirm.
+        test_error(error, "clGetDeviceInfo for CL_DEVICE_IMAGE_SUPPORT failed");
+
+        cl_mutable_dispatch_fields_khr mutable_capabilities;
+
+        bool mutable_support =
+            !clGetDeviceInfo(
+                device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
+                sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
+            && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
+
+        return (!mutable_support || !image_support)
+            || BasicMutableCommandBufferTest::Skip();
+    }
+
+    // Returns TEST_PASS on success, TEST_FAIL or an OpenCL error otherwise.
+    cl_int Run() override
+    {
+
+        // Copies each pixel (x, y) of the source image to the destination.
+        const char *sample_const_arg_kernel =
+            R"(__kernel void sample_test( read_only image2d_t source, sampler_t
+            sampler, write_only image2d_t dest)
+            {
+                int x = get_global_id(0);
+                int y = get_global_id(1);
+
+                int4 color = read_imagei( source, sampler, (int2) (x, y) );
+
+                write_imagei( dest, (int2) (x, y), color );
+            })";
+
+        cl_int error;
+        clProgramWrapper program;
+        clKernelWrapper kernel;
+
+        cl_image_desc image_desc;
+        memset(&image_desc, 0x0, sizeof(cl_image_desc));
+        image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+        image_desc.image_width = 4;
+        image_desc.image_height = 4;
+        image_desc.image_row_pitch = 0;
+        image_desc.num_mip_levels = 0;
+
+        // Host read-back buffer size: sizeof(cl_int) bytes per pixel, which
+        // equals the 4 bytes of one RGBA pixel with 8-bit channels.
+        size_t data_size =
+            image_desc.image_width * image_desc.image_height * sizeof(cl_int);
+
+        // read_imagei/write_imagei are only defined for signed integer
+        // channel types, so the image must not use CL_UNSIGNED_INT8.
+        const cl_image_format formats = { CL_RGBA, CL_SIGNED_INT8 };
+
+        // NOTE(review): rowPitch is left at 0 here; confirm that
+        // generate_random_image_data sizes its allocation from
+        // width/height/format rather than from rowPitch.
+        image_descriptor imageInfo = { 0 };
+        imageInfo.type = CL_MEM_OBJECT_IMAGE2D;
+        imageInfo.width = 4;
+        imageInfo.height = 4;
+        imageInfo.format = &formats;
+
+        BufferOwningPtr<char> imageValues_input, imageValues_output;
+
+        MTdataHolder d(gRandomSeed);
+        generate_random_image_data(&imageInfo, imageValues_input, d);
+        generate_random_image_data(&imageInfo, imageValues_output, d);
+
+        char *host_ptr_input = (char *)imageValues_input;
+        char *host_ptr_output = (char *)imageValues_output;
+        std::vector<char> outputData(data_size);
+
+        clMemWrapper src_image =
+            create_image_2d(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
+                            &formats, image_desc.image_width,
+                            image_desc.image_height, 0, host_ptr_input, &error);
+        test_error(error, "create_image_2d failed");
+
+        clMemWrapper dst_image = create_image_2d(
+            context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats,
+            image_desc.image_width, image_desc.image_height, 0, host_ptr_output,
+            &error);
+        test_error(error, "create_image_2d failed");
+
+        error = create_single_kernel_helper(context, &program, &kernel, 1,
+                                            &sample_const_arg_kernel,
+                                            "sample_test");
+        test_error(error, "Creating kernel failed");
+
+        clSamplerWrapper sampler = clCreateSampler(
+            context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error);
+        test_error(error, "Unable to create sampler");
+
+        error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &src_image);
+        test_error(error, "Unable to set indexed kernel arguments");
+
+        error = clSetKernelArg(kernel, 1, sizeof(cl_sampler), &sampler);
+        test_error(error, "Unable to set indexed kernel arguments");
+
+        error = clSetKernelArg(kernel, 2, sizeof(cl_mem), &dst_image);
+        test_error(error, "Unable to set indexed kernel arguments");
+
+        size_t globalDim[3] = { 4, 4, 1 }, localDim[3] = { 1, 1, 1 };
+
+        // Declare the kernel arguments as the updatable field of the command.
+        cl_ndrange_kernel_command_properties_khr props[] = {
+            CL_MUTABLE_DISPATCH_UPDATABLE_FIELDS_KHR,
+            CL_MUTABLE_DISPATCH_ARGUMENTS_KHR, 0
+        };
+
+        // The kernel indexes with get_global_id(0) and get_global_id(1), so
+        // it has to be recorded as a 2-dimensional dispatch; with work_dim 1
+        // only row 0 of the image would be processed.
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, props, kernel, 2, nullptr, globalDim,
+            localDim, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed.");
+
+        // Replacement destination image, initialised with the same host data
+        // as dst_image so it differs from the source until the kernel runs.
+        clMemWrapper new_image = create_image_2d(
+            context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &formats,
+            image_desc.image_width, image_desc.image_height, 0,
+            imageValues_output, &error);
+        test_error(error, "create_image_2d failed");
+
+        // Swap kernel argument 2 (dest) for new_image.
+        cl_mutable_dispatch_arg_khr arg_2{ 2, sizeof(cl_mem), &new_image };
+        cl_mutable_dispatch_arg_khr args[] = { arg_2 };
+
+        cl_mutable_dispatch_config_khr dispatch_config{
+            CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR,
+            nullptr,
+            command,
+            1 /* num_args */,
+            0 /* num_svm_arg */,
+            0 /* num_exec_infos */,
+            0 /* work_dim - 0 means no change to dimensions */,
+            args /* arg_list */,
+            nullptr /* arg_svm_list - nullptr means no change*/,
+            nullptr /* exec_info_list */,
+            nullptr /* global_work_offset */,
+            nullptr /* global_work_size */,
+            nullptr /* local_work_size */
+        };
+        cl_mutable_base_config_khr mutable_config{
+            CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1,
+            &dispatch_config
+        };
+        error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config);
+        test_error(error, "clUpdateMutableCommandsKHR failed");
+
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        size_t origin[3] = { 0, 0, 0 };
+        size_t region[3] = { image_desc.image_width, image_desc.image_height,
+                             1 };
+
+        error = clEnqueueReadImage(queue, new_image, CL_TRUE, origin, region, 0,
+                                   0, outputData.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadImage failed");
+
+        // Compares the first width * height bytes of the read-back data with
+        // the source data.
+        for (size_t i = 0; i < imageInfo.width * imageInfo.height; ++i)
+        {
+            if (imageValues_input[i] != outputData[i])
+            {
+                log_error("Data failed to verify: imageValues[%zu]=%d != "
+                          "outputData[%zu]=%d\n",
+                          i, imageValues_input[i], i, outputData[i]);
+                return TEST_FAIL;
+            }
+        }
+
+        return TEST_PASS;
+    }
+
+    // Handle of the recorded ND-range command that gets updated.
+    cl_mutable_command_khr command = nullptr;
+};
+
+// Harness entry point: runs the MutableDispatchImage1DArguments fixture
+// through MakeAndRunTest and returns that call's result unchanged.
+int test_mutable_dispatch_image_1d_arguments(cl_device_id device,
+                                             cl_context context,
+                                             cl_command_queue queue,
+                                             int num_elements)
+{
+    using Fixture = MutableDispatchImage1DArguments;
+    const int status =
+        MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+    return status;
+}
+
+// Harness entry point: runs the MutableDispatchImage2DArguments fixture
+// through MakeAndRunTest and returns that call's result unchanged.
+int test_mutable_dispatch_image_2d_arguments(cl_device_id device,
+                                             cl_context context,
+                                             cl_command_queue queue,
+                                             int num_elements)
+{
+    using Fixture = MutableDispatchImage2DArguments;
+    const int status =
+        MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+    return status;
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp
index cc425a4d..61600dc9 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_info.cpp
@@ -42,13 +42,13 @@
// CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR
// CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR
-struct InfoDeviceQuery : public BasicMutableCommandBufferTest
+struct InfoDeviceQuery : public InfoMutableCommandBufferTest
{
- using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+ using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
InfoDeviceQuery(cl_device_id device, cl_context context,
cl_command_queue queue)
- : BasicMutableCommandBufferTest(device, context, queue)
+ : InfoMutableCommandBufferTest(device, context, queue)
{}
cl_int Run() override
@@ -71,12 +71,12 @@ struct InfoDeviceQuery : public BasicMutableCommandBufferTest
}
};
-struct InfoBuffer : public BasicMutableCommandBufferTest
+struct InfoBuffer : public InfoMutableCommandBufferTest
{
- using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+ using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
InfoBuffer(cl_device_id device, cl_context context, cl_command_queue queue)
- : BasicMutableCommandBufferTest(device, context, queue)
+ : InfoMutableCommandBufferTest(device, context, queue)
{}
cl_int Run() override
@@ -108,13 +108,13 @@ struct InfoBuffer : public BasicMutableCommandBufferTest
cl_mutable_command_khr command = nullptr;
};
-struct PropertiesArray : public BasicMutableCommandBufferTest
+struct PropertiesArray : public InfoMutableCommandBufferTest
{
- using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+ using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
PropertiesArray(cl_device_id device, cl_context context,
cl_command_queue queue)
- : BasicMutableCommandBufferTest(device, context, queue)
+ : InfoMutableCommandBufferTest(device, context, queue)
{}
cl_int Run() override
@@ -140,7 +140,7 @@ struct PropertiesArray : public BasicMutableCommandBufferTest
if (size != sizeof(props) || test_props[0] != props[0]
|| test_props[1] != props[1])
{
- log_error("ERROR: Incorrect command buffer returned from "
+ log_error("ERROR: Incorrect properties returned from "
"clGetMutableCommandInfoKHR.");
return TEST_FAIL;
}
@@ -154,12 +154,12 @@ struct PropertiesArray : public BasicMutableCommandBufferTest
cl_mutable_command_khr command = nullptr;
};
-struct Kernel : public BasicMutableCommandBufferTest
+struct Kernel : public InfoMutableCommandBufferTest
{
- using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+ using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
Kernel(cl_device_id device, cl_context context, cl_command_queue queue)
- : BasicMutableCommandBufferTest(device, context, queue)
+ : InfoMutableCommandBufferTest(device, context, queue)
{}
cl_int Run() override
@@ -181,7 +181,7 @@ struct Kernel : public BasicMutableCommandBufferTest
// opaque object.
if (test_kernel != kernel)
{
- log_error("ERROR: Incorrect command buffer returned from "
+ log_error("ERROR: Incorrect kernel returned from "
"clGetMutableCommandInfoKHR.");
return TEST_FAIL;
}
@@ -195,12 +195,12 @@ struct Kernel : public BasicMutableCommandBufferTest
cl_mutable_command_khr command = nullptr;
};
-struct Dimensions : public BasicMutableCommandBufferTest
+struct Dimensions : public InfoMutableCommandBufferTest
{
- using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+ using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
Dimensions(cl_device_id device, cl_context context, cl_command_queue queue)
- : BasicMutableCommandBufferTest(device, context, queue)
+ : InfoMutableCommandBufferTest(device, context, queue)
{}
cl_int Run() override
@@ -210,8 +210,7 @@ struct Dimensions : public BasicMutableCommandBufferTest
&global_work_size, nullptr, 0, nullptr, nullptr, &command);
test_error(error, "clCommandNDRangeKernelKHR failed");
- size_t test_dimensions;
-
+ cl_uint test_dimensions = 0;
error = clGetMutableCommandInfoKHR(
command, CL_MUTABLE_DISPATCH_DIMENSIONS_KHR,
sizeof(test_dimensions), &test_dimensions, nullptr);
@@ -219,7 +218,7 @@ struct Dimensions : public BasicMutableCommandBufferTest
if (test_dimensions != dimensions)
{
- log_error("ERROR: Incorrect command buffer returned from "
+ log_error("ERROR: Incorrect dimensions returned from "
"clGetMutableCommandInfoKHR.");
return TEST_FAIL;
}
@@ -234,12 +233,12 @@ struct Dimensions : public BasicMutableCommandBufferTest
const size_t dimensions = 3;
};
-struct InfoType : public BasicMutableCommandBufferTest
+struct InfoType : public InfoMutableCommandBufferTest
{
- using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+ using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
InfoType(cl_device_id device, cl_context context, cl_command_queue queue)
- : BasicMutableCommandBufferTest(device, context, queue)
+ : InfoMutableCommandBufferTest(device, context, queue)
{}
cl_int Run() override
@@ -271,12 +270,12 @@ struct InfoType : public BasicMutableCommandBufferTest
cl_mutable_command_khr command = nullptr;
};
-struct InfoQueue : public BasicMutableCommandBufferTest
+struct InfoQueue : public InfoMutableCommandBufferTest
{
- using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+ using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
InfoQueue(cl_device_id device, cl_context context, cl_command_queue queue)
- : BasicMutableCommandBufferTest(device, context, queue)
+ : InfoMutableCommandBufferTest(device, context, queue)
{}
cl_int Run() override
@@ -308,13 +307,13 @@ struct InfoQueue : public BasicMutableCommandBufferTest
cl_mutable_command_khr command = nullptr;
};
-struct InfoGlobalWorkOffset : public BasicMutableCommandBufferTest
+struct InfoGlobalWorkOffset : public InfoMutableCommandBufferTest
{
- using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+ using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
InfoGlobalWorkOffset(cl_device_id device, cl_context context,
cl_command_queue queue)
- : BasicMutableCommandBufferTest(device, context, queue)
+ : InfoMutableCommandBufferTest(device, context, queue)
{}
cl_int Run() override
@@ -330,7 +329,7 @@ struct InfoGlobalWorkOffset : public BasicMutableCommandBufferTest
if (test_global_work_offset != global_work_offset)
{
- log_error("ERROR: Wrong size returned from "
+ log_error("ERROR: Wrong global work offset returned from "
"clGetMutableCommandInfoKHR.");
return TEST_FAIL;
}
@@ -346,13 +345,13 @@ struct InfoGlobalWorkOffset : public BasicMutableCommandBufferTest
size_t test_global_work_offset = 0;
};
-struct InfoGlobalWorkSize : public BasicMutableCommandBufferTest
+struct InfoGlobalWorkSize : public InfoMutableCommandBufferTest
{
- using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+ using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
InfoGlobalWorkSize(cl_device_id device, cl_context context,
cl_command_queue queue)
- : BasicMutableCommandBufferTest(device, context, queue)
+ : InfoMutableCommandBufferTest(device, context, queue)
{}
cl_int Run() override
@@ -368,7 +367,7 @@ struct InfoGlobalWorkSize : public BasicMutableCommandBufferTest
if (test_global_work_size != global_work_size)
{
- log_error("ERROR: Wrong size returned from "
+ log_error("ERROR: Wrong global work size returned from "
"clGetMutableCommandInfoKHR.");
return TEST_FAIL;
}
@@ -383,13 +382,13 @@ struct InfoGlobalWorkSize : public BasicMutableCommandBufferTest
size_t test_global_work_size = 0;
};
-struct InfoLocalWorkSize : public BasicMutableCommandBufferTest
+struct InfoLocalWorkSize : public InfoMutableCommandBufferTest
{
- using BasicMutableCommandBufferTest::BasicMutableCommandBufferTest;
+ using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
InfoLocalWorkSize(cl_device_id device, cl_context context,
cl_command_queue queue)
- : BasicMutableCommandBufferTest(device, context, queue)
+ : InfoMutableCommandBufferTest(device, context, queue)
{}
cl_int Run() override
@@ -405,7 +404,7 @@ struct InfoLocalWorkSize : public BasicMutableCommandBufferTest
if (test_local_work_size != local_work_size)
{
- log_error("ERROR: Wrong size returned from "
+ log_error("ERROR: Wrong local work size returned from "
"clGetMutableCommandInfoKHR.");
return TEST_FAIL;
}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_local_size.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_local_size.cpp
new file mode 100644
index 00000000..22a9da6d
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_local_size.cpp
@@ -0,0 +1,174 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <extensionHelpers.h>
+#include "typeWrappers.h"
+#include "procs.h"
+#include "testHarness.h"
+#include "mutable_command_basic.h"
+#include <vector>
+
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+
+////////////////////////////////////////////////////////////////////////////////
+// mutable dispatch tests which handle following cases:
+//
+// CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR
+
+// Records one ND-range command, runs it, then uses clUpdateMutableCommandsKHR
+// to shrink both the global and the local work size and runs it again. The
+// test passes when clGetMutableCommandInfoKHR reports the updated local size
+// and the kernel output reflects the local size used by each run.
+struct MutableDispatchLocalSize : public InfoMutableCommandBufferTest
+{
+    using InfoMutableCommandBufferTest::InfoMutableCommandBufferTest;
+
+    MutableDispatchLocalSize(cl_device_id device, cl_context context,
+                             cl_command_queue queue)
+        : InfoMutableCommandBufferTest(device, context, queue)
+    {}
+
+    // Returns true (skip) when the device cannot update the dispatch fields
+    // this test modifies, or when the base fixture asks to skip.
+    bool Skip() override
+    {
+        cl_mutable_dispatch_fields_khr mutable_capabilities = 0;
+
+        // Run() updates the global work size together with the local work
+        // size, so both capability bits have to be present.
+        const cl_mutable_dispatch_fields_khr required_capabilities =
+            CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR
+            | CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR;
+
+        bool mutable_support =
+            !clGetDeviceInfo(
+                device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
+                sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
+            && (mutable_capabilities & required_capabilities)
+                == required_capabilities;
+
+        return !mutable_support || InfoMutableCommandBufferTest::Skip();
+    }
+
+    // Returns CL_SUCCESS on success, TEST_FAIL on a verification mismatch, or
+    // whatever test_error returns when an OpenCL call fails.
+    cl_int Run() override
+    {
+        // Every work-item stores the local size it was launched with.
+        const char *local_size_kernel =
+            R"(
+                __kernel void sample_test(__global int *dst)
+                {
+                    size_t tid = get_global_id(0);
+                    dst[tid] = get_local_size(0);
+                })";
+
+        cl_int error = create_single_kernel_helper(
+            context, &program, &kernel, 1, &local_size_kernel, "sample_test");
+        test_error(error, "Creating kernel failed");
+
+        clMemWrapper stream;
+        stream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate,
+                                nullptr, &error);
+        test_error(error, "Creating test array failed");
+
+        /* Set the arguments */
+        error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &stream);
+        test_error(error, "Unable to set indexed kernel arguments");
+
+        error = clCommandNDRangeKernelKHR(
+            command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &global_work_size, &local_work_size, 0, nullptr, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        // First run: original global/local work sizes.
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clFinish(queue);
+        test_error(error, "clFinish failed.");
+
+        cl_mutable_dispatch_config_khr dispatch_config{
+            CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR,
+            nullptr,
+            command,
+            0 /* num_args */,
+            0 /* num_svm_arg */,
+            0 /* num_exec_infos */,
+            0 /* work_dim - 0 means no change to dimensions */,
+            nullptr /* arg_list */,
+            nullptr /* arg_svm_list - nullptr means no change*/,
+            nullptr /* exec_info_list */,
+            nullptr /* global_work_offset */,
+            &update_global_size /* global_work_size */,
+            &update_local_size /* local_work_size */
+        };
+        cl_mutable_base_config_khr mutable_config{
+            CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1,
+            &dispatch_config
+        };
+
+        error = clUpdateMutableCommandsKHR(command_buffer, &mutable_config);
+        test_error(error, "clUpdateMutableCommandsKHR failed");
+
+        // Second run: updated global/local work sizes.
+        error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                          nullptr, nullptr);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clGetMutableCommandInfoKHR(
+            command, CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR,
+            sizeof(info_local_size), &info_local_size, nullptr);
+        test_error(error, "clGetMutableCommandInfoKHR failed");
+
+        if (info_local_size != update_local_size)
+        {
+            log_error("ERROR: Wrong size returned from "
+                      "clGetMutableCommandInfoKHR.");
+            return TEST_FAIL;
+        }
+
+        std::vector<cl_int> resultData;
+        resultData.resize(num_elements);
+
+        error = clEnqueueReadBuffer(queue, stream, CL_TRUE, 0, sizeToAllocate,
+                                    resultData.data(), 0, nullptr, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            // Elements covered by the second (smaller) run hold the updated
+            // local size; the remaining ones still hold the value written by
+            // the first run.
+            const bool rewritten = i < update_global_size;
+            const size_t expected =
+                rewritten ? update_local_size : local_work_size;
+
+            if (expected != static_cast<size_t>(resultData[i]))
+            {
+                log_error("Data failed to verify: %s != "
+                          "resultData[%zu]=%d\n",
+                          rewritten ? "update_local_size" : "local_work_size",
+                          i, resultData[i]);
+                return TEST_FAIL;
+            }
+        }
+
+        return CL_SUCCESS;
+    }
+
+    // Local work size read back through clGetMutableCommandInfoKHR.
+    size_t info_local_size = 0;
+    // Sizes used when the command is recorded.
+    const size_t global_work_size = 16;
+    const size_t local_work_size = 8;
+    // Sizes applied through clUpdateMutableCommandsKHR.
+    const size_t update_global_size = 8;
+    const size_t update_local_size = 4;
+    // Output buffer size in bytes and the matching element count.
+    const size_t sizeToAllocate = 64;
+    const size_t num_elements = sizeToAllocate / sizeof(cl_int);
+
+    cl_mutable_command_khr command = nullptr;
+};
+
+// Harness entry point: builds the MutableDispatchLocalSize fixture and returns
+// the result reported by MakeAndRunTest.
+int test_mutable_dispatch_local_size(cl_device_id device, cl_context context,
+                                     cl_command_queue queue, int num_elements)
+{
+    using Fixture = MutableDispatchLocalSize;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp
new file mode 100644
index 00000000..d507dadf
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/mutable_command_out_of_order.cpp
@@ -0,0 +1,454 @@
+//
+// Copyright (c) 2022 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <extensionHelpers.h>
+#include <vector>
+#include "mutable_command_basic.h"
+
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+////////////////////////////////////////////////////////////////////////////////
+// mutable dispatch tests which handle following cases:
+// - out-of-order queue use
+// - out-of-order queue with simultaneous use
+
+namespace {
+
+// Updates a kernel argument of a command recorded into a command-buffer that
+// targets an out-of-order queue, and checks the output of the re-enqueued
+// command-buffer. The template parameter selects whether simultaneous use of
+// the command-buffer is requested (and therefore exercised).
+template <bool simultaneous_request>
+struct OutOfOrderTest : public BasicMutableCommandBufferTest
+{
+    OutOfOrderTest(cl_device_id device, cl_context context,
+                   cl_command_queue queue)
+        : BasicMutableCommandBufferTest(device, context, queue),
+          out_of_order_queue(nullptr), out_of_order_command_buffer(this),
+          user_event(nullptr), wait_pass_event(nullptr), kernel_fill(nullptr),
+          program_fill(nullptr)
+    {
+        simultaneous_use_requested = simultaneous_request;
+        // The simultaneous variant runs two passes at different offsets, so
+        // it asks for buffers twice as large.
+        if (simultaneous_request) buffer_size_multiplier = 2;
+    }
+
+    //--------------------------------------------------------------------------
+    // Builds the base-class kernel plus the extra "fill" kernel.
+    cl_int SetUpKernel() override
+    {
+        cl_int error = BasicMutableCommandBufferTest::SetUpKernel();
+        test_error(error, "BasicMutableCommandBufferTest::SetUpKernel failed");
+
+        // create additional kernel to properly prepare output buffer for test
+        const char* kernel_str =
+            R"(
+  __kernel void fill(int pattern, __global int* out, __global int*
+offset)
+  {
+      size_t id = get_global_id(0);
+      size_t ind = offset[0] + id ;
+      out[ind] = pattern;
+  })";
+
+        error = create_single_kernel_helper_create_program(
+            context, &program_fill, 1, &kernel_str);
+        test_error(error, "Failed to create program with source");
+
+        error =
+            clBuildProgram(program_fill, 1, &device, nullptr, nullptr, nullptr);
+        test_error(error, "Failed to build program");
+
+        kernel_fill = clCreateKernel(program_fill, "fill", &error);
+        test_error(error, "Failed to create fill kernel");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    // Sets the base-class kernel arguments and the three "fill" arguments.
+    cl_int SetUpKernelArgs() override
+    {
+        cl_int error = BasicMutableCommandBufferTest::SetUpKernelArgs();
+        test_error(error,
+                   "BasicMutableCommandBufferTest::SetUpKernelArgs failed");
+
+        error = clSetKernelArg(kernel_fill, 0, sizeof(cl_int),
+                               &overwritten_pattern);
+        test_error(error, "clSetKernelArg failed");
+
+        error = clSetKernelArg(kernel_fill, 1, sizeof(out_mem), &out_mem);
+        test_error(error, "clSetKernelArg failed");
+
+        error = clSetKernelArg(kernel_fill, 2, sizeof(off_mem), &off_mem);
+        test_error(error, "clSetKernelArg failed");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    // Creates the out-of-order queue and the mutable command-buffer bound to it.
+    cl_int SetUp(int elements) override
+    {
+        cl_int error = BasicMutableCommandBufferTest::SetUp(elements);
+        test_error(error, "BasicMutableCommandBufferTest::SetUp failed");
+
+        error = SetUpKernel();
+        test_error(error, "SetUpKernel failed");
+
+        out_of_order_queue = clCreateCommandQueue(
+            context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
+        test_error(error, "Unable to create command queue to test with");
+
+        // The simultaneous variant enqueues the command-buffer again while
+        // earlier enqueues are still pending on a user event; that requires
+        // the command-buffer to be created with the simultaneous-use flag.
+        cl_command_buffer_flags_khr flags = CL_COMMAND_BUFFER_MUTABLE_KHR;
+        if (simultaneous_use_requested)
+        {
+            flags |= CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR;
+        }
+
+        cl_command_buffer_properties_khr properties[3] = {
+            CL_COMMAND_BUFFER_FLAGS_KHR, flags, 0
+        };
+
+        out_of_order_command_buffer = clCreateCommandBufferKHR(
+            1, &out_of_order_queue, properties, &error);
+        test_error(error, "clCreateCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    // Skips when out-of-order or (requested) simultaneous use is unsupported,
+    // when kernel arguments are not mutable, or when the base fixture skips.
+    bool Skip() override
+    {
+        cl_mutable_dispatch_fields_khr mutable_capabilities = 0;
+
+        bool mutable_support =
+            !clGetDeviceInfo(
+                device, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR,
+                sizeof(mutable_capabilities), &mutable_capabilities, nullptr)
+            && mutable_capabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR;
+
+
+        return !out_of_order_support
+            || (simultaneous_use_requested && !simultaneous_use_support)
+            || !mutable_support || BasicMutableCommandBufferTest::Skip();
+    }
+
+    //--------------------------------------------------------------------------
+    // Dispatches to the single or the simultaneous scenario.
+    cl_int Run() override
+    {
+        cl_int error = CL_SUCCESS;
+
+        // Select the scenario from the requested mode: buffer sizing (ctor)
+        // and command-buffer flags (SetUp) are keyed on the same value, and
+        // Skip() guarantees support whenever simultaneous use is requested.
+        if (simultaneous_use_requested)
+        {
+            // enqueue simultaneous command-buffers with out-of-order calls
+            error = RunSimultaneous();
+            test_error(error, "RunSimultaneous failed");
+        }
+        else
+        {
+            // enqueue single command-buffer with out-of-order calls
+            error = RunSingle();
+            test_error(error, "RunSingle failed");
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    // Records two buffer fills followed by the kernel, which waits on both
+    // fills through sync points, then finalizes the command-buffer.
+    cl_int RecordCommandBuffer()
+    {
+        cl_sync_point_khr sync_points[2];
+        const cl_int pattern = pattern_pri;
+        cl_int error =
+            clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, in_mem,
+                                   &pattern, sizeof(cl_int), 0, data_size(), 0,
+                                   nullptr, &sync_points[0], nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr,
+                                       out_mem, &overwritten_pattern,
+                                       sizeof(cl_int), 0, data_size(), 0,
+                                       nullptr, &sync_points[1], nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        error = clCommandNDRangeKernelKHR(
+            out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &num_elements, nullptr, 2, sync_points, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(out_of_order_command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    // Runs the command-buffer once, swaps kernel argument 1 for a new buffer
+    // via clUpdateMutableCommandsKHR, runs it again and verifies both outputs.
+    cl_int RunSingle()
+    {
+        cl_int error;
+
+        error = RecordCommandBuffer();
+        test_error(error, "RecordCommandBuffer failed");
+
+        error = clEnqueueCommandBufferKHR(
+            0, nullptr, out_of_order_command_buffer, 0, nullptr, &single_event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        std::vector<cl_int> output_data(num_elements);
+        error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
+                                    data_size(), output_data.data(), 1,
+                                    &single_event, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
+        }
+
+        clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+                                                  sizeof(cl_int) * num_elements
+                                                      * buffer_size_multiplier,
+                                                  nullptr, &error);
+        test_error(error, "clCreateBuffer failed");
+
+        cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
+                                           &new_out_mem };
+        cl_mutable_dispatch_arg_khr args[] = { arg_1 };
+
+        cl_mutable_dispatch_config_khr dispatch_config{
+            CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR,
+            nullptr,
+            command,
+            1 /* num_args */,
+            0 /* num_svm_arg */,
+            0 /* num_exec_infos */,
+            0 /* work_dim - 0 means no change to dimensions */,
+            args /* arg_list */,
+            nullptr /* arg_svm_list - nullptr means no change*/,
+            nullptr /* exec_info_list */,
+            nullptr /* global_work_offset */,
+            nullptr /* global_work_size */,
+            nullptr /* local_work_size */
+        };
+        cl_mutable_base_config_khr mutable_config{
+            CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1,
+            &dispatch_config
+        };
+
+        error = clUpdateMutableCommandsKHR(out_of_order_command_buffer,
+                                           &mutable_config);
+        test_error(error, "clUpdateMutableCommandsKHR failed");
+
+        // Use a separate wrapper for the second enqueue: writing through
+        // &single_event again would overwrite the first handle without
+        // releasing it.
+        clEventWrapper update_event;
+        error = clEnqueueCommandBufferKHR(
+            0, nullptr, out_of_order_command_buffer, 0, nullptr, &update_event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clEnqueueReadBuffer(out_of_order_queue, new_out_mem, CL_TRUE, 0,
+                                    data_size(), output_data.data(), 1,
+                                    &update_event, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        for (size_t i = 0; i < num_elements; i++)
+        {
+            CHECK_VERIFICATION_ERROR(pattern_pri, output_data[i], i);
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    // Records an in_mem fill, the "fill" kernel for the output range and the
+    // main kernel (waiting on both), then finalizes the command-buffer.
+    cl_int RecordSimultaneousCommandBuffer()
+    {
+        cl_sync_point_khr sync_points[2];
+        // for both simultaneous passes this call will fill entire in_mem buffer
+        cl_int error = clCommandFillBufferKHR(
+            out_of_order_command_buffer, nullptr, in_mem, &pattern_pri,
+            sizeof(cl_int), 0, data_size() * buffer_size_multiplier, 0, nullptr,
+            &sync_points[0], nullptr);
+        test_error(error, "clCommandFillBufferKHR failed");
+
+        // to avoid overwriting the entire result buffer instead of filling
+        // only relevant part this additional kernel was introduced
+
+        error = clCommandNDRangeKernelKHR(out_of_order_command_buffer, nullptr,
+                                          nullptr, kernel_fill, 1, nullptr,
+                                          &num_elements, nullptr, 0, nullptr,
+                                          &sync_points[1], &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        // `command` is overwritten here, so later updates target this
+        // (main kernel) command rather than the fill kernel one.
+        error = clCommandNDRangeKernelKHR(
+            out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr,
+            &num_elements, nullptr, 2, sync_points, nullptr, &command);
+        test_error(error, "clCommandNDRangeKernelKHR failed");
+
+        error = clFinalizeCommandBufferKHR(out_of_order_command_buffer);
+        test_error(error, "clFinalizeCommandBufferKHR failed");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    // Per-pass state for the simultaneous scenario.
+    struct SimulPassData
+    {
+        cl_int offset;
+        std::vector<cl_int> output_buffer;
+        // 0:user event, 1:offset-buffer fill event, 2:kernel done event
+        clEventWrapper wait_events[3];
+    };
+
+    //--------------------------------------------------------------------------
+    // Enqueues one pass: offset fill, command-buffer run and read-back, then an
+    // argument update followed by a second run and read-back. Everything is
+    // gated on the shared user event, so nothing executes until the caller
+    // completes it.
+    cl_int EnqueueSimultaneousPass(SimulPassData& pd)
+    {
+        cl_int error = CL_SUCCESS;
+        if (!user_event)
+        {
+            user_event = clCreateUserEvent(context, &error);
+            test_error(error, "clCreateUserEvent failed");
+        }
+
+        pd.wait_events[0] = user_event;
+
+        // filling offset buffer must wait for previous pass completeness
+        error = clEnqueueFillBuffer(
+            out_of_order_queue, off_mem, &pd.offset, sizeof(cl_int), 0,
+            sizeof(cl_int), (wait_pass_event != nullptr ? 1 : 0),
+            (wait_pass_event != nullptr ? &wait_pass_event : nullptr),
+            &pd.wait_events[1]);
+        test_error(error, "clEnqueueFillBuffer failed");
+
+        // The first execution gets its own event wrapper; pd.wait_events[2]
+        // is reserved for the second execution below so that no event handle
+        // is overwritten (and leaked) through operator&.
+        clEventWrapper first_exec_event;
+
+        // command buffer execution must wait for two wait-events
+        error = clEnqueueCommandBufferKHR(
+            0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0],
+            &first_exec_event);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_FALSE,
+                                    pd.offset * sizeof(cl_int), data_size(),
+                                    pd.output_buffer.data(), 1,
+                                    &first_exec_event, nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        // NOTE(review): new_out_mem is released when this function returns
+        // while the command-buffer still references it as a kernel argument;
+        // confirm the intended lifetime against the extension specification.
+        clMemWrapper new_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY,
+                                                  sizeof(cl_int) * num_elements
+                                                      * buffer_size_multiplier,
+                                                  nullptr, &error);
+        test_error(error, "clCreateBuffer failed");
+
+        cl_mutable_dispatch_arg_khr arg_1{ 1, sizeof(new_out_mem),
+                                           &new_out_mem };
+        cl_mutable_dispatch_arg_khr args[] = { arg_1 };
+
+        cl_mutable_dispatch_config_khr dispatch_config{
+            CL_STRUCTURE_TYPE_MUTABLE_DISPATCH_CONFIG_KHR,
+            nullptr,
+            command,
+            1 /* num_args */,
+            0 /* num_svm_arg */,
+            0 /* num_exec_infos */,
+            0 /* work_dim - 0 means no change to dimensions */,
+            args /* arg_list */,
+            nullptr /* arg_svm_list - nullptr means no change*/,
+            nullptr /* exec_info_list */,
+            nullptr /* global_work_offset */,
+            nullptr /* global_work_size */,
+            nullptr /* local_work_size */
+        };
+        cl_mutable_base_config_khr mutable_config{
+            CL_STRUCTURE_TYPE_MUTABLE_BASE_CONFIG_KHR, nullptr, 1,
+            &dispatch_config
+        };
+
+        error = clUpdateMutableCommandsKHR(out_of_order_command_buffer,
+                                           &mutable_config);
+        test_error(error, "clUpdateMutableCommandsKHR failed");
+
+        // command buffer execution must wait for two wait-events
+        error = clEnqueueCommandBufferKHR(
+            0, nullptr, out_of_order_command_buffer, 2, &pd.wait_events[0],
+            &pd.wait_events[2]);
+        test_error(error, "clEnqueueCommandBufferKHR failed");
+
+        error = clEnqueueReadBuffer(out_of_order_queue, new_out_mem, CL_FALSE,
+                                    pd.offset * sizeof(cl_int), data_size(),
+                                    pd.output_buffer.data(), 1,
+                                    &pd.wait_events[2], nullptr);
+        test_error(error, "clEnqueueReadBuffer failed");
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    // Enqueues two passes (offsets 0 and num_elements), releases them by
+    // completing the user event, waits for the queue and verifies the outputs.
+    cl_int RunSimultaneous()
+    {
+        cl_int error = RecordSimultaneousCommandBuffer();
+        test_error(error, "RecordSimultaneousCommandBuffer failed");
+
+        cl_int offset = static_cast<cl_int>(num_elements);
+
+        std::vector<SimulPassData> simul_passes = {
+            { 0, std::vector<cl_int>(num_elements) },
+            { offset, std::vector<cl_int>(num_elements) }
+        };
+
+        for (auto&& pass : simul_passes)
+        {
+            error = EnqueueSimultaneousPass(pass);
+            test_error(error, "EnqueueSimultaneousPass failed");
+
+            // the next pass must not start before this one has finished
+            wait_pass_event = pass.wait_events[2];
+        }
+
+        error = clSetUserEventStatus(user_event, CL_COMPLETE);
+        test_error(error, "clSetUserEventStatus failed");
+
+        error = clFinish(out_of_order_queue);
+        test_error(error, "clFinish failed");
+
+        // verify the result buffers
+        for (auto&& pass : simul_passes)
+        {
+            auto& res_data = pass.output_buffer;
+            for (size_t i = 0; i < num_elements; i++)
+            {
+                CHECK_VERIFICATION_ERROR(pattern_pri, res_data[i], i);
+            }
+        }
+
+        return CL_SUCCESS;
+    }
+
+    //--------------------------------------------------------------------------
+    clCommandQueueWrapper out_of_order_queue;
+    clCommandBufferWrapper out_of_order_command_buffer;
+
+    // Gate event for the simultaneous passes, event of the first single-run
+    // enqueue, and completion event of the previous simultaneous pass.
+    clEventWrapper user_event;
+    clEventWrapper single_event;
+    clEventWrapper wait_pass_event;
+
+    clKernelWrapper kernel_fill;
+    clProgramWrapper program_fill;
+
+    const size_t test_global_work_size = 3 * sizeof(cl_int);
+    // Handle of the most recently recorded mutable ND-range command.
+    cl_mutable_command_khr command = nullptr;
+
+    // Value pre-written to the output, and value expected after the kernel.
+    const cl_int overwritten_pattern = 0xACDC;
+    const cl_int pattern_pri = 42;
+};
+
+} // anonymous namespace
+
+// Harness entry point for the out-of-order variant that does not request
+// simultaneous use.
+int test_mutable_dispatch_out_of_order(cl_device_id device, cl_context context,
+                                       cl_command_queue queue, int num_elements)
+{
+    using Fixture = OutOfOrderTest<false>;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+// Harness entry point for the out-of-order variant that requests simultaneous
+// use of the command-buffer.
+int test_mutable_dispatch_simultaneous_out_of_order(cl_device_id device,
+                                                    cl_context context,
+                                                    cl_command_queue queue,
+                                                    int num_elements)
+{
+    using Fixture = OutOfOrderTest<true>;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h
index 08512cae..1db48917 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/cl_khr_command_buffer_mutable_dispatch/procs.h
@@ -13,8 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-#ifndef _CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H
-#define _CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H
+#ifndef CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H
+#define CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H
#include <CL/cl.h>
@@ -59,4 +59,51 @@ extern int test_mutable_command_info_global_work_size(cl_device_id device,
cl_context context,
cl_command_queue queue,
int num_elements);
+extern int test_mutable_dispatch_image_1d_arguments(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_mutable_dispatch_image_2d_arguments(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_mutable_dispatch_global_arguments(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_mutable_dispatch_local_arguments(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_mutable_dispatch_pod_arguments(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_mutable_dispatch_null_arguments(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_mutable_dispatch_svm_arguments(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_mutable_dispatch_out_of_order(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_mutable_dispatch_simultaneous_out_of_order(
+ cl_device_id device, cl_context context, cl_command_queue queue,
+ int num_elements);
+extern int test_mutable_dispatch_global_size(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_mutable_dispatch_local_size(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
+extern int test_mutable_dispatch_global_offset(cl_device_id device,
+ cl_context context,
+ cl_command_queue queue,
+ int num_elements);
#endif /*_CL_KHR_COMMAND_BUFFER_MUTABLE_DISPATCH_PROCS_H*/
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_event_sync.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_event_sync.cpp
index be8530b2..6ef26bb9 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_event_sync.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_event_sync.cpp
@@ -603,12 +603,15 @@ struct CommandBufferEventSync : public BasicCommandBufferTest
event_ptrs[1], nullptr);
test_error(error, "clEnqueueReadBuffer failed");
- error = clFinish(queue);
- test_error(error, "clFinish failed");
+ error = clFlush(queue);
+ test_error(error, "clFlush failed");
error = clFinish(queue_sec);
test_error(error, "clFinish failed");
+ error = clFinish(queue);
+ test_error(error, "clFinish failed");
+
// verify the result - result buffer must contain initial pattern
for (size_t i = 0; i < num_elements; i++)
{
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_finalize.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_finalize.cpp
new file mode 100644
index 00000000..bd669165
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_finalize.cpp
@@ -0,0 +1,85 @@
+//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "basic_command_buffer.h"
+#include "procs.h"
+
+namespace {
+
+// Double finalization: once a command-buffer has been finalized, a second
+// clFinalizeCommandBufferKHR call on it must fail with CL_INVALID_OPERATION.
+struct FinalizeInvalid : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int status = CL_SUCCESS;
+
+        // Record a single kernel launch so the command-buffer is not empty.
+        status = clCommandNDRangeKernelKHR(command_buffer, nullptr, nullptr,
+                                           kernel, 1, nullptr, &num_elements,
+                                           nullptr, 0, nullptr, nullptr,
+                                           nullptr);
+        test_error(status, "clCommandNDRangeKernelKHR failed");
+
+        // First finalize is expected to succeed.
+        status = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(status, "clFinalizeCommandBufferKHR failed");
+
+        // Second finalize on the same command-buffer is the case under test.
+        status = clFinalizeCommandBufferKHR(command_buffer);
+        test_failure_error_ret(
+            status, CL_INVALID_OPERATION,
+            "clFinalizeCommandBufferKHR should return CL_INVALID_OPERATION",
+            TEST_FAIL);
+
+        return CL_SUCCESS;
+    }
+};
+
+// Empty command-buffer: finalizing without recording any command must succeed,
+// and the resulting command-buffer must be enqueueable and run to completion.
+struct FinalizeEmpty : public BasicCommandBufferTest
+{
+    using BasicCommandBufferTest::BasicCommandBufferTest;
+
+    cl_int Run() override
+    {
+        cl_int status = CL_SUCCESS;
+
+        // Nothing has been recorded; finalize straight away.
+        status = clFinalizeCommandBufferKHR(command_buffer);
+        test_error(status, "clFinalizeCommandBufferKHR failed");
+
+        // Enqueue the empty command-buffer and capture its completion event.
+        clEventWrapper done_event;
+        status = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+                                           nullptr, &done_event);
+        test_error(status, "clEnqueueCommandBufferKHR failed");
+
+        // Block until the (empty) execution has completed.
+        status = clWaitForEvents(1, &done_event);
+        test_error(status, "clWaitForEvents failed");
+
+        return CL_SUCCESS;
+    }
+};
+} // anonymous namespace
+
+// Harness entry point for the double-finalize test.
+int test_finalize_invalid(cl_device_id device, cl_context context,
+                          cl_command_queue queue, int num_elements)
+{
+    using Fixture = FinalizeInvalid;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
+
+// Harness entry point for the empty command-buffer finalize test.
+int test_finalize_empty(cl_device_id device, cl_context context,
+                        cl_command_queue queue, int num_elements)
+{
+    using Fixture = FinalizeEmpty;
+    return MakeAndRunTest<Fixture>(device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp
index 3ce410c0..63441970 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_get_command_buffer_info.cpp
@@ -26,6 +26,7 @@ enum class CombufInfoTestMode
CITM_REF_COUNT,
CITM_STATE,
CITM_PROP_ARRAY,
+ CITM_CONTEXT,
};
namespace {
@@ -38,6 +39,7 @@ namespace {
// -test case for CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR query
// -test case for CL_COMMAND_BUFFER_STATE_KHR query
// -test case for CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR query
+// -test case for CL_COMMAND_BUFFER_CONTEXT_KHR query
template <CombufInfoTestMode test_mode>
struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
@@ -70,6 +72,10 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
error = RunPropArrayInfoTest();
test_error(error, "RunPropArrayInfoTest failed");
break;
+ case CombufInfoTestMode::CITM_CONTEXT:
+ error = RunContextInfoTest();
+ test_error(error, "RunContextInfoTest failed");
+ break;
}
return CL_SUCCESS;
@@ -130,7 +136,7 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
// We can not check if this is the right queue because this is an opaque
// object, test against NULL.
- for (int i = 0; i < queue_list.size(); i++)
+ for (size_t i = 0; i < queue_list.size(); i++)
{
test_assert_error(
queue_list[i] == queue,
@@ -205,8 +211,7 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
// lambda to verify given state
auto verify_state = [&](const cl_command_buffer_state_khr &expected) {
- cl_command_buffer_state_khr state =
- CL_COMMAND_BUFFER_STATE_INVALID_KHR;
+ cl_command_buffer_state_khr state = ~cl_command_buffer_state_khr(0);
cl_int error = clGetCommandBufferInfoKHR(
command_buffer, CL_COMMAND_BUFFER_STATE_KHR, sizeof(state),
@@ -240,9 +245,10 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
clEventWrapper trigger_event = clCreateUserEvent(context, &error);
test_error(error, "clCreateUserEvent failed");
+ clEventWrapper execute_event;
// enqueued command buffer blocked on user event
error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1,
- &trigger_event, nullptr);
+ &trigger_event, &execute_event);
test_error(error, "clEnqueueCommandBufferKHR failed");
// verify pending state
@@ -255,6 +261,13 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
test_error(signal_error, "clSetUserEventStatus failed");
+ error = clWaitForEvents(1, &execute_event);
+ test_error(error, "Unable to wait for execute event");
+
+ // verify executable state
+ error = verify_state(CL_COMMAND_BUFFER_STATE_EXECUTABLE_KHR);
+ test_error(error, "verify_state failed");
+
return CL_SUCCESS;
}
@@ -315,6 +328,46 @@ struct CommandBufferGetCommandBufferInfo : public BasicCommandBufferTest
return TEST_FAIL;
}
+ cl_int RunContextInfoTest()
+ {
+ cl_int error = TEST_PASS;
+
+ // record command buffers
+ error = RecordCommandBuffer();
+ test_error(error, "RecordCommandBuffer failed");
+
+ size_t ret_value_size = 0;
+ error = clGetCommandBufferInfoKHR(command_buffer,
+ CL_COMMAND_BUFFER_CONTEXT_KHR, 0,
+ nullptr, &ret_value_size);
+ test_error(error, "clGetCommandBufferInfoKHR failed");
+
+ test_assert_error(
+ ret_value_size == sizeof(cl_context),
+ "Unexpected result of CL_COMMAND_BUFFER_CONTEXT_KHR query!");
+
+ cl_context ret_context = nullptr;
+ error = clGetCommandBufferInfoKHR(
+ command_buffer, CL_COMMAND_BUFFER_CONTEXT_KHR, sizeof(cl_context),
+ &ret_context, nullptr);
+ test_error(error, "clGetCommandBufferInfoKHR failed");
+ test_assert_error(
+ ret_context != nullptr,
+ "Unexpected result of CL_COMMAND_BUFFER_CONTEXT_KHR query!");
+
+ cl_context expected_context = nullptr;
+ error =
+ clGetCommandQueueInfo(queue, CL_QUEUE_CONTEXT, sizeof(cl_context),
+ &expected_context, nullptr);
+ test_error(error, "clGetCommandQueueInfo failed");
+
+ test_assert_error(
+ ret_context == expected_context,
+ "Unexpected result of CL_COMMAND_BUFFER_CONTEXT_KHR query!");
+
+ return TEST_PASS;
+ }
+
const cl_int pattern = 0xE;
};
@@ -352,3 +405,11 @@ int test_info_prop_array(cl_device_id device, cl_context context,
CommandBufferGetCommandBufferInfo<CombufInfoTestMode::CITM_PROP_ARRAY>>(
device, context, queue, num_elements);
}
+
+int test_info_context(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ return MakeAndRunTest<
+ CommandBufferGetCommandBufferInfo<CombufInfoTestMode::CITM_CONTEXT>>(
+ device, context, queue, num_elements);
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp
index 28d80450..c06bbf76 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_profiling.cpp
@@ -160,7 +160,7 @@ struct CommandBufferProfiling : public BasicCommandBufferTest
// verify the results by comparing timestamps
bool all_vals_0 = prof_params.front().value != 0;
- for (int i = 1; i < prof_params.size(); i++)
+ for (size_t i = 1; i < prof_params.size(); i++)
{
all_vals_0 = (prof_params[i].value != 0) ? false : all_vals_0;
if (prof_params[i - 1].value > prof_params[i].value)
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp
index d73fc9ce..82ff16f0 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_barrier.cpp
@@ -70,15 +70,42 @@ struct BarrierWithWaitListKHR : public BasicCommandBufferTest
0, nullptr, out_of_order_command_buffer, 0, nullptr, &event);
test_error(error, "clEnqueueCommandBufferKHR failed");
- std::vector<cl_int> output_data(num_elements);
+ std::vector<cl_int> output_data_1(num_elements);
error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
- data_size(), output_data.data(), 1, &event,
- nullptr);
+ data_size(), output_data_1.data(), 1,
+ &event, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < num_elements; i++)
{
- CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+ CHECK_VERIFICATION_ERROR(pattern, output_data_1[i], i);
+ }
+
+ /* Check second enqueue of command buffer */
+
+ error =
+ clEnqueueFillBuffer(queue, in_mem, &zero_pattern, sizeof(cl_int), 0,
+ data_size(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillBufferKHR failed");
+
+ error =
+ clEnqueueFillBuffer(queue, out_mem, &zero_pattern, sizeof(cl_int),
+ 0, data_size(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillBufferKHR failed");
+
+ error = clEnqueueCommandBufferKHR(
+ 0, nullptr, out_of_order_command_buffer, 0, nullptr, &event);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_int> output_data_2(num_elements);
+ error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0,
+ data_size(), output_data_2.data(), 1,
+ &event, nullptr);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ for (size_t i = 0; i < num_elements; i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern, output_data_2[i], i);
}
return CL_SUCCESS;
@@ -106,6 +133,7 @@ struct BarrierWithWaitListKHR : public BasicCommandBufferTest
}
const cl_int pattern = 0x16;
+ const cl_int zero_pattern = 0x0;
clCommandQueueWrapper out_of_order_queue;
clCommandBufferWrapper out_of_order_command_buffer;
clEventWrapper event;
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h
index 0fd2e4ec..48abe25d 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h
@@ -13,8 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef _CL_KHR_COMMAND_BUFFER_TEST_BASE_H
-#define _CL_KHR_COMMAND_BUFFER_TEST_BASE_H
+#ifndef CL_KHR_COMMAND_BUFFER_TEST_BASE_H
+#define CL_KHR_COMMAND_BUFFER_TEST_BASE_H
#include <CL/cl_ext.h>
#include "harness/deviceInfo.h"
@@ -174,4 +174,4 @@ public:
}
-#endif // _CL_KHR_COMMAND_BUFFER_TEST_BASE_H
+#endif // CL_KHR_COMMAND_BUFFER_TEST_BASE_H
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp
index 102ae761..0a30e76b 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_copy.cpp
@@ -14,6 +14,7 @@
// limitations under the License.
//
#include "basic_command_buffer.h"
+#include "svm_command_basic.h"
#include "harness/typeWrappers.h"
#include "procs.h"
@@ -38,7 +39,7 @@ struct CopyImageKHR : public BasicCommandBufferTest
cl_int Run() override
{
cl_int error = clCommandFillImageKHR(command_buffer, nullptr, src_image,
- fill_color, origin, region, 0,
+ fill_color_1, origin, region, 0,
nullptr, nullptr, nullptr);
test_error(error, "clCommandFillImageKHR failed");
@@ -56,13 +57,38 @@ struct CopyImageKHR : public BasicCommandBufferTest
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
- std::vector<cl_char> output_data(data_size);
- error = clEnqueueReadImage(queue, dst_image, CL_TRUE, origin, region, 0,
- 0, output_data.data(), 0, nullptr, nullptr);
+ std::vector<cl_char> output_data_1(data_size);
+ error =
+ clEnqueueReadImage(queue, dst_image, CL_TRUE, origin, region, 0, 0,
+ output_data_1.data(), 0, nullptr, nullptr);
for (size_t i = 0; i < data_size; i++)
{
- CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+ CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i);
+ }
+
+ /* Check second enqueue of command buffer */
+
+ error = clEnqueueFillImage(queue, src_image, fill_color_2, origin,
+ region, 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillImageKHR failed");
+
+ error = clEnqueueFillImage(queue, dst_image, fill_color_2, origin,
+ region, 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillImageKHR failed");
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_char> output_data_2(data_size);
+ error =
+ clEnqueueReadImage(queue, dst_image, CL_TRUE, origin, region, 0, 0,
+ output_data_2.data(), 0, nullptr, nullptr);
+
+ for (size_t i = 0; i < data_size; i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i);
}
return CL_SUCCESS;
@@ -97,8 +123,12 @@ struct CopyImageKHR : public BasicCommandBufferTest
const size_t data_size = img_width * img_height * 4 * sizeof(cl_char);
const size_t origin[3] = { 0, 0, 0 },
region[3] = { img_width, img_height, 1 };
- const cl_uint pattern = 0x05;
- const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern };
+ const cl_uint pattern_1 = 0x05;
+ const cl_uint fill_color_1[4] = { pattern_1, pattern_1, pattern_1,
+ pattern_1 };
+ const cl_uint pattern_2 = 0x1;
+ const cl_uint fill_color_2[4] = { pattern_2, pattern_2, pattern_2,
+ pattern_2 };
const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 };
clMemWrapper src_image;
clMemWrapper dst_image;
@@ -111,7 +141,7 @@ struct CopyBufferKHR : public BasicCommandBufferTest
cl_int Run() override
{
cl_int error = clCommandFillBufferKHR(
- command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0,
+ command_buffer, nullptr, in_mem, &pattern_1, sizeof(cl_char), 0,
data_size(), 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandFillBufferKHR failed");
@@ -127,20 +157,113 @@ struct CopyBufferKHR : public BasicCommandBufferTest
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
- std::vector<cl_char> output_data(data_size());
+ std::vector<cl_char> output_data_1(data_size());
+ error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
+ output_data_1.data(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ for (size_t i = 0; i < data_size(); i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i);
+ }
+
+ /* Check second enqueue of command buffer */
+
+ error = clEnqueueFillBuffer(queue, in_mem, &pattern_2, sizeof(cl_char),
+ 0, data_size(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillBufferKHR failed");
+
+ error = clEnqueueFillBuffer(queue, out_mem, &pattern_2, sizeof(cl_char),
+ 0, data_size(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillBufferKHR failed");
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_char> output_data_2(data_size());
error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(),
- output_data.data(), 0, nullptr, nullptr);
+ output_data_2.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < data_size(); i++)
{
- CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+ CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i);
+ }
+
+ return CL_SUCCESS;
+ }
+
+ const cl_char pattern_1 = 0x14;
+ const cl_char pattern_2 = 0x28;
+};
+
+struct CopySVMBufferKHR : public BasicSVMCommandBufferTest
+{
+ using BasicSVMCommandBufferTest::BasicSVMCommandBufferTest;
+
+ cl_int Run() override
+ {
+ cl_int error = clCommandSVMMemFillKHR(
+ command_buffer, nullptr, svm_in_mem(), &pattern_1, sizeof(cl_char),
+ data_size(), 0, nullptr, nullptr, nullptr);
+ test_error(error, "clCommandSVMMemFillKHR failed");
+
+ error = clCommandSVMMemcpyKHR(command_buffer, nullptr, svm_out_mem(),
+ svm_in_mem(), data_size(), 0, nullptr,
+ nullptr, nullptr);
+ test_error(error, "clCommandSVMMemcpyKHR failed");
+
+ error = clFinalizeCommandBufferKHR(command_buffer);
+ test_error(error, "clFinalizeCommandBufferKHR failed");
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_char> output_data_1(data_size());
+ error =
+ clEnqueueSVMMemcpy(queue, CL_TRUE, output_data_1.data(),
+ svm_out_mem(), data_size(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueSVMMemcpy failed");
+
+ for (size_t i = 0; i < data_size(); i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i);
+ }
+
+ /* Check second enqueue of command buffer */
+ error = clEnqueueSVMMemFill(queue, svm_in_mem(), &pattern_2,
+ sizeof(cl_char), data_size(), 0, nullptr,
+ nullptr);
+ test_error(error, "clEnqueueSVMMemFill failed");
+
+ error = clEnqueueSVMMemFill(queue, svm_out_mem(), &pattern_2,
+ sizeof(cl_char), data_size(), 0, nullptr,
+ nullptr);
+ test_error(error, "clEnqueueSVMMemFill failed");
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_char> output_data_2(data_size());
+
+ error =
+ clEnqueueSVMMemcpy(queue, CL_TRUE, output_data_2.data(),
+ svm_out_mem(), data_size(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueSVMMemcpy failed");
+
+ for (size_t i = 0; i < data_size(); i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i);
}
return CL_SUCCESS;
}
- const cl_char pattern = 0x14;
+ const cl_char pattern_1 = 0x14;
+ const cl_char pattern_2 = 0x28;
};
struct CopyBufferToImageKHR : public BasicCommandBufferTest
@@ -150,7 +273,7 @@ struct CopyBufferToImageKHR : public BasicCommandBufferTest
cl_int Run() override
{
cl_int error = clCommandFillBufferKHR(
- command_buffer, nullptr, buffer, &pattern, sizeof(cl_char), 0,
+ command_buffer, nullptr, buffer, &pattern_1, sizeof(cl_char), 0,
data_size, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandFillBufferKHR failed");
@@ -168,15 +291,40 @@ struct CopyBufferToImageKHR : public BasicCommandBufferTest
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
- std::vector<cl_char> output_data(data_size);
+ std::vector<cl_char> output_data_1(data_size);
+
+ error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0,
+ output_data_1.data(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueReadImage failed");
+
+ for (size_t i = 0; i < data_size; i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i);
+ }
+
+ /* Check second enqueue of command buffer */
+
+ error = clEnqueueFillBuffer(queue, buffer, &pattern_2, sizeof(cl_char),
+ 0, data_size, 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillBuffer failed");
+
+ error = clEnqueueFillImage(queue, image, &fill_color_2, origin, region,
+ 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillImage failed");
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_char> output_data_2(data_size);
error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0,
- output_data.data(), 0, nullptr, nullptr);
+ output_data_2.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadImage failed");
for (size_t i = 0; i < data_size; i++)
{
- CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+ CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i);
}
return CL_SUCCESS;
@@ -211,7 +359,14 @@ struct CopyBufferToImageKHR : public BasicCommandBufferTest
const size_t data_size = img_width * img_height * 4 * sizeof(cl_char);
const size_t origin[3] = { 0, 0, 0 },
region[3] = { img_width, img_height, 1 };
- const cl_char pattern = 0x11;
+ const cl_char pattern_1 = 0x11;
+ const cl_char pattern_2 = 0x22;
+
+ const cl_uint fill_color_2[4] = { static_cast<cl_uint>(pattern_2),
+ static_cast<cl_uint>(pattern_2),
+ static_cast<cl_uint>(pattern_2),
+ static_cast<cl_uint>(pattern_2) };
+
const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 };
clMemWrapper buffer;
@@ -225,7 +380,7 @@ struct CopyImageToBufferKHR : public BasicCommandBufferTest
cl_int Run() override
{
cl_int error =
- clCommandFillImageKHR(command_buffer, nullptr, image, fill_color,
+ clCommandFillImageKHR(command_buffer, nullptr, image, fill_color_1,
origin, region, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandFillImageKHR failed");
@@ -243,16 +398,39 @@ struct CopyImageToBufferKHR : public BasicCommandBufferTest
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
- std::vector<cl_char> output_data(data_size);
+ std::vector<cl_char> output_data_1(data_size);
+
+ error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size,
+ output_data_1.data(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ for (size_t i = 0; i < data_size; i++)
+ {
+ CHECK_VERIFICATION_ERROR(static_cast<cl_char>(pattern_1),
+ output_data_1[i], i);
+ }
+
+ error = clEnqueueFillImage(queue, image, fill_color_2, origin, region,
+ 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillImage failed");
+
+ error = clEnqueueFillBuffer(queue, buffer, &pattern_2, sizeof(cl_char),
+ 0, data_size, 0, nullptr, nullptr);
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_char> output_data_2(data_size);
error = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, data_size,
- output_data.data(), 0, nullptr, nullptr);
+ output_data_2.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < data_size; i++)
{
- CHECK_VERIFICATION_ERROR(static_cast<cl_char>(pattern),
- output_data[i], i);
+ CHECK_VERIFICATION_ERROR(static_cast<cl_char>(pattern_1),
+ output_data_2[i], i);
}
return CL_SUCCESS;
@@ -287,8 +465,12 @@ struct CopyImageToBufferKHR : public BasicCommandBufferTest
const size_t data_size = img_width * img_height * 4 * sizeof(cl_char);
const size_t origin[3] = { 0, 0, 0 },
region[3] = { img_width, img_height, 1 };
- const cl_uint pattern = 0x12;
- const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern };
+ const cl_uint pattern_1 = 0x12;
+ const cl_uint fill_color_1[4] = { pattern_1, pattern_1, pattern_1,
+ pattern_1 };
+ const cl_uint pattern_2 = 0x24;
+ const cl_uint fill_color_2[4] = { pattern_2, pattern_2, pattern_2,
+ pattern_2 };
const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 };
clMemWrapper image;
@@ -302,7 +484,7 @@ struct CopyBufferRectKHR : public BasicCommandBufferTest
cl_int Run() override
{
cl_int error = clCommandFillBufferKHR(
- command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0,
+ command_buffer, nullptr, in_mem, &pattern_1, sizeof(cl_char), 0,
data_size, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandFillBufferKHR failed");
@@ -319,14 +501,38 @@ struct CopyBufferRectKHR : public BasicCommandBufferTest
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
- std::vector<cl_char> output_data(data_size);
+ std::vector<cl_char> output_data_1(data_size);
+ error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size,
+ output_data_1.data(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ for (size_t i = 0; i < data_size; i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i);
+ }
+
+ /* Check second enqueue of command buffer */
+
+ error = clEnqueueFillBuffer(queue, in_mem, &pattern_2, sizeof(cl_char),
+ 0, data_size, 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillBuffer failed");
+
+ error = clEnqueueFillBuffer(queue, out_mem, &pattern_2, sizeof(cl_char),
+ 0, data_size, 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillBuffer failed");
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_char> output_data_2(data_size);
error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size,
- output_data.data(), 0, nullptr, nullptr);
+ output_data_2.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < data_size; i++)
{
- CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+ CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i);
}
return CL_SUCCESS;
@@ -353,7 +559,8 @@ struct CopyBufferRectKHR : public BasicCommandBufferTest
const size_t data_size = img_width * img_height * sizeof(cl_char);
const size_t origin[3] = { 0, 0, 0 },
region[3] = { img_width, img_height, 1 };
- const cl_char pattern = 0x13;
+ const cl_char pattern_1 = 0x13;
+ const cl_char pattern_2 = 0x26;
clMemWrapper in_mem;
clMemWrapper out_mem;
@@ -372,6 +579,14 @@ int test_copy_buffer(cl_device_id device, cl_context context,
return MakeAndRunTest<CopyBufferKHR>(device, context, queue, num_elements);
}
+int test_copy_svm_buffer(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ return MakeAndRunTest<CopySVMBufferKHR>(device, context, queue,
+ num_elements);
+}
+
+
int test_copy_buffer_to_image(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements)
{
diff --git a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp
index 88e97a27..67809cfb 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_fill.cpp
@@ -14,6 +14,7 @@
// limitations under the License.
//
#include "basic_command_buffer.h"
+#include "svm_command_basic.h"
#include "harness/typeWrappers.h"
#include "procs.h"
@@ -35,7 +36,7 @@ struct FillImageKHR : public BasicCommandBufferTest
cl_int Run() override
{
cl_int error =
- clCommandFillImageKHR(command_buffer, nullptr, image, fill_color,
+ clCommandFillImageKHR(command_buffer, nullptr, image, fill_color_1,
origin, region, 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandFillImageKHR failed");
@@ -47,14 +48,34 @@ struct FillImageKHR : public BasicCommandBufferTest
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
- std::vector<cl_char> output_data(data_size);
+ std::vector<cl_char> output_data_1(data_size);
error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0,
- output_data.data(), 0, nullptr, nullptr);
+ output_data_1.data(), 0, nullptr, nullptr);
for (size_t i = 0; i < data_size; i++)
{
- CHECK_VERIFICATION_ERROR(static_cast<cl_char>(pattern),
- output_data[i], i);
+ CHECK_VERIFICATION_ERROR(static_cast<cl_char>(pattern_1),
+ output_data_1[i], i);
+ }
+
+ /* Check second enqueue of command buffer */
+
+ error = clEnqueueFillImage(queue, image, fill_color_2, origin, region,
+ 0, nullptr, nullptr);
+ test_error(error, "clEnqueueFillImage failed");
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_char> output_data_2(data_size);
+ error = clEnqueueReadImage(queue, image, CL_TRUE, origin, region, 0, 0,
+ output_data_2.data(), 0, nullptr, nullptr);
+
+ for (size_t i = 0; i < data_size; i++)
+ {
+ CHECK_VERIFICATION_ERROR(static_cast<cl_char>(pattern_1),
+ output_data_2[i], i);
}
return CL_SUCCESS;
@@ -85,8 +106,12 @@ struct FillImageKHR : public BasicCommandBufferTest
const size_t data_size = img_width * img_height * 4 * sizeof(cl_char);
const size_t origin[3] = { 0, 0, 0 },
region[3] = { img_width, img_height, 1 };
- const cl_uint pattern = 0x10;
- const cl_uint fill_color[4] = { pattern, pattern, pattern, pattern };
+ const cl_uint pattern_1 = 0x10;
+ const cl_uint fill_color_1[4] = { pattern_1, pattern_1, pattern_1,
+ pattern_1 };
+ const cl_uint pattern_2 = 0x20;
+ const cl_uint fill_color_2[4] = { pattern_2, pattern_2, pattern_2,
+ pattern_2 };
const cl_image_format formats = { CL_RGBA, CL_UNSIGNED_INT8 };
clMemWrapper image;
@@ -99,7 +124,7 @@ struct FillBufferKHR : public BasicCommandBufferTest
cl_int Run() override
{
cl_int error = clCommandFillBufferKHR(
- command_buffer, nullptr, in_mem, &pattern, sizeof(cl_char), 0,
+ command_buffer, nullptr, in_mem, &pattern_1, sizeof(cl_char), 0,
data_size(), 0, nullptr, nullptr, nullptr);
test_error(error, "clCommandFillBufferKHR failed");
@@ -111,22 +136,100 @@ struct FillBufferKHR : public BasicCommandBufferTest
nullptr, nullptr);
test_error(error, "clEnqueueCommandBufferKHR failed");
- std::vector<cl_char> output_data(data_size());
+ std::vector<cl_char> output_data_1(data_size());
+ error = clEnqueueReadBuffer(queue, in_mem, CL_TRUE, 0, data_size(),
+ output_data_1.data(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueReadBuffer failed");
+
+ for (size_t i = 0; i < data_size(); i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i);
+ }
+
+ /* Check second enqueue of command buffer */
+
+ clEnqueueFillBuffer(queue, in_mem, &pattern_2, sizeof(cl_char), 0,
+ data_size(), 0, nullptr, nullptr);
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_char> output_data_2(data_size());
error = clEnqueueReadBuffer(queue, in_mem, CL_TRUE, 0, data_size(),
- output_data.data(), 0, nullptr, nullptr);
+ output_data_2.data(), 0, nullptr, nullptr);
test_error(error, "clEnqueueReadBuffer failed");
for (size_t i = 0; i < data_size(); i++)
{
- CHECK_VERIFICATION_ERROR(pattern, output_data[i], i);
+ CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i);
}
return CL_SUCCESS;
}
- const char pattern = 0x15;
+ const char pattern_1 = 0x15;
+ const char pattern_2 = 0x30;
};
+struct FillSVMBufferKHR : public BasicSVMCommandBufferTest
+{
+ using BasicSVMCommandBufferTest::BasicSVMCommandBufferTest;
+
+ cl_int Run() override
+ {
+ cl_int error = clCommandSVMMemFillKHR(
+ command_buffer, nullptr, svm_in_mem(), &pattern_1, sizeof(cl_char),
+ data_size(), 0, nullptr, nullptr, nullptr);
+ test_error(error, "clCommandSVMMemFillKHR failed");
+
+ error = clFinalizeCommandBufferKHR(command_buffer);
+ test_error(error, "clFinalizeCommandBufferKHR failed");
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_char> output_data_1(data_size());
+
+ error =
+ clEnqueueSVMMemcpy(queue, CL_TRUE, output_data_1.data(),
+ svm_in_mem(), data_size(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueSVMMemcpy failed");
+
+ for (size_t i = 0; i < data_size(); i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern_1, output_data_1[i], i);
+ }
+
+ /* Check second enqueue of command buffer */
+ error = clEnqueueSVMMemFill(queue, svm_in_mem(), &pattern_2,
+ sizeof(cl_char), data_size(), 0, nullptr,
+ nullptr);
+ test_error(error, "clEnqueueSVMMemFill failed");
+
+ error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0,
+ nullptr, nullptr);
+ test_error(error, "clEnqueueCommandBufferKHR failed");
+
+ std::vector<cl_char> output_data_2(data_size());
+
+ error =
+ clEnqueueSVMMemcpy(queue, CL_TRUE, output_data_2.data(),
+ svm_in_mem(), data_size(), 0, nullptr, nullptr);
+ test_error(error, "clEnqueueSVMMemcpy failed");
+
+ for (size_t i = 0; i < data_size(); i++)
+ {
+ CHECK_VERIFICATION_ERROR(pattern_1, output_data_2[i], i);
+ }
+
+ return CL_SUCCESS;
+ }
+
+ const char pattern_1 = 0x15;
+ const char pattern_2 = 0x30;
+};
};
int test_fill_buffer(cl_device_id device, cl_context context,
@@ -135,6 +238,14 @@ int test_fill_buffer(cl_device_id device, cl_context context,
return MakeAndRunTest<FillBufferKHR>(device, context, queue, num_elements);
}
+int test_fill_svm_buffer(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ return MakeAndRunTest<FillSVMBufferKHR>(device, context, queue,
+ num_elements);
+}
+
+
int test_fill_image(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements)
{
diff --git a/test_conformance/extensions/cl_khr_command_buffer/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/main.cpp
index 4eefc8ab..4ecb0806 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/main.cpp
+++ b/test_conformance/extensions/cl_khr_command_buffer/main.cpp
@@ -26,6 +26,7 @@ test_definition test_list[] = {
ADD_TEST(info_ref_count),
ADD_TEST(info_state),
ADD_TEST(info_prop_array),
+ ADD_TEST(info_context),
ADD_TEST(basic_profiling),
ADD_TEST(simultaneous_profiling),
ADD_TEST(regular_wait_for_command_buffer),
@@ -44,8 +45,10 @@ test_definition test_list[] = {
ADD_TEST(simultaneous_queue_substitution),
ADD_TEST(fill_image),
ADD_TEST(fill_buffer),
+ ADD_TEST(fill_svm_buffer),
ADD_TEST(copy_image),
ADD_TEST(copy_buffer),
+ ADD_TEST(copy_svm_buffer),
ADD_TEST(copy_buffer_to_image),
ADD_TEST(copy_image_to_buffer),
ADD_TEST(copy_buffer_rect),
@@ -58,7 +61,9 @@ test_definition test_list[] = {
ADD_TEST(event_info_command_queue),
ADD_TEST(event_info_execution_status),
ADD_TEST(event_info_context),
- ADD_TEST(event_info_reference_count)
+ ADD_TEST(event_info_reference_count),
+ ADD_TEST(finalize_invalid),
+ ADD_TEST(finalize_empty)
};
int main(int argc, const char *argv[])
diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h
index 63e004a7..ce121cea 100644
--- a/test_conformance/extensions/cl_khr_command_buffer/procs.h
+++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h
@@ -13,8 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-#ifndef _CL_KHR_COMMAND_BUFFER_PROCS_H
-#define _CL_KHR_COMMAND_BUFFER_PROCS_H
+#ifndef CL_KHR_COMMAND_BUFFER_PROCS_H
+#define CL_KHR_COMMAND_BUFFER_PROCS_H
#include <CL/cl.h>
@@ -41,6 +41,8 @@ extern int test_info_state(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements);
extern int test_info_prop_array(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements);
+extern int test_info_context(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements);
extern int test_basic_set_kernel_arg(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements);
extern int test_pending_set_kernel_arg(cl_device_id device, cl_context context,
@@ -101,10 +103,14 @@ extern int test_fill_image(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements);
extern int test_fill_buffer(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements);
+extern int test_fill_svm_buffer(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements);
extern int test_copy_image(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements);
extern int test_copy_buffer(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements);
+extern int test_copy_svm_buffer(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements);
extern int test_copy_buffer_to_image(cl_device_id device, cl_context context,
cl_command_queue queue, int num_elements);
extern int test_copy_image_to_buffer(cl_device_id device, cl_context context,
@@ -130,5 +136,9 @@ extern int test_event_info_reference_count(cl_device_id device,
cl_context context,
cl_command_queue queue,
int num_elements);
+extern int test_finalize_invalid(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements);
+extern int test_finalize_empty(cl_device_id device, cl_context context,
+ cl_command_queue queue, int num_elements);
-#endif /*_CL_KHR_COMMAND_BUFFER_PROCS_H*/
+#endif // CL_KHR_COMMAND_BUFFER_PROCS_H
diff --git a/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.cpp b/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.cpp
new file mode 100644
index 00000000..1fc48ce5
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.cpp
@@ -0,0 +1,94 @@
+//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include "svm_command_basic.h"
+
+//--------------------------------------------------------------------------
+
+// Decides whether the SVM command-buffer tests have to be skipped.
+//
+// Returns true (skip) when any of the following holds:
+//   - the base-class BasicCommandBufferTest::Skip() asks to skip,
+//   - the device reports an OpenCL version below 2.0,
+//   - the CL_DEVICE_SVM_CAPABILITIES query fails or reports no capability
+//     bits at all,
+//   - init_extension_functions() does not return CL_SUCCESS.
+// Returns false when none of the above applies.
+bool BasicSVMCommandBufferTest::Skip()
+{
+    if (BasicCommandBufferTest::Skip()) return true;
+
+    Version version = get_device_cl_version(device);
+    if (version < Version(2, 0))
+    {
+        // Newline-terminated like the other log messages so the following
+        // log output starts on its own line.
+        log_info("test requires OpenCL 2.x/3.0 device\n");
+        return true;
+    }
+
+    cl_device_svm_capabilities svm_capabilities;
+    cl_int error =
+        clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES,
+                        sizeof(svm_capabilities), &svm_capabilities, NULL);
+    if (error != CL_SUCCESS)
+    {
+        // A failed query is reported but treated as "skip", not as a failure.
+        print_error(error, "Unable to query CL_DEVICE_SVM_CAPABILITIES");
+        return true;
+    }
+
+    // A zero bitfield means the device advertises no SVM capability.
+    if (svm_capabilities == 0)
+    {
+        log_info("Device property CL_DEVICE_SVM_COARSE_GRAIN_BUFFER not "
+                 "supported \n");
+        return true;
+    }
+
+    if (init_extension_functions() != CL_SUCCESS)
+    {
+        log_error("Unable to initialise extension functions\n");
+        return true;
+    }
+
+    return false;
+}
+
+//--------------------------------------------------------------------------
+
+// Allocates the two SVM regions (svm_in_mem and svm_out_mem) used by the SVM
+// command-buffer tests. Each region is
+// sizeof(cl_int) * num_elements * buffer_size_multiplier bytes.
+//
+// Returns CL_SUCCESS when both allocations yield a non-null pointer, and
+// CL_OUT_OF_RESOURCES as soon as one of them is null.
+cl_int BasicSVMCommandBufferTest::SetUpKernelArgs(void)
+{
+    size_t size = sizeof(cl_int) * num_elements * buffer_size_multiplier;
+    svm_in_mem = clSVMWrapper(context, size);
+    if (svm_in_mem() == nullptr)
+    {
+        // Newline-terminated so the message does not run into the next log
+        // line.
+        log_error("Unable to allocate SVM memory\n");
+        return CL_OUT_OF_RESOURCES;
+    }
+    svm_out_mem = clSVMWrapper(context, size);
+    if (svm_out_mem() == nullptr)
+    {
+        log_error("Unable to allocate SVM memory\n");
+        return CL_OUT_OF_RESOURCES;
+    }
+    return CL_SUCCESS;
+}
+
+//--------------------------------------------------------------------------
+
+// Initialises the extension entry points needed by the SVM command-buffer
+// tests: first the ones handled by the base class, then
+// clCommandSVMMemFillKHR and clCommandSVMMemcpyKHR.
+//
+// Returns CL_SUCCESS when the end of the function is reached.
+// NOTE(review): test_error and GET_EXTENSION_ADDRESS are project macros not
+// visible here; presumably they return early from this function on failure -
+// confirm against the harness headers.
+cl_int BasicSVMCommandBufferTest::init_extension_functions()
+{
+ // Let the base class set up its own extension functions first.
+ cl_int error = BasicCommandBufferTest::init_extension_functions();
+ test_error(error, "Unable to initialise extension functions");
+
+ // `platform` is not referenced by name below; presumably
+ // GET_EXTENSION_ADDRESS uses it implicitly - verify against the macro.
+ cl_platform_id platform;
+ error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id),
+ &platform, nullptr);
+ test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");
+
+ GET_EXTENSION_ADDRESS(clCommandSVMMemFillKHR);
+ GET_EXTENSION_ADDRESS(clCommandSVMMemcpyKHR);
+
+ return CL_SUCCESS;
+}
diff --git a/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.h b/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.h
new file mode 100644
index 00000000..f6b6b427
--- /dev/null
+++ b/test_conformance/extensions/cl_khr_command_buffer/svm_command_basic.h
@@ -0,0 +1,42 @@
+//
+// Copyright (c) 2023 The Khronos Group Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#ifndef CL_KHR_SVM_COMMAND_BASIC_H
+#define CL_KHR_SVM_COMMAND_BASIC_H
+
+#include "basic_command_buffer.h"
+
+
+// Test fixture for command-buffer tests that operate on SVM allocations.
+// Extends BasicCommandBufferTest with its own Skip() and SetUpKernelArgs()
+// overrides, the two SVM command entry points, and an input/output pair of
+// SVM allocations.
+struct BasicSVMCommandBufferTest : BasicCommandBufferTest
+{
+    // Forwards device, context and queue unchanged to the base fixture.
+    BasicSVMCommandBufferTest(cl_device_id device, cl_context context,
+                              cl_command_queue queue)
+        : BasicCommandBufferTest(device, context, queue)
+    {}
+
+    // Returns true when the test must be skipped.
+    bool Skip() override;
+
+    // Allocates svm_in_mem and svm_out_mem; returns an OpenCL status code.
+    cl_int SetUpKernelArgs() override;
+
+protected:
+    // Sets up the extension entry points declared below.
+    cl_int init_extension_functions();
+
+    // Extension entry points; null until init_extension_functions() runs.
+    clCommandSVMMemFillKHR_fn clCommandSVMMemFillKHR = nullptr;
+    clCommandSVMMemcpyKHR_fn clCommandSVMMemcpyKHR = nullptr;
+
+    // SVM allocations assigned in SetUpKernelArgs().
+    clSVMWrapper svm_in_mem;
+    clSVMWrapper svm_out_mem;
+};
+
+#endif
diff --git a/test_conformance/extensions/cl_khr_external_semaphore/procs.h b/test_conformance/extensions/cl_khr_external_semaphore/procs.h
index 753c8fe2..7e1c4caf 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore/procs.h
+++ b/test_conformance/extensions/cl_khr_external_semaphore/procs.h
@@ -13,8 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-#ifndef _CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H
-#define _CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H
+#ifndef CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H
+#define CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H
#include <CL/cl.h>
@@ -79,4 +79,4 @@ extern int test_external_semaphores_invalid_command(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
int num_elements);
-#endif /* CL_KHR_EXTERNAL_SEMAPHORE */
+#endif // CL_KHR_EXTERNAL_SEMAPHORE_PROCS_H
diff --git a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp
index a7ed307e..89ab17b3 100644
--- a/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp
+++ b/test_conformance/extensions/cl_khr_external_semaphore/test_external_semaphore.cpp
@@ -120,9 +120,11 @@ int test_external_semaphores_queries(cl_device_id deviceID, cl_context context,
SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_TYPE_KHR, cl_semaphore_type_khr,
CL_SEMAPHORE_TYPE_BINARY_KHR);
- SEMAPHORE_PARAM_TEST(CL_DEVICE_HANDLE_LIST_KHR, cl_uint, 1);
+ SEMAPHORE_PARAM_TEST(CL_DEVICE_HANDLE_LIST_KHR, cl_device_id, deviceID);
- SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, cl_uint, 1);
+ SEMAPHORE_PARAM_TEST(
+ CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, cl_uint,
+ getCLSemaphoreTypeFromVulkanType(vkExternalSemaphoreHandleType));
// Confirm that querying CL_SEMAPHORE_CONTEXT_KHR returns the right context
SEMAPHORE_PARAM_TEST(CL_SEMAPHORE_CONTEXT_KHR, cl_context, context);
@@ -290,7 +292,7 @@ static int semaphore_external_cross_queue_helper(cl_device_id deviceID,
nullptr, 0, nullptr, &wait_event);
test_error(err, "Could not wait semaphore");
- // Finish queue_1 and queue_2
+ // Finish queue_1 and queue_2
err = clFinish(queue_1);
test_error(err, "Could not finish queue");
@@ -304,7 +306,7 @@ static int semaphore_external_cross_queue_helper(cl_device_id deviceID,
return TEST_PASS;
}
-// Confirm that a signal followed by a wait will complete successfully
+// Confirm that a signal followed by a wait will complete successfully
int test_external_semaphores_simple_1(cl_device_id deviceID, cl_context context,
cl_command_queue defaultQueue,
int num_elements)
@@ -931,420 +933,3 @@ int test_external_semaphores_multi_wait(cl_device_id deviceID,
return TEST_PASS;
}
-
-// Confirm that it is possible to enqueue a signal of wait and signal in any
-// order as soon as the submission order (after deferred dependencies) is
-// correct. Case: first one deferred wait, then one non deferred signal.
-int test_external_semaphores_order_1(cl_device_id deviceID, cl_context context,
- cl_command_queue defaultQueue,
- int num_elements)
-{
- if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
- {
- log_info("cl_khr_semaphore is not supported on this platoform. "
- "Skipping test.\n");
- return TEST_SKIPPED_ITSELF;
- }
-
- if (init_vuikan_device())
- {
- log_info("Cannot initialise Vulkan. "
- "Skipping test.\n");
- return TEST_SKIPPED_ITSELF;
- }
-
- VulkanDevice vkDevice;
-
- // Obtain pointers to semaphore's API
- GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
- GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
-
- const std::vector<VulkanExternalMemoryHandleType>
- vkExternalMemoryHandleTypeList =
- getSupportedVulkanExternalMemoryHandleTypeList();
- VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
- getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
- VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
-
- clExternalSemaphore sema_ext(vkVk2CLSemaphore, context,
- vkExternalSemaphoreHandleType, deviceID);
-
- cl_int err = CL_SUCCESS;
-
- // Create ooo queue
- clCommandQueueWrapper queue = clCreateCommandQueue(
- context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
- test_error(err, "Could not create command queue");
-
- // Create user event
- clEventWrapper user_event = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- // Wait semaphore (dependency on user_event)
- clEventWrapper wait_event;
- err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
- nullptr, 1, &user_event, &wait_event);
- test_error(err, "Could not wait semaphore");
-
- // Signal semaphore
- clEventWrapper signal_event;
- err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
- nullptr, 0, nullptr, &signal_event);
- test_error(err, "Could not signal semaphore");
-
- // Flush and delay
- err = clFlush(queue);
- test_error(err, "Could not flush queue");
- std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
-
- // Ensure signal event is completed while wait event is not
- test_assert_event_complete(signal_event);
- test_assert_event_inprogress(wait_event);
-
- // Complete user_event
- err = clSetUserEventStatus(user_event, CL_COMPLETE);
- test_error(err, "Could not set user event to CL_COMPLETE");
-
- // Finish
- err = clFinish(queue);
- test_error(err, "Could not finish queue");
-
- // Ensure all events are completed
- test_assert_event_complete(signal_event);
- test_assert_event_complete(wait_event);
-
- return TEST_PASS;
-}
-
-// Confirm that it is possible to enqueue a signal of wait and signal in any
-// order as soon as the submission order (after deferred dependencies) is
-// correct. Case: first two deferred signals, then one deferred wait. Unblock
-// signal, then unblock wait. When wait completes, unblock the other signal.
-int test_external_semaphores_order_2(cl_device_id deviceID, cl_context context,
- cl_command_queue defaultQueue,
- int num_elements)
-{
- if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
- {
- log_info("cl_khr_semaphore is not supported on this platoform. "
- "Skipping test.\n");
- return TEST_SKIPPED_ITSELF;
- }
-
- if (init_vuikan_device())
- {
- log_info("Cannot initialise Vulkan. "
- "Skipping test.\n");
- return TEST_SKIPPED_ITSELF;
- }
-
- VulkanDevice vkDevice;
-
- // Obtain pointers to semaphore's API
- GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
- GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
-
- const std::vector<VulkanExternalMemoryHandleType>
- vkExternalMemoryHandleTypeList =
- getSupportedVulkanExternalMemoryHandleTypeList();
- VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
- getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
- VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
-
- clExternalSemaphore sema_ext(vkVk2CLSemaphore, context,
- vkExternalSemaphoreHandleType, deviceID);
-
- cl_int err = CL_SUCCESS;
-
- // Create ooo queue
- clCommandQueueWrapper queue = clCreateCommandQueue(
- context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
- test_error(err, "Could not create command queue");
-
- // Create user events
- clEventWrapper user_event_1 = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- clEventWrapper user_event_2 = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- clEventWrapper user_event_3 = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- // Signal semaphore (dependency on user_event_1)
- clEventWrapper signal_1_event;
- err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
- nullptr, 1, &user_event_1,
- &signal_1_event);
- test_error(err, "Could not signal semaphore");
-
- // Signal semaphore (dependency on user_event_2)
- clEventWrapper signal_2_event;
- err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
- nullptr, 1, &user_event_2,
- &signal_2_event);
- test_error(err, "Could not signal semaphore");
-
- // Wait semaphore (dependency on user_event_3)
- clEventWrapper wait_event;
- err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
- nullptr, 1, &user_event_3, &wait_event);
- test_error(err, "Could not wait semaphore");
-
- // Complete user_event_1
- err = clSetUserEventStatus(user_event_1, CL_COMPLETE);
- test_error(err, "Could not set user event to CL_COMPLETE");
-
- // Complete user_event_3
- err = clSetUserEventStatus(user_event_3, CL_COMPLETE);
- test_error(err, "Could not set user event to CL_COMPLETE");
-
- // Flush and delay
- err = clFlush(queue);
- test_error(err, "Could not flush queue");
- std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
-
- // Ensure all events are completed except for second signal
- test_assert_event_complete(signal_1_event);
- test_assert_event_inprogress(signal_2_event);
- test_assert_event_complete(wait_event);
-
- // Complete user_event_2
- err = clSetUserEventStatus(user_event_2, CL_COMPLETE);
- test_error(err, "Could not set user event to CL_COMPLETE");
-
- // Finish
- err = clFinish(queue);
- test_error(err, "Could not finish queue");
-
- // Ensure all events are completed
- test_assert_event_complete(signal_1_event);
- test_assert_event_complete(signal_2_event);
- test_assert_event_complete(wait_event);
-
- return TEST_PASS;
-}
-
-// Confirm that it is possible to enqueue a signal of wait and signal in any
-// order as soon as the submission order (after deferred dependencies) is
-// correct. Case: first two deferred signals, then two deferred waits. Unblock
-// one signal and one wait (both blocked by the same user event). When wait
-// completes, unblock the other signal. Then unblock the other wait.
-int test_external_semaphores_order_3(cl_device_id deviceID, cl_context context,
- cl_command_queue defaultQueue,
- int num_elements)
-{
- if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
- {
- log_info("cl_khr_semaphore is not supported on this platoform. "
- "Skipping test.\n");
- return TEST_SKIPPED_ITSELF;
- }
-
- if (init_vuikan_device())
- {
- log_info("Cannot initialise Vulkan. "
- "Skipping test.\n");
- return TEST_SKIPPED_ITSELF;
- }
-
- VulkanDevice vkDevice;
-
- // Obtain pointers to semaphore's API
- GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
- GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
-
- const std::vector<VulkanExternalMemoryHandleType>
- vkExternalMemoryHandleTypeList =
- getSupportedVulkanExternalMemoryHandleTypeList();
- VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
- getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
- VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
-
- clExternalSemaphore sema_ext(vkVk2CLSemaphore, context,
- vkExternalSemaphoreHandleType, deviceID);
-
- cl_int err = CL_SUCCESS;
-
- // Create ooo queue
- clCommandQueueWrapper queue = clCreateCommandQueue(
- context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
- test_error(err, "Could not create command queue");
-
- // Create user events
- clEventWrapper user_event_1 = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- clEventWrapper user_event_2 = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- clEventWrapper user_event_3 = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- // Signal semaphore (dependency on user_event_1)
- clEventWrapper signal_1_event;
- err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
- nullptr, 1, &user_event_1,
- &signal_1_event);
- test_error(err, "Could not signal semaphore");
-
- // Signal semaphore (dependency on user_event_2)
- clEventWrapper signal_2_event;
- err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
- nullptr, 1, &user_event_2,
- &signal_2_event);
- test_error(err, "Could not signal semaphore");
-
- // Wait semaphore (dependency on user_event_3)
- clEventWrapper wait_1_event;
- err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
- nullptr, 1, &user_event_3, &wait_1_event);
- test_error(err, "Could not wait semaphore");
-
- // Wait semaphore (dependency on user_event_2)
- clEventWrapper wait_2_event;
- err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema_ext.getCLSemaphore(),
- nullptr, 1, &user_event_2, &wait_2_event);
- test_error(err, "Could not wait semaphore");
-
- // Complete user_event_2
- err = clSetUserEventStatus(user_event_2, CL_COMPLETE);
- test_error(err, "Could not set user event to CL_COMPLETE");
-
- // Flush and delay
- err = clFlush(queue);
- test_error(err, "Could not flush queue");
- std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
-
- // Ensure only second signal and second wait completed
- cl_event event_list[] = { signal_2_event, wait_2_event };
- err = clWaitForEvents(2, event_list);
- test_error(err, "Could not wait for events");
-
- test_assert_event_inprogress(signal_1_event);
- test_assert_event_inprogress(wait_1_event);
-
- // Complete user_event_1
- err = clSetUserEventStatus(user_event_1, CL_COMPLETE);
- test_error(err, "Could not set user event to CL_COMPLETE");
-
- // Complete user_event_3
- err = clSetUserEventStatus(user_event_3, CL_COMPLETE);
- test_error(err, "Could not set user event to CL_COMPLETE");
-
- // Finish
- err = clFinish(queue);
- test_error(err, "Could not finish queue");
-
- // Ensure all events are completed
- test_assert_event_complete(signal_1_event);
- test_assert_event_complete(signal_2_event);
- test_assert_event_complete(wait_1_event);
- test_assert_event_complete(wait_2_event);
-
- return TEST_PASS;
-}
-
-// Test that an invalid semaphore command results in the invalidation of the
-// command's event and the dependencies' events
-int test_external_semaphores_invalid_command(cl_device_id deviceID,
- cl_context context,
- cl_command_queue defaultQueue,
- int num_elements)
-{
- if (!is_extension_available(deviceID, "cl_khr_external_semaphore"))
- {
- log_info("cl_khr_semaphore is not supported on this platoform. "
- "Skipping test.\n");
- return TEST_SKIPPED_ITSELF;
- }
-
- if (init_vuikan_device())
- {
- log_info("Cannot initialise Vulkan. "
- "Skipping test.\n");
- return TEST_SKIPPED_ITSELF;
- }
-
- VulkanDevice vkDevice;
-
- // Obtain pointers to semaphore's API
- GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
- GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
-
- const std::vector<VulkanExternalMemoryHandleType>
- vkExternalMemoryHandleTypeList =
- getSupportedVulkanExternalMemoryHandleTypeList();
- VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType =
- getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
- VulkanSemaphore vkVk2CLSemaphore1(vkDevice, vkExternalSemaphoreHandleType);
- VulkanSemaphore vkVk2CLSemaphore2(vkDevice, vkExternalSemaphoreHandleType);
-
- clExternalSemaphore sema_ext_1(vkVk2CLSemaphore1, context,
- vkExternalSemaphoreHandleType, deviceID);
- clExternalSemaphore sema_ext_2(vkVk2CLSemaphore2, context,
- vkExternalSemaphoreHandleType, deviceID);
-
- cl_int err = CL_SUCCESS;
-
- // Create ooo queue
- clCommandQueueWrapper queue = clCreateCommandQueue(
- context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
- test_error(err, "Could not create command queue");
-
- // Create user events
- clEventWrapper user_event_1 = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- clEventWrapper user_event_2 = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- // Signal semaphore_1 (dependency on user_event_1)
- clEventWrapper signal_1_event;
- err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_1.getCLSemaphore(),
- nullptr, 1, &user_event_1,
- &signal_1_event);
- test_error(err, "Could not signal semaphore");
-
- // Wait semaphore_1 and semaphore_2 (dependency on user_event_1)
- clEventWrapper wait_event;
- cl_semaphore_khr sema_list[] = { sema_ext_1.getCLSemaphore(),
- sema_ext_2.getCLSemaphore() };
- err = clEnqueueWaitSemaphoresKHR(queue, 2, sema_list, nullptr, 1,
- &user_event_1, &wait_event);
- test_error(err, "Could not wait semaphore");
-
- // Signal semaphore_1 (dependency on wait_event and user_event_2)
- clEventWrapper signal_2_event;
- cl_event wait_list[] = { user_event_2, wait_event };
- err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_ext_1.getCLSemaphore(),
- nullptr, 2, wait_list, &signal_2_event);
- test_error(err, "Could not signal semaphore");
-
- // Flush and delay
- err = clFlush(queue);
- test_error(err, "Could not flush queue");
- std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
-
- // Ensure all events are not completed
- test_assert_event_inprogress(signal_1_event);
- test_assert_event_inprogress(signal_2_event);
- test_assert_event_inprogress(wait_event);
-
- // Complete user_event_1 (expect failure as waiting on semaphore_2 is not
- // allowed (unsignaled)
- err = clSetUserEventStatus(user_event_1, CL_COMPLETE);
- test_assert_error(err != CL_SUCCESS,
- "signal_2_event completed unexpectedly");
-
- // Ensure signal_1 is completed while others failed (the second signal
- // should fail as it depends on wait)
- err = clFinish(queue);
- test_error(err, "Could not finish queue");
-
- test_assert_event_complete(signal_1_event);
- test_assert_event_terminated(wait_event);
- test_assert_event_terminated(signal_2_event);
-
- return TEST_PASS;
-}
diff --git a/test_conformance/extensions/cl_khr_semaphore/main.cpp b/test_conformance/extensions/cl_khr_semaphore/main.cpp
index ab9699b0..0ae7206a 100644
--- a/test_conformance/extensions/cl_khr_semaphore/main.cpp
+++ b/test_conformance/extensions/cl_khr_semaphore/main.cpp
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2017 The Khronos Group Inc.
+// Copyright (c) 2023 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -34,11 +34,7 @@ test_definition test_list[] = {
ADD_TEST_VERSION(semaphores_multi_signal, Version(1, 2)),
ADD_TEST_VERSION(semaphores_multi_wait, Version(1, 2)),
ADD_TEST_VERSION(semaphores_queries, Version(1, 2)),
- ADD_TEST_VERSION(semaphores_order_1, Version(1, 2)),
- ADD_TEST_VERSION(semaphores_order_2, Version(1, 2)),
- ADD_TEST_VERSION(semaphores_order_3, Version(1, 2)),
ADD_TEST_VERSION(semaphores_import_export_fd, Version(1, 2)),
- ADD_TEST_VERSION(semaphores_invalid_command, Version(1, 2)),
};
const int test_num = ARRAY_SIZE(test_list);
diff --git a/test_conformance/extensions/cl_khr_semaphore/procs.h b/test_conformance/extensions/cl_khr_semaphore/procs.h
index 06651af4..f7c1aaa3 100644
--- a/test_conformance/extensions/cl_khr_semaphore/procs.h
+++ b/test_conformance/extensions/cl_khr_semaphore/procs.h
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2017 The Khronos Group Inc.
+// Copyright (c) 2023 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -41,17 +41,7 @@ extern int test_semaphores_multi_wait(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements);
extern int test_semaphores_queries(cl_device_id deviceID, cl_context context,
cl_command_queue queue, int num_elements);
-extern int test_semaphores_order_1(cl_device_id deviceID, cl_context context,
- cl_command_queue queue, int num_elements);
-extern int test_semaphores_order_2(cl_device_id deviceID, cl_context context,
- cl_command_queue queue, int num_elements);
-extern int test_semaphores_order_3(cl_device_id deviceID, cl_context context,
- cl_command_queue queue, int num_elements);
extern int test_semaphores_import_export_fd(cl_device_id deviceID,
cl_context context,
cl_command_queue queue,
int num_elements);
-extern int test_semaphores_invalid_command(cl_device_id deviceID,
- cl_context context,
- cl_command_queue queue,
- int num_elements);
diff --git a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp
index 7d03bff3..36bb8ad5 100644
--- a/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp
+++ b/test_conformance/extensions/cl_khr_semaphore/test_semaphores.cpp
@@ -1,5 +1,5 @@
//
-// Copyright (c) 2022 The Khronos Group Inc.
+// Copyright (c) 2023 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -646,303 +646,6 @@ int test_semaphores_queries(cl_device_id deviceID, cl_context context,
return TEST_PASS;
}
-// Confirm that it is possible to enqueue a signal of wait and signal in any
-// order as soon as the submission order (after deferred dependencies) is
-// correct. Case: first one deferred wait, then one non deferred signal.
-int test_semaphores_order_1(cl_device_id deviceID, cl_context context,
- cl_command_queue defaultQueue, int num_elements)
-{
- cl_int err;
-
- if (!is_extension_available(deviceID, "cl_khr_semaphore"))
- {
- log_info("cl_khr_semaphore is not supported on this platoform. "
- "Skipping test.\n");
- return TEST_SKIPPED_ITSELF;
- }
-
- // Obtain pointers to semaphore's API
- GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
- GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
- GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
- GET_PFN(deviceID, clReleaseSemaphoreKHR);
-
- // Create ooo queue
- clCommandQueueWrapper queue = clCreateCommandQueue(
- context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
- test_error(err, "Could not create command queue");
-
- // Create semaphore
- cl_semaphore_properties_khr sema_props[] = {
- static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
- static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
- 0
- };
- cl_semaphore_khr sema =
- clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
- test_error(err, "Could not create semaphore");
-
- // Create user event
- clEventWrapper user_event = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- // Wait semaphore (dependency on user_event)
- clEventWrapper wait_event;
- err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event,
- &wait_event);
- test_error(err, "Could not wait semaphore");
-
- // Signal semaphore
- clEventWrapper signal_event;
- err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 0, nullptr,
- &signal_event);
- test_error(err, "Could not signal semaphore");
-
- // Flush and delay
- err = clFlush(queue);
- test_error(err, "Could not flush queue");
- std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
-
- // Ensure signal event is completed while wait event is not
- test_assert_event_complete(signal_event);
- test_assert_event_inprogress(wait_event);
-
- // Complete user_event
- err = clSetUserEventStatus(user_event, CL_COMPLETE);
- test_error(err, "Could not set user event to CL_COMPLETE");
-
- // Finish
- err = clFinish(queue);
- test_error(err, "Could not finish queue");
-
- // Ensure all events are completed
- test_assert_event_complete(signal_event);
- test_assert_event_complete(wait_event);
-
- // Release semaphore
- err = clReleaseSemaphoreKHR(sema);
- test_error(err, "Could not release semaphore");
-
- return TEST_PASS;
-}
-
-// Confirm that it is possible to enqueue a signal of wait and signal in any
-// order as soon as the submission order (after deferred dependencies) is
-// correct. Case: first two deferred signals, then one deferred wait. Unblock
-// signal, then unblock wait. When wait completes, unblock the other signal.
-int test_semaphores_order_2(cl_device_id deviceID, cl_context context,
- cl_command_queue defaultQueue, int num_elements)
-{
- cl_int err;
-
- if (!is_extension_available(deviceID, "cl_khr_semaphore"))
- {
- log_info("cl_khr_semaphore is not supported on this platoform. "
- "Skipping test.\n");
- return TEST_SKIPPED_ITSELF;
- }
-
- // Obtain pointers to semaphore's API
- GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
- GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
- GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
- GET_PFN(deviceID, clReleaseSemaphoreKHR);
-
- // Create ooo queue
- clCommandQueueWrapper queue = clCreateCommandQueue(
- context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
- test_error(err, "Could not create command queue");
-
- // Create semaphore
- cl_semaphore_properties_khr sema_props[] = {
- static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
- static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
- 0
- };
- cl_semaphore_khr sema =
- clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
- test_error(err, "Could not create semaphore");
-
- // Create user events
- clEventWrapper user_event_1 = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- clEventWrapper user_event_2 = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- clEventWrapper user_event_3 = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- // Signal semaphore (dependency on user_event_1)
- clEventWrapper signal_1_event;
- err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1,
- &user_event_1, &signal_1_event);
- test_error(err, "Could not signal semaphore");
-
- // Signal semaphore (dependency on user_event_2)
- clEventWrapper signal_2_event;
- err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1,
- &user_event_2, &signal_2_event);
- test_error(err, "Could not signal semaphore");
-
- // Wait semaphore (dependency on user_event_3)
- clEventWrapper wait_event;
- err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event_3,
- &wait_event);
- test_error(err, "Could not wait semaphore");
-
- // Complete user_event_1
- err = clSetUserEventStatus(user_event_1, CL_COMPLETE);
- test_error(err, "Could not set user event to CL_COMPLETE");
-
- // Complete user_event_3
- err = clSetUserEventStatus(user_event_3, CL_COMPLETE);
- test_error(err, "Could not set user event to CL_COMPLETE");
-
- // Flush and delay
- err = clFlush(queue);
- test_error(err, "Could not flush queue");
- std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
-
- // Ensure all events are completed except for second signal
- test_assert_event_complete(signal_1_event);
- test_assert_event_inprogress(signal_2_event);
- test_assert_event_complete(wait_event);
-
- // Complete user_event_2
- err = clSetUserEventStatus(user_event_2, CL_COMPLETE);
- test_error(err, "Could not set user event to CL_COMPLETE");
-
- // Finish
- err = clFinish(queue);
- test_error(err, "Could not finish queue");
-
- // Ensure all events are completed
- test_assert_event_complete(signal_1_event);
- test_assert_event_complete(signal_2_event);
- test_assert_event_complete(wait_event);
-
- // Release semaphore
- err = clReleaseSemaphoreKHR(sema);
- test_error(err, "Could not release semaphore");
-
- return TEST_PASS;
-}
-
-// Confirm that it is possible to enqueue a signal of wait and signal in any
-// order as soon as the submission order (after deferred dependencies) is
-// correct. Case: first two deferred signals, then two deferred waits. Unblock
-// one signal and one wait (both blocked by the same user event). When wait
-// completes, unblock the other signal. Then unblock the other wait.
-int test_semaphores_order_3(cl_device_id deviceID, cl_context context,
- cl_command_queue defaultQueue, int num_elements)
-{
- cl_int err;
-
- if (!is_extension_available(deviceID, "cl_khr_semaphore"))
- {
- log_info("cl_khr_semaphore is not supported on this platoform. "
- "Skipping test.\n");
- return TEST_SKIPPED_ITSELF;
- }
-
- // Obtain pointers to semaphore's API
- GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
- GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
- GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
- GET_PFN(deviceID, clReleaseSemaphoreKHR);
-
- // Create ooo queue
- clCommandQueueWrapper queue = clCreateCommandQueue(
- context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
- test_error(err, "Could not create command queue");
-
- // Create semaphore
- cl_semaphore_properties_khr sema_props[] = {
- static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
- static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
- 0
- };
- cl_semaphore_khr sema =
- clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
- test_error(err, "Could not create semaphore");
-
- // Create user events
- clEventWrapper user_event_1 = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- clEventWrapper user_event_2 = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- clEventWrapper user_event_3 = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- // Signal semaphore (dependency on user_event_1)
- clEventWrapper signal_1_event;
- err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1,
- &user_event_1, &signal_1_event);
- test_error(err, "Could not signal semaphore");
-
- // Signal semaphore (dependency on user_event_2)
- clEventWrapper signal_2_event;
- err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema, nullptr, 1,
- &user_event_2, &signal_2_event);
- test_error(err, "Could not signal semaphore");
-
- // Wait semaphore (dependency on user_event_3)
- clEventWrapper wait_1_event;
- err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event_3,
- &wait_1_event);
- test_error(err, "Could not wait semaphore");
-
- // Wait semaphore (dependency on user_event_2)
- clEventWrapper wait_2_event;
- err = clEnqueueWaitSemaphoresKHR(queue, 1, &sema, nullptr, 1, &user_event_2,
- &wait_2_event);
- test_error(err, "Could not wait semaphore");
-
- // Complete user_event_2
- err = clSetUserEventStatus(user_event_2, CL_COMPLETE);
- test_error(err, "Could not set user event to CL_COMPLETE");
-
- // Flush and delay
- err = clFlush(queue);
- test_error(err, "Could not flush queue");
- std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
-
- // Ensure only second signal and second wait completed
- cl_event event_list[] = { signal_2_event, wait_2_event };
- err = clWaitForEvents(2, event_list);
- test_error(err, "Could not wait for events");
-
- test_assert_event_inprogress(signal_1_event);
- test_assert_event_inprogress(wait_1_event);
-
- // Complete user_event_1
- err = clSetUserEventStatus(user_event_1, CL_COMPLETE);
- test_error(err, "Could not set user event to CL_COMPLETE");
-
- // Complete user_event_3
- err = clSetUserEventStatus(user_event_3, CL_COMPLETE);
- test_error(err, "Could not set user event to CL_COMPLETE");
-
- // Finish
- err = clFinish(queue);
- test_error(err, "Could not finish queue");
-
- // Ensure all events are completed
- test_assert_event_complete(signal_1_event);
- test_assert_event_complete(signal_2_event);
- test_assert_event_complete(wait_1_event);
- test_assert_event_complete(wait_2_event);
-
- // Release semaphore
- err = clReleaseSemaphoreKHR(sema);
- test_error(err, "Could not release semaphore");
-
- return TEST_PASS;
-}
-
// Test it is possible to export a semaphore to a sync fd and import the same
// sync fd to a new semaphore
int test_semaphores_import_export_fd(cl_device_id deviceID, cl_context context,
@@ -985,6 +688,8 @@ int test_semaphores_import_export_fd(cl_device_id deviceID, cl_context context,
CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR),
static_cast<cl_semaphore_properties_khr>(
CL_SEMAPHORE_HANDLE_SYNC_FD_KHR),
+ static_cast<cl_semaphore_properties_khr>(
+ CL_SEMAPHORE_EXPORT_HANDLE_TYPES_LIST_END_KHR),
0
};
cl_semaphore_khr sema_1 =
@@ -1040,106 +745,4 @@ int test_semaphores_import_export_fd(cl_device_id deviceID, cl_context context,
err = clReleaseSemaphoreKHR(sema_2);
test_error(err, "Could not release semaphore");
return TEST_PASS;
-}
-
-// Test that an invalid semaphore command results in the invalidation of the
-// command's event and the dependencies' events
-int test_semaphores_invalid_command(cl_device_id deviceID, cl_context context,
- cl_command_queue defaultQueue,
- int num_elements)
-{
- cl_int err;
-
- if (!is_extension_available(deviceID, "cl_khr_semaphore"))
- {
- log_info("cl_khr_semaphore is not supported on this platoform. "
- "Skipping test.\n");
- return TEST_SKIPPED_ITSELF;
- }
-
- // Obtain pointers to semaphore's API
- GET_PFN(deviceID, clCreateSemaphoreWithPropertiesKHR);
- GET_PFN(deviceID, clEnqueueSignalSemaphoresKHR);
- GET_PFN(deviceID, clEnqueueWaitSemaphoresKHR);
- GET_PFN(deviceID, clReleaseSemaphoreKHR);
-
- // Create ooo queue
- clCommandQueueWrapper queue = clCreateCommandQueue(
- context, deviceID, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &err);
- test_error(err, "Could not create command queue");
-
- // Create semaphores
- cl_semaphore_properties_khr sema_props[] = {
- static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_KHR),
- static_cast<cl_semaphore_properties_khr>(CL_SEMAPHORE_TYPE_BINARY_KHR),
- 0
- };
- cl_semaphore_khr sema_1 =
- clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
- test_error(err, "Could not create semaphore");
-
- cl_semaphore_khr sema_2 =
- clCreateSemaphoreWithPropertiesKHR(context, sema_props, &err);
- test_error(err, "Could not create semaphore");
-
- // Create user events
- clEventWrapper user_event_1 = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- clEventWrapper user_event_2 = clCreateUserEvent(context, &err);
- test_error(err, "Could not create user event");
-
- // Signal semaphore_1 (dependency on user_event_1)
- clEventWrapper signal_1_event;
- err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_1, nullptr, 1,
- &user_event_1, &signal_1_event);
- test_error(err, "Could not signal semaphore");
-
- // Wait semaphore_1 and semaphore_2 (dependency on user_event_1)
- clEventWrapper wait_event;
- cl_semaphore_khr sema_list[] = { sema_1, sema_2 };
- err = clEnqueueWaitSemaphoresKHR(queue, 2, sema_list, nullptr, 1,
- &user_event_1, &wait_event);
- test_error(err, "Could not wait semaphore");
-
- // Signal semaphore_1 (dependency on wait_event and user_event_2)
- clEventWrapper signal_2_event;
- cl_event wait_list[] = { user_event_2, wait_event };
- err = clEnqueueSignalSemaphoresKHR(queue, 1, &sema_1, nullptr, 2, wait_list,
- &signal_2_event);
- test_error(err, "Could not signal semaphore");
-
- // Flush and delay
- err = clFlush(queue);
- test_error(err, "Could not flush queue");
- std::this_thread::sleep_for(std::chrono::seconds(FLUSH_DELAY_S));
-
- // Ensure all events are not completed
- test_assert_event_inprogress(signal_1_event);
- test_assert_event_inprogress(signal_2_event);
- test_assert_event_inprogress(wait_event);
-
- // Complete user_event_1 (expect failure as waiting on semaphore_2 is not
- // allowed (unsignaled)
- err = clSetUserEventStatus(user_event_1, CL_COMPLETE);
- test_assert_error(err != CL_SUCCESS,
- "signal_2_event completed unexpectedly");
-
- // Ensure signal_1 is completed while others failed (the second signal
- // should fail as it depends on wait)
- err = clFinish(queue);
- test_error(err, "Could not finish queue");
-
- test_assert_event_complete(signal_1_event);
- test_assert_event_terminated(wait_event);
- test_assert_event_terminated(signal_2_event);
-
- // Release semaphore
- err = clReleaseSemaphoreKHR(sema_1);
- test_error(err, "Could not release semaphore");
-
- err = clReleaseSemaphoreKHR(sema_2);
- test_error(err, "Could not release semaphore");
-
- return TEST_PASS;
} \ No newline at end of file
diff --git a/test_conformance/geometrics/CMakeLists.txt b/test_conformance/geometrics/CMakeLists.txt
index 3fee05fb..8a6f25c6 100644
--- a/test_conformance/geometrics/CMakeLists.txt
+++ b/test_conformance/geometrics/CMakeLists.txt
@@ -6,5 +6,7 @@ set(${MODULE_NAME}_SOURCES
test_geometrics.cpp
)
+set_gnulike_module_compile_flags("-Wno-sign-compare")
+
include(../CMakeCommon.txt)
diff --git a/test_conformance/gl/test_images_write_common.cpp b/test_conformance/gl/test_images_write_common.cpp
index 4d721296..69d00a1a 100644
--- a/test_conformance/gl/test_images_write_common.cpp
+++ b/test_conformance/gl/test_images_write_common.cpp
@@ -571,6 +571,7 @@ static int test_image_format_write(cl_context context, cl_command_queue queue,
"%s (%s):%d",
GetGLTargetName(target), __FUNCTION__, __FILE__,
__LINE__);
+ return -1;
}
// If there was a problem during creation, make sure it isn't a known
diff --git a/test_conformance/images/clCopyImage/test_copy_generic.cpp b/test_conformance/images/clCopyImage/test_copy_generic.cpp
index 3e0b60d9..888ca6ec 100644
--- a/test_conformance/images/clCopyImage/test_copy_generic.cpp
+++ b/test_conformance/images/clCopyImage/test_copy_generic.cpp
@@ -519,32 +519,53 @@ int test_copy_image_generic( cl_context context, cl_command_queue queue, image_d
if( gDebugTrace )
log_info( " - Scanline verification...\n" );
- size_t thirdDim;
- size_t secondDim;
- if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
- {
- secondDim = dstImageInfo->arraySize;
- thirdDim = 1;
- }
- else if (dstImageInfo->type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+ size_t thirdDim = 1;
+ size_t secondDim = 1;
+
+ switch (dstImageInfo->type)
{
- secondDim = dstImageInfo->height;
- if( gTestMipmaps )
- secondDim = (dstImageInfo->height >> dst_lod) ? (dstImageInfo->height >> dst_lod):1;
- thirdDim = dstImageInfo->arraySize;
+ case CL_MEM_OBJECT_IMAGE1D_ARRAY: {
+ secondDim = dstImageInfo->arraySize;
+ break;
+ }
+ case CL_MEM_OBJECT_IMAGE2D_ARRAY: {
+ secondDim = dstImageInfo->height;
+ thirdDim = dstImageInfo->arraySize;
+ break;
+ }
+ case CL_MEM_OBJECT_IMAGE3D: {
+ secondDim = dstImageInfo->height;
+ thirdDim = dstImageInfo->depth;
+ break;
+ }
+ case CL_MEM_OBJECT_IMAGE2D: {
+ secondDim = dstImageInfo->height;
+ break;
+ }
+ case CL_MEM_OBJECT_IMAGE1D: {
+ break;
+ }
+ default: {
+ log_error("ERROR: Unsupported Image type. \n");
+ return error;
+ break;
+ }
}
- else
+ if (gTestMipmaps)
{
- secondDim = dstImageInfo->height;
- thirdDim = dstImageInfo->depth;
- if( gTestMipmaps )
+ switch (dstImageInfo->type)
{
- secondDim = (dstImageInfo->height >> dst_lod) ? (dstImageInfo->height >> dst_lod):1;
- if(dstImageInfo->type == CL_MEM_OBJECT_IMAGE3D)
+ case CL_MEM_OBJECT_IMAGE3D:
thirdDim = (dstImageInfo->depth >> dst_lod) ? (dstImageInfo->depth >> dst_lod):1;
+ /* Fallthrough */
+ case CL_MEM_OBJECT_IMAGE2D:
+ case CL_MEM_OBJECT_IMAGE2D_ARRAY:
+ secondDim = (dstImageInfo->height >> dst_lod)
+ ? (dstImageInfo->height >> dst_lod)
+ : 1;
+ break;
}
}
-
for( size_t z = 0; z < thirdDim; z++ )
{
for( size_t y = 0; y < secondDim; y++ )
diff --git a/test_conformance/images/clCopyImage/test_loops.cpp b/test_conformance/images/clCopyImage/test_loops.cpp
index 6ee1e536..e839cfdf 100644
--- a/test_conformance/images/clCopyImage/test_loops.cpp
+++ b/test_conformance/images/clCopyImage/test_loops.cpp
@@ -41,60 +41,52 @@ int test_image_type( cl_device_id device, cl_context context, cl_command_queue q
}
}
- if( testMethod == k1D )
+ switch (testMethod)
{
- name = "1D -> 1D";
- imageType = CL_MEM_OBJECT_IMAGE1D;
- }
- else if( testMethod == k2D )
- {
- name = "2D -> 2D";
- imageType = CL_MEM_OBJECT_IMAGE2D;
- }
- else if( testMethod == k3D )
- {
- name = "3D -> 3D";
- imageType = CL_MEM_OBJECT_IMAGE3D;
- }
- else if( testMethod == k1DArray )
- {
- name = "1D array -> 1D array";
- imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY;
- }
- else if( testMethod == k2DArray )
- {
- name = "2D array -> 2D array";
- imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
- }
- else if( testMethod == k2DTo3D )
- {
- name = "2D -> 3D";
- imageType = CL_MEM_OBJECT_IMAGE3D;
- }
- else if( testMethod == k3DTo2D )
- {
- name = "3D -> 2D";
- imageType = CL_MEM_OBJECT_IMAGE3D;
- }
- else if( testMethod == k2DArrayTo2D )
- {
- name = "2D array -> 2D";
- imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
- }
- else if( testMethod == k2DTo2DArray )
- {
- name = "2D -> 2D array";
- imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
- }
- else if( testMethod == k2DArrayTo3D )
- {
- name = "2D array -> 3D";
- imageType = CL_MEM_OBJECT_IMAGE3D;
- }
- else if( testMethod == k3DTo2DArray )
- {
- name = "3D -> 2D array";
- imageType = CL_MEM_OBJECT_IMAGE3D;
+ case k1D:
+ name = "1D -> 1D";
+ imageType = CL_MEM_OBJECT_IMAGE1D;
+ break;
+ case k2D:
+ name = "2D -> 2D";
+ imageType = CL_MEM_OBJECT_IMAGE2D;
+ break;
+ case k3D:
+ name = "3D -> 3D";
+ imageType = CL_MEM_OBJECT_IMAGE3D;
+ break;
+ case k1DArray:
+ name = "1D array -> 1D array";
+ imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY;
+ break;
+ case k2DArray:
+ name = "2D array -> 2D array";
+ imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+ break;
+ case k2DTo3D:
+ name = "2D -> 3D";
+ imageType = CL_MEM_OBJECT_IMAGE3D;
+ break;
+ case k3DTo2D:
+ name = "3D -> 2D";
+ imageType = CL_MEM_OBJECT_IMAGE3D;
+ break;
+ case k2DArrayTo2D:
+ name = "2D array -> 2D";
+ imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+ break;
+ case k2DTo2DArray:
+ name = "2D -> 2D array";
+ imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+ break;
+ case k2DArrayTo3D:
+ name = "2D array -> 3D";
+ imageType = CL_MEM_OBJECT_IMAGE3D;
+ break;
+ case k3DTo2DArray:
+ name = "3D -> 2D array";
+ imageType = CL_MEM_OBJECT_IMAGE3D;
+ break;
}
if(gTestMipmaps)
diff --git a/test_conformance/images/clFillImage/test_loops.cpp b/test_conformance/images/clFillImage/test_loops.cpp
index 759f48d2..126ea0eb 100644
--- a/test_conformance/images/clFillImage/test_loops.cpp
+++ b/test_conformance/images/clFillImage/test_loops.cpp
@@ -33,35 +33,34 @@ int test_image_type( cl_device_id device, cl_context context, cl_command_queue q
cl_mem_object_type imageType;
test_func test_fn;
- if ( testMethod == k1D )
+ switch (testMethod)
{
- name = "1D Image Fill";
- imageType = CL_MEM_OBJECT_IMAGE1D;
- test_fn = &test_fill_image_set_1D;
- }
- else if ( testMethod == k2D )
- {
- name = "2D Image Fill";
- imageType = CL_MEM_OBJECT_IMAGE2D;
- test_fn = &test_fill_image_set_2D;
- }
- else if ( testMethod == k1DArray )
- {
- name = "1D Image Array Fill";
- imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY;
- test_fn = &test_fill_image_set_1D_array;
- }
- else if ( testMethod == k2DArray )
- {
- name = "2D Image Array Fill";
- imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
- test_fn = &test_fill_image_set_2D_array;
- }
- else if ( testMethod == k3D )
- {
- name = "3D Image Fill";
- imageType = CL_MEM_OBJECT_IMAGE3D;
- test_fn = &test_fill_image_set_3D;
+ case k1D:
+ name = "1D Image Fill";
+ imageType = CL_MEM_OBJECT_IMAGE1D;
+ test_fn = &test_fill_image_set_1D;
+ break;
+ case k2D:
+ name = "2D Image Fill";
+ imageType = CL_MEM_OBJECT_IMAGE2D;
+ test_fn = &test_fill_image_set_2D;
+ break;
+ case k1DArray:
+ name = "1D Image Array Fill";
+ imageType = CL_MEM_OBJECT_IMAGE1D_ARRAY;
+ test_fn = &test_fill_image_set_1D_array;
+ break;
+ case k2DArray:
+ name = "2D Image Array Fill";
+ imageType = CL_MEM_OBJECT_IMAGE2D_ARRAY;
+ test_fn = &test_fill_image_set_2D_array;
+ break;
+ case k3D:
+ name = "3D Image Fill";
+ imageType = CL_MEM_OBJECT_IMAGE3D;
+ test_fn = &test_fill_image_set_3D;
+ break;
+ default: log_error("Unhandled method\n"); return -1;
}
log_info( "Running %s tests...\n", name );
diff --git a/test_conformance/images/kernel_read_write/CMakeLists.txt b/test_conformance/images/kernel_read_write/CMakeLists.txt
index b5527c74..d7e7eded 100644
--- a/test_conformance/images/kernel_read_write/CMakeLists.txt
+++ b/test_conformance/images/kernel_read_write/CMakeLists.txt
@@ -21,7 +21,7 @@ set(${MODULE_NAME}_SOURCES
# Make unused variables not fatal in this module; see
# https://github.com/KhronosGroup/OpenCL-CTS/issues/1484
-set_gnulike_module_compile_flags("-Wno-error=unused-variable -Wno-unused-but-set-variable")
+set_gnulike_module_compile_flags("-Wno-error=unused-variable -Wno-unused-but-set-variable -Wno-sign-compare")
include(../../CMakeCommon.txt)
diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp
index c6646330..887c9dca 100644
--- a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp
+++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp
@@ -14,8 +14,8 @@
// limitations under the License.
//
-#ifndef _TEST_CL_EXT_IMAGE_BUFFER
-#define _TEST_CL_EXT_IMAGE_BUFFER
+#ifndef TEST_CL_EXT_IMAGE_BUFFER
+#define TEST_CL_EXT_IMAGE_BUFFER
#define TEST_IMAGE_SIZE 20
@@ -48,8 +48,10 @@ static inline size_t get_format_size(cl_context context,
cl_image_desc image_desc = { 0 };
image_desc.image_type = imageType;
- /* Size 1 only to query element size */
- image_desc.image_width = 1;
+ /* We use a width of 4 to query element size, as this is
+ the smallest possible value that satisfies the requirements
+ of all image formats (including extensions). */
+ image_desc.image_width = 4;
if (CL_MEM_OBJECT_IMAGE1D_BUFFER != imageType
&& CL_MEM_OBJECT_IMAGE1D != imageType)
{
@@ -121,4 +123,4 @@ static inline void image_desc_init(cl_image_desc* desc,
}
}
-#endif /* _TEST_CL_EXT_IMAGE_BUFFER */ \ No newline at end of file
+#endif // TEST_CL_EXT_IMAGE_BUFFER
diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt
index 32814026..a221f05a 100644
--- a/test_conformance/math_brute_force/CMakeLists.txt
+++ b/test_conformance/math_brute_force/CMakeLists.txt
@@ -45,4 +45,6 @@ set(${MODULE_NAME}_SOURCES
# warnings), but other tests not (yet); so enable -Wall locally.
set_gnulike_module_compile_flags("-Wall -Wno-strict-aliasing -Wno-unknown-pragmas")
+add_cxx_flag_if_supported(-ffp-contract=off)
+
include(../CMakeCommon.txt)
diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp
index 3d6ce152..953c33bb 100644
--- a/test_conformance/math_brute_force/i_unary_double.cpp
+++ b/test_conformance/math_brute_force/i_unary_double.cpp
@@ -50,11 +50,6 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
- // This test is not using ThreadPool so we need to disable FTZ here
- // for reference computations
- FPU_mode_type oldMode;
- DisableFTZ(&oldMode);
-
Force64BitFPUPrecision();
// Init the kernels
@@ -227,6 +222,5 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode)
vlog("\n");
exit:
- RestoreFPState(&oldMode);
return error;
}
diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp
index 94ebc66a..0ce37cc8 100644
--- a/test_conformance/math_brute_force/i_unary_float.cpp
+++ b/test_conformance/math_brute_force/i_unary_float.cpp
@@ -49,11 +49,6 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
- // This test is not using ThreadPool so we need to disable FTZ here
- // for reference computations
- FPU_mode_type oldMode;
- DisableFTZ(&oldMode);
-
Force64BitFPUPrecision();
// Init the kernels
@@ -225,6 +220,5 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode)
vlog("\n");
exit:
- RestoreFPState(&oldMode);
return error;
}
diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp
index 53679788..34f49a5a 100644
--- a/test_conformance/math_brute_force/macro_unary_float.cpp
+++ b/test_conformance/math_brute_force/macro_unary_float.cpp
@@ -81,7 +81,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
int ftz = job->ftz;
bool relaxedMode = job->relaxedMode;
cl_int error = CL_SUCCESS;
- cl_int ret = CL_SUCCESS;
const char *name = job->f->name;
int signbit_test = 0;
@@ -245,8 +244,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (q[j] > t[j]) err = q[j] - t[j];
vlog_error("\nERROR: %s: %d ulp error at %a: *%d vs. %d\n",
name, err, ((float *)s)[j], t[j], q[j]);
- error = -1;
- goto exit;
+ return -1;
}
@@ -272,15 +270,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
vlog_error(
"\nERROR: %s%s: %d ulp error at %a: *%d vs. %d\n", name,
sizeNames[k], err, ((float *)s)[j], -t[j], q[j]);
- error = -1;
- goto exit;
+ return -1;
}
}
}
}
-exit:
- ret = error;
for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
{
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
@@ -315,7 +310,7 @@ exit:
fflush(stdout);
}
- return ret;
+ return CL_SUCCESS;
}
} // anonymous namespace
diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp
index 74c5a160..8d423408 100644
--- a/test_conformance/math_brute_force/unary_two_results_float.cpp
+++ b/test_conformance/math_brute_force/unary_two_results_float.cpp
@@ -189,12 +189,11 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode)
// Get that moving
if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
- FPU_mode_type oldMode;
+ FPU_mode_type oldMode = 0;
RoundingMode oldRoundMode = kRoundToNearestEven;
if (isFract)
{
// Calculate the correctly rounded reference result
- memset(&oldMode, 0, sizeof(oldMode));
if (ftz || relaxedMode) ForceFTZ(&oldMode);
// Set the rounding mode to match the device
diff --git a/test_conformance/mem_host_flags/C_host_memory_block.h b/test_conformance/mem_host_flags/C_host_memory_block.h
index 78692d17..0784c2c2 100644
--- a/test_conformance/mem_host_flags/C_host_memory_block.h
+++ b/test_conformance/mem_host_flags/C_host_memory_block.h
@@ -24,14 +24,14 @@
template <class T> class C_host_memory_block {
public:
- int num_elements;
+ size_t num_elements;
int element_size;
T *pData;
C_host_memory_block();
~C_host_memory_block();
- void Init(int num_elem, T &value);
- void Init(int num_elem);
+ void Init(size_t num_elem, T &value);
+ void Init(size_t num_elem);
void Set_to(T &val);
void Set_to_zero();
bool Equal_to(T &val);
@@ -40,7 +40,7 @@ public:
bool Equal_rect(C_host_memory_block<T> &another, size_t *host_origin,
size_t *region, size_t host_row_pitch,
size_t host_slice_pitch);
- bool Equal(T *pData, int num_elements);
+ bool Equal(T *pData, size_t num_elements);
bool Equal_rect_from_orig(C_host_memory_block<T> &another, size_t *soffset,
size_t *region, size_t host_row_pitch,
@@ -63,20 +63,20 @@ template <class T> C_host_memory_block<T>::~C_host_memory_block()
num_elements = 0;
}
-template <class T> void C_host_memory_block<T>::Init(int num_elem, T &value)
+template <class T> void C_host_memory_block<T>::Init(size_t num_elem, T &value)
{
if (pData != NULL) delete[] pData;
pData = new T[num_elem];
- for (int i = 0; i < num_elem; i++) pData[i] = value;
+ for (size_t i = 0; i < num_elem; i++) pData[i] = value;
num_elements = num_elem;
}
-template <class T> void C_host_memory_block<T>::Init(int num_elem)
+template <class T> void C_host_memory_block<T>::Init(size_t num_elem)
{
if (pData != NULL) delete[] pData;
pData = new T[num_elem];
- for (int i = 0; i < num_elem; i++) pData[i] = (T)i;
+ for (size_t i = 0; i < num_elem; i++) pData[i] = (T)i;
num_elements = num_elem;
}
@@ -88,14 +88,14 @@ template <class T> void C_host_memory_block<T>::Set_to_zero()
template <class T> void C_host_memory_block<T>::Set_to(T &val)
{
- for (int i = 0; i < num_elements; i++) pData[i] = val;
+ for (size_t i = 0; i < num_elements; i++) pData[i] = val;
}
template <class T> bool C_host_memory_block<T>::Equal_to(T &val)
{
- int count = 0;
+ size_t count = 0;
- for (int i = 0; i < num_elements; i++)
+ for (size_t i = 0; i < num_elements; i++)
{
if (pData[i] == val) count++;
}
@@ -106,9 +106,9 @@ template <class T> bool C_host_memory_block<T>::Equal_to(T &val)
template <class T>
bool C_host_memory_block<T>::Equal(C_host_memory_block<T> &another)
{
- int count = 0;
+ size_t count = 0;
- for (int i = 0; i < num_elements; i++)
+ for (size_t i = 0; i < num_elements; i++)
{
if (pData[i] == another.pData[i]) count++;
}
@@ -117,13 +117,13 @@ bool C_host_memory_block<T>::Equal(C_host_memory_block<T> &another)
}
template <class T>
-bool C_host_memory_block<T>::Equal(T *pIn_Data, int Innum_elements)
+bool C_host_memory_block<T>::Equal(T *pIn_Data, size_t Innum_elements)
{
if (this->num_elements != Innum_elements) return false;
- int count = 0;
+ size_t count = 0;
- for (int i = 0; i < num_elements; i++)
+ for (size_t i = 0; i < num_elements; i++)
{
if (pData[i] == pIn_Data[i]) count++;
}
@@ -134,7 +134,7 @@ bool C_host_memory_block<T>::Equal(T *pIn_Data, int Innum_elements)
template <class T> size_t C_host_memory_block<T>::Count(T &val)
{
size_t count = 0;
- for (int i = 0; i < num_elements; i++)
+ for (size_t i = 0; i < num_elements; i++)
{
if (pData[i] == val) count++;
}
diff --git a/test_conformance/mem_host_flags/checker.h b/test_conformance/mem_host_flags/checker.h
index 835f120b..0bb826f4 100644
--- a/test_conformance/mem_host_flags/checker.h
+++ b/test_conformance/mem_host_flags/checker.h
@@ -219,7 +219,7 @@ cl_int cBuffer_checker<T>::SetupASSubBuffer(cl_mem_flags parent_buffer_flag)
err = CL_SUCCESS;
}
- cl_mem_flags f;
+ cl_mem_flags f = 0;
if (parent_buffer_flag & CL_MEM_HOST_READ_ONLY)
f = CL_MEM_HOST_READ_ONLY;
else if (parent_buffer_flag & CL_MEM_HOST_WRITE_ONLY)
diff --git a/test_conformance/non_uniform_work_group/CMakeLists.txt b/test_conformance/non_uniform_work_group/CMakeLists.txt
index f78dd195..30c3a846 100644
--- a/test_conformance/non_uniform_work_group/CMakeLists.txt
+++ b/test_conformance/non_uniform_work_group/CMakeLists.txt
@@ -10,8 +10,6 @@ set(${MODULE_NAME}_SOURCES
tools.cpp
)
-set_gnulike_module_compile_flags("-Wno-unused-but-set-variable")
-
include(../CMakeCommon.txt)
# end of file #
diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
index a4a6a744..44781ca8 100644
--- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
+++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.cpp
@@ -448,13 +448,8 @@ void TestNonUniformWorkGroup::verifyData (DataContainerAttrib * reference, DataC
}
void TestNonUniformWorkGroup::calculateExpectedValues () {
- size_t nonRemainderGlobalSize[MAX_DIMS];
size_t numberOfPossibleRegions[MAX_DIMS];
- nonRemainderGlobalSize[0] = _globalSize[0] - (_globalSize[0] % _enqueuedLocalSize[0]);
- nonRemainderGlobalSize[1] = _globalSize[1] - (_globalSize[1] % _enqueuedLocalSize[1]);
- nonRemainderGlobalSize[2] = _globalSize[2] - (_globalSize[2] % _enqueuedLocalSize[2]);
-
numberOfPossibleRegions[0] = (_globalSize[0]>1)?2:1;
numberOfPossibleRegions[1] = (_globalSize[1]>1)?2:1;
numberOfPossibleRegions[2] = (_globalSize[2]>1)?2:1;
@@ -502,6 +497,11 @@ size_t TestNonUniformWorkGroup::getMaxLocalWorkgroupSize (const cl_device_id &de
if (TestNonUniformWorkGroup::_maxLocalWorkgroupSize == 0) {
err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
sizeof(TestNonUniformWorkGroup::_maxLocalWorkgroupSize), &TestNonUniformWorkGroup::_maxLocalWorkgroupSize, NULL);
+ if (err)
+ {
+ log_error("clGetDeviceInfo failed\n");
+ return 0;
+ }
}
return TestNonUniformWorkGroup::_maxLocalWorkgroupSize;
diff --git a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h
index 414d1004..f5846061 100644
--- a/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h
+++ b/test_conformance/non_uniform_work_group/TestNonUniformWorkGroup.h
@@ -13,8 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-#ifndef _TESTNONUNIFORMWORKGROUP_H
-#define _TESTNONUNIFORMWORKGROUP_H
+#ifndef TESTNONUNIFORMWORKGROUP_H
+#define TESTNONUNIFORMWORKGROUP_H
#include "procs.h"
#include <vector>
@@ -147,5 +147,4 @@ private:
unsigned int _overallCounter;
};
-#endif // _TESTNONUNIFORMWORKGROUP_H
-
+#endif // TESTNONUNIFORMWORKGROUP_H
diff --git a/test_conformance/non_uniform_work_group/tools.h b/test_conformance/non_uniform_work_group/tools.h
index 2e63c3dd..ba01fc99 100644
--- a/test_conformance/non_uniform_work_group/tools.h
+++ b/test_conformance/non_uniform_work_group/tools.h
@@ -13,8 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-#ifndef _TOOLS_H
-#define _TOOLS_H
+#ifndef TOOLS_H
+#define TOOLS_H
#include "procs.h"
#include <vector>
@@ -106,4 +106,4 @@ namespace Error {
};
}
-#endif // _TOOLS_H
+#endif // TOOLS_H
diff --git a/test_conformance/pipes/kernels.h b/test_conformance/pipes/kernels.h
index a2fb70c0..a897e5e8 100644
--- a/test_conformance/pipes/kernels.h
+++ b/test_conformance/pipes/kernels.h
@@ -13,8 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-#ifndef _KERNELS_H_
-#define _KERNELS_H_
+#ifndef KERNELS_H_
+#define KERNELS_H_
static const char* pipe_readwrite_struct_kernel_code = {
"typedef struct{\n"
@@ -127,4 +127,4 @@ static const char* pipe_convenience_readwrite_struct_kernel_code = {
" read_pipe(in_pipe, &dst[gid]);\n"
"}\n" };
-#endif //_KERNELS_H_
+#endif // KERNELS_H_
diff --git a/test_conformance/pipes/test_pipe_limits.cpp b/test_conformance/pipes/test_pipe_limits.cpp
index e1048f5f..76b80b15 100644
--- a/test_conformance/pipes/test_pipe_limits.cpp
+++ b/test_conformance/pipes/test_pipe_limits.cpp
@@ -274,8 +274,7 @@ int test_pipe_max_packet_size(cl_device_id deviceID, cl_context context, cl_comm
size_t global_work_size[3];
cl_int err;
size_t size;
- int num_pipe_elements = 1024;
- int i;
+ cl_uint num_pipe_elements = 1024;
cl_uint max_pipe_packet_size;
clEventWrapper producer_sync_event = NULL;
clEventWrapper consumer_sync_event = NULL;
@@ -287,7 +286,7 @@ int test_pipe_max_packet_size(cl_device_id deviceID, cl_context context, cl_comm
size_t min_alignment = get_min_alignment(context);
- global_work_size[0] = (cl_uint)num_pipe_elements;
+ global_work_size[0] = num_pipe_elements;
std::stringstream source;
@@ -312,7 +311,8 @@ int test_pipe_max_packet_size(cl_device_id deviceID, cl_context context, cl_comm
inptr = (cl_char *)align_malloc(size, min_alignment);
- for(i = 0; i < size; i++){
+ for (size_t i = 0; i < size; i++)
+ {
inptr[i] = (char)genrand_int32(d);
}
BufferInPtr.reset(inptr, nullptr, 0, size, true);
@@ -412,7 +412,7 @@ int test_pipe_max_active_reservations(cl_device_id deviceID, cl_context context,
clMemWrapper buf_reserve_id_t_size_aligned;
cl_int *inptr;
void *outptr;
- int size, i;
+ int size;
clProgramWrapper program;
clKernelWrapper kernel[3];
size_t global_work_size[3];
@@ -565,7 +565,8 @@ int test_pipe_max_active_reservations(cl_device_id deviceID, cl_context context,
size = sizeof(cl_int) * max_active_reservations;
inptr = (cl_int *)align_malloc(size, min_alignment);
- for(i = 0; i < max_active_reservations; i++){
+ for (cl_uint i = 0; i < max_active_reservations; i++)
+ {
inptr[i] = (int)genrand_int32(d);
}
BufferInPtr.reset(inptr, nullptr, 0, size, true);
diff --git a/test_conformance/relationals/test_comparisons_fp.cpp b/test_conformance/relationals/test_comparisons_fp.cpp
index 580b7422..73ff3dd9 100644
--- a/test_conformance/relationals/test_comparisons_fp.cpp
+++ b/test_conformance/relationals/test_comparisons_fp.cpp
@@ -14,12 +14,16 @@
// limitations under the License.
//
+#include <cstdint>
+#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <stdexcept>
#include <vector>
+#include "harness/stringHelpers.h"
+
#include <CL/cl_half.h>
#include "test_comparisons_fp.h"
@@ -81,29 +85,6 @@ extension,
// clang-format on
-std::string concat_kernel(const char* sstr[], int num)
-{
- std::string res;
- for (int i = 0; i < num; i++) res += std::string(sstr[i]);
- return res;
-}
-
-template <typename... Args>
-std::string string_format(const std::string& format, Args... args)
-{
- int size_s = std::snprintf(nullptr, 0, format.c_str(), args...)
- + 1; // Extra space for '\0'
- if (size_s <= 0)
- {
- throw std::runtime_error("Error during formatting.");
- }
- auto size = static_cast<size_t>(size_s);
- std::unique_ptr<char[]> buf(new char[size]);
- std::snprintf(buf.get(), size, format.c_str(), args...);
- return std::string(buf.get(),
- buf.get() + size - 1); // We don't want the '\0' inside
-}
-
template <typename T, typename F> bool verify(const T& A, const T& B)
{
return F()(A, B);
@@ -224,14 +205,14 @@ int RelationalsFPTest::test_equiv_kernel(unsigned int vecSize,
auto str =
concat_kernel(equivTestKerPat_3,
sizeof(equivTestKerPat_3) / sizeof(const char*));
- kernelSource = string_format(str, fnName.c_str(), opName.c_str());
+ kernelSource = str_sprintf(str, fnName.c_str(), opName.c_str());
}
else
{
auto str = concat_kernel(equivTestKerPatLessGreater_3,
sizeof(equivTestKerPatLessGreater_3)
/ sizeof(const char*));
- kernelSource = string_format(str, fnName.c_str());
+ kernelSource = str_sprintf(str, fnName.c_str());
}
}
else
@@ -241,14 +222,14 @@ int RelationalsFPTest::test_equiv_kernel(unsigned int vecSize,
auto str =
concat_kernel(equivTestKernPat,
sizeof(equivTestKernPat) / sizeof(const char*));
- kernelSource = string_format(str, fnName.c_str(), opName.c_str());
+ kernelSource = str_sprintf(str, fnName.c_str(), opName.c_str());
}
else
{
auto str = concat_kernel(equivTestKernPatLessGreater,
sizeof(equivTestKernPatLessGreater)
/ sizeof(const char*));
- kernelSource = string_format(str, fnName.c_str());
+ kernelSource = str_sprintf(str, fnName.c_str());
}
}
diff --git a/test_conformance/relationals/test_comparisons_fp.h b/test_conformance/relationals/test_comparisons_fp.h
index 7faca1c5..3401163e 100644
--- a/test_conformance/relationals/test_comparisons_fp.h
+++ b/test_conformance/relationals/test_comparisons_fp.h
@@ -14,8 +14,8 @@
// limitations under the License.
//
-#ifndef _TEST_COMPARISONS_FP_H
-#define _TEST_COMPARISONS_FP_H
+#ifndef TEST_COMPARISONS_FP_H
+#define TEST_COMPARISONS_FP_H
#include <map>
#include <memory>
@@ -32,6 +32,7 @@ template <typename T> using VerifyFunc = bool (*)(const T &, const T &);
struct RelTestBase
{
explicit RelTestBase(const ExplicitTypes &dt): dataType(dt) {}
+ virtual ~RelTestBase() = default;
ExplicitTypes dataType;
};
@@ -224,4 +225,4 @@ int MakeAndRunTest(cl_device_id device, cl_context context,
return TEST_PASS;
}
-#endif // _TEST_COMPARISONS_FP_H
+#endif // TEST_COMPARISONS_FP_H
diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp
index 7fa3bc08..72be08c7 100644
--- a/test_conformance/select/test_select.cpp
+++ b/test_conformance/select/test_select.cpp
@@ -14,11 +14,16 @@
// limitations under the License.
//
#include "harness/compat.h"
+#include "harness/typeWrappers.h"
#include <assert.h>
#include <stdio.h>
#include <time.h>
#include <string.h>
+
+#include <cinttypes>
+#include <vector>
+
#if ! defined( _WIN32)
#if defined(__APPLE__)
#include <sys/sysctl.h>
@@ -42,11 +47,14 @@ static void initSrcBuffer(void* src1, Type stype, MTdata);
// initialize the valued used to compare with in the select with
// vlaues [start, count)
-static void initCmpBuffer(void* cmp, Type cmptype, uint64_t start, size_t count);
+static void initCmpBuffer(void *cmp, Type cmptype, uint64_t start,
+ const size_t count);
// make a program that uses select for the given stype (src/dest type),
// ctype (comparison type), veclen (vector length)
-static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context context, Type stype, Type ctype, size_t veclen );
+static cl_program makeSelectProgram(cl_kernel *kernel_ptr, cl_context context,
+ Type stype, Type ctype,
+ const size_t veclen);
// Creates and execute the select test for the given device, context,
// stype (source/dest type), cmptype (comparison type), using max_tg_size
@@ -66,6 +74,16 @@ static void printUsage( void );
#define BUFFER_SIZE (1024*1024)
#define KPAGESIZE 4096
+#define test_error_count(errCode, msg) \
+ { \
+ auto errCodeResult = errCode; \
+ if (errCodeResult != CL_SUCCESS) \
+ { \
+ gFailCount++; \
+ print_error(errCodeResult, msg); \
+ return errCode; \
+ } \
+ }
// When we indicate non wimpy mode, the types that are 32 bits value will
// test their entire range and 64 bits test will test the 32 bit
@@ -74,12 +92,6 @@ static void printUsage( void );
static bool s_wimpy_mode = false;
static int s_wimpy_reduction_factor = 256;
-// Tests are broken into the major test which is based on the
-// src and cmp type and their corresponding vector types and
-// sub tests which is for each individual test. The following
-// tracks the subtests
-int s_test_cnt = 0;
-
//-----------------------------------------
// Static helper functions
//-----------------------------------------
@@ -112,36 +124,37 @@ static void initSrcBuffer(void* src1, Type stype, MTdata d)
s1[i] = genrand_int32(d);
}
-static void initCmpBuffer(void* cmp, Type cmptype, uint64_t start, size_t count) {
- int i;
+static void initCmpBuffer(void *cmp, Type cmptype, uint64_t start,
+ const size_t count)
+
+{
assert(cmptype != kfloat);
switch (type_size[cmptype]) {
case 1: {
uint8_t* ub = (uint8_t *)cmp;
- for (i=0; i < count; ++i)
- ub[i] = (uint8_t)start++;
+ for (size_t i = 0; i < count; ++i) ub[i] = (uint8_t)start++;
break;
}
case 2: {
uint16_t* us = (uint16_t *)cmp;
- for (i=0; i < count; ++i)
- us[i] = (uint16_t)start++;
+ for (size_t i = 0; i < count; ++i) us[i] = (uint16_t)start++;
break;
}
case 4: {
if (!s_wimpy_mode) {
uint32_t* ui = (uint32_t *)cmp;
- for (i=0; i < count; ++i)
- ui[i] = (uint32_t)start++;
+ for (size_t i = 0; i < count; ++i) ui[i] = (uint32_t)start++;
}
else {
// The short test doesn't iterate over the entire 32 bit space so
// we alternate between positive and negative values
int32_t* ui = (int32_t *)cmp;
- int32_t sign = 1;
- for (i=0; i < count; ++i, ++start) {
- ui[i] = (int32_t)start*sign;
- sign = sign * -1;
+ int32_t neg_start = (int32_t)start * -1;
+ for (size_t i = 0; i < count; i++)
+ {
+ ++start;
+ --neg_start;
+ ui[i] = (int32_t)((i % 2) ? start : neg_start);
}
}
break;
@@ -150,10 +163,12 @@ static void initCmpBuffer(void* cmp, Type cmptype, uint64_t start, size_t count)
// We don't iterate over the entire space of 64 bit so for the
// selects, we want to test positive and negative values
int64_t* ll = (int64_t *)cmp;
- int64_t sign = 1;
- for (i=0; i < count; ++i, ++start) {
- ll[i] = start*sign;
- sign = sign * -1;
+ int64_t neg_start = (int64_t)start * -1;
+ for (size_t i = 0; i < count; i++)
+ {
+ ++start;
+ --neg_start;
+ ll[i] = (int64_t)((i % 2) ? start : neg_start);
}
break;
}
@@ -165,7 +180,9 @@ static void initCmpBuffer(void* cmp, Type cmptype, uint64_t start, size_t count)
// Make the various incarnations of the program we want to run
// stype: source and destination type for the select
// ctype: compare type
-static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context context, Type srctype, Type cmptype, size_t vec_len)
+static cl_program makeSelectProgram(cl_kernel *kernel_ptr,
+ const cl_context context, Type srctype,
+ Type cmptype, const size_t vec_len)
{
char testname[256];
char stypename[32];
@@ -237,6 +254,9 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont
if (srctype == kdouble)
strcpy( extension, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" );
+ if (srctype == khalf)
+ strcpy(extension, "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n");
+
// create type name and testname
switch( vec_len )
{
@@ -288,39 +308,38 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont
return program;
}
-
#define VECTOR_SIZE_COUNT 6
static int doTest(cl_command_queue queue, cl_context context, Type stype, Type cmptype, cl_device_id device)
{
int err = CL_SUCCESS;
- int s_test_fail = 0;
- MTdataHolder d;
+ MTdataHolder d(gRandomSeed);
const size_t element_count[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 };
- cl_mem src1 = NULL;
- cl_mem src2 = NULL;
- cl_mem cmp = NULL;
- cl_mem dest = NULL;
- void *ref = NULL;
- void *sref = NULL;
+ clMemWrapper src1, src2, cmp, dest;
cl_ulong blocks = type_size[stype] * 0x100000000ULL / BUFFER_SIZE;
- size_t block_elements = BUFFER_SIZE / type_size[stype];
+ const size_t block_elements = BUFFER_SIZE / type_size[stype];
size_t step = s_wimpy_mode ? s_wimpy_reduction_factor : 1;
cl_ulong cmp_stride = block_elements * step;
// It is more efficient to create the tests all at once since we
// use the same test data on each of the vector sizes
- int vecsize;
- cl_program programs[VECTOR_SIZE_COUNT];
- cl_kernel kernels[VECTOR_SIZE_COUNT];
+ clProgramWrapper programs[VECTOR_SIZE_COUNT];
+ clKernelWrapper kernels[VECTOR_SIZE_COUNT];
- if(stype == kdouble && ! is_extension_available( device, "cl_khr_fp64" ))
+ if (stype == kdouble && !is_extension_available(device, "cl_khr_fp64"))
{
log_info("Skipping double because cl_khr_fp64 extension is not supported.\n");
return 0;
}
+ if (stype == khalf && !is_extension_available(device, "cl_khr_fp16"))
+ {
+ log_info(
+ "Skipping half because cl_khr_fp16 extension is not supported.\n");
+ return 0;
+ }
+
if (gIsEmbedded)
{
if (( stype == klong || stype == kulong ) && ! is_extension_available( device, "cles_khr_int64" ))
@@ -336,29 +355,51 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c
}
}
- for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize)
- {
- programs[vecsize] = makeSelectProgram(&kernels[vecsize], context, stype, cmptype, element_count[vecsize] );
- if (!programs[vecsize] || !kernels[vecsize]) {
- ++s_test_fail;
- ++s_test_cnt;
- return -1;
- }
- }
-
- ref = malloc( BUFFER_SIZE );
- if( NULL == ref ){ log_error("Error: could not allocate ref buffer\n" ); goto exit; }
- sref = malloc( BUFFER_SIZE );
- if( NULL == sref ){ log_error("Error: could not allocate ref buffer\n" ); goto exit; }
src1 = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err );
- if( err ) { log_error( "Error: could not allocate src1 buffer\n" ); ++s_test_fail; goto exit; }
+ test_error_count(err, "Error: could not allocate src1 buffer\n");
src2 = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err );
- if( err ) { log_error( "Error: could not allocate src2 buffer\n" ); ++s_test_fail; goto exit; }
+ test_error_count(err, "Error: could not allocate src2 buffer\n");
cmp = clCreateBuffer( context, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &err );
- if( err ) { log_error( "Error: could not allocate cmp buffer\n" ); ++s_test_fail; goto exit; }
+ test_error_count(err, "Error: could not allocate cmp buffer\n");
dest = clCreateBuffer( context, CL_MEM_WRITE_ONLY, BUFFER_SIZE, NULL, &err );
- if( err ) { log_error( "Error: could not allocate dest buffer\n" ); ++s_test_fail; goto exit; }
+ test_error_count(err, "Error: could not allocate dest buffer\n");
+
+ programs[0] = makeSelectProgram(&kernels[0], context, stype, cmptype,
+ element_count[0]);
+ programs[1] = makeSelectProgram(&kernels[1], context, stype, cmptype,
+ element_count[1]);
+ programs[2] = makeSelectProgram(&kernels[2], context, stype, cmptype,
+ element_count[2]);
+ programs[3] = makeSelectProgram(&kernels[3], context, stype, cmptype,
+ element_count[3]);
+ programs[4] = makeSelectProgram(&kernels[4], context, stype, cmptype,
+ element_count[4]);
+ programs[5] = makeSelectProgram(&kernels[5], context, stype, cmptype,
+ element_count[5]);
+
+ for (size_t vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize)
+ {
+ if (!programs[vecsize] || !kernels[vecsize])
+ {
+ return -1;
+ }
+
+ err = clSetKernelArg(kernels[vecsize], 0, sizeof dest, &dest);
+ test_error_count(err, "Error: Cannot set kernel arg dest!\n");
+ err = clSetKernelArg(kernels[vecsize], 1, sizeof src1, &src1);
+ test_error_count(err, "Error: Cannot set kernel arg dest!\n");
+ err = clSetKernelArg(kernels[vecsize], 2, sizeof src2, &src2);
+ test_error_count(err, "Error: Cannot set kernel arg dest!\n");
+ err = clSetKernelArg(kernels[vecsize], 3, sizeof cmp, &cmp);
+ test_error_count(err, "Error: Cannot set kernel arg dest!\n");
+ }
+ std::vector<char> ref(BUFFER_SIZE);
+ std::vector<char> sref(BUFFER_SIZE);
+ std::vector<char> src1_host(BUFFER_SIZE);
+ std::vector<char> src2_host(BUFFER_SIZE);
+ std::vector<char> cmp_host(BUFFER_SIZE);
+ std::vector<char> dest_host(BUFFER_SIZE);
// We block the test as we are running over the range of compare values
// "block the test" means "break the test into blocks"
@@ -368,81 +409,63 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c
cmp_stride = block_elements * step * (0xffffffffffffffffULL / 0x100000000ULL + 1);
log_info("Testing...");
- d = MTdataHolder(gRandomSeed);
uint64_t i;
+
+ initSrcBuffer(src1_host.data(), stype, d);
+ initSrcBuffer(src2_host.data(), stype, d);
for (i=0; i < blocks; i+=step)
{
- void *s1 = clEnqueueMapBuffer( queue, src1, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err );
- if( err ){ log_error( "Error: Could not map src1" ); goto exit; }
- // Setup the input data to change for each block
- initSrcBuffer( s1, stype, d);
-
- void *s2 = clEnqueueMapBuffer( queue, src2, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err );
- if( err ){ log_error( "Error: Could not map src2" ); goto exit; }
- // Setup the input data to change for each block
- initSrcBuffer( s2, stype, d);
-
- void *s3 = clEnqueueMapBuffer( queue, cmp, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err );
- if( err ){ log_error( "Error: Could not map cmp" ); goto exit; }
- // Setup the input data to change for each block
- initCmpBuffer(s3, cmptype, i * cmp_stride, block_elements);
-
- // Create the reference result
- Select sfunc = (cmptype == ctype[stype][0]) ? vrefSelects[stype][0] : vrefSelects[stype][1];
- (*sfunc)(ref, s1, s2, s3, block_elements);
-
- sfunc = (cmptype == ctype[stype][0]) ? refSelects[stype][0] : refSelects[stype][1];
- (*sfunc)(sref, s1, s2, s3, block_elements);
-
- if( (err = clEnqueueUnmapMemObject( queue, src1, s1, 0, NULL, NULL )))
- { log_error( "Error: coult not unmap src1\n" ); ++s_test_fail; goto exit; }
- if( (err = clEnqueueUnmapMemObject( queue, src2, s2, 0, NULL, NULL )))
- { log_error( "Error: coult not unmap src2\n" ); ++s_test_fail; goto exit; }
- if( (err = clEnqueueUnmapMemObject( queue, cmp, s3, 0, NULL, NULL )))
- { log_error( "Error: coult not unmap cmp\n" ); ++s_test_fail; goto exit; }
-
- for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize)
+ initCmpBuffer(cmp_host.data(), cmptype, i * cmp_stride, block_elements);
+
+ err = clEnqueueWriteBuffer(queue, src1, CL_FALSE, 0, BUFFER_SIZE,
+ src1_host.data(), 0, NULL, NULL);
+ test_error_count(err, "Error: Could not write src1");
+
+ err = clEnqueueWriteBuffer(queue, src2, CL_FALSE, 0, BUFFER_SIZE,
+ src2_host.data(), 0, NULL, NULL);
+ test_error_count(err, "Error: Could not write src2");
+
+ err = clEnqueueWriteBuffer(queue, cmp, CL_FALSE, 0, BUFFER_SIZE,
+ cmp_host.data(), 0, NULL, NULL);
+ test_error_count(err, "Error: Could not write cmp");
+
+ Select sfunc = (cmptype == ctype[stype][0]) ? vrefSelects[stype][0]
+ : vrefSelects[stype][1];
+ (*sfunc)(ref.data(), src1_host.data(), src2_host.data(),
+ cmp_host.data(), block_elements);
+
+ sfunc = (cmptype == ctype[stype][0]) ? refSelects[stype][0]
+ : refSelects[stype][1];
+ (*sfunc)(sref.data(), src1_host.data(), src2_host.data(),
+ cmp_host.data(), block_elements);
+
+ for (int vecsize = 0; vecsize < VECTOR_SIZE_COUNT; ++vecsize)
{
size_t vector_size = element_count[vecsize] * type_size[stype];
size_t vector_count = (BUFFER_SIZE + vector_size - 1) / vector_size;
- if((err = clSetKernelArg(kernels[vecsize], 0, sizeof dest, &dest) ))
- { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; }
- if((err = clSetKernelArg(kernels[vecsize], 1, sizeof src1, &src1) ))
- { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; }
- if((err = clSetKernelArg(kernels[vecsize], 2, sizeof src2, &src2) ))
- { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; }
- if((err = clSetKernelArg(kernels[vecsize], 3, sizeof cmp, &cmp) ))
- { log_error( "Error: Cannot set kernel arg dest! %d\n", err ); ++s_test_fail; goto exit; }
-
+ const cl_int pattern = -1;
+ err = clEnqueueFillBuffer(queue, dest, &pattern, sizeof(cl_int), 0,
+ BUFFER_SIZE, 0, nullptr, nullptr);
+ test_error_count(err, "clEnqueueFillBuffer failed");
- // Wipe destination
- void *d = clEnqueueMapBuffer( queue, dest, CL_TRUE, CL_MAP_WRITE, 0, BUFFER_SIZE, 0, NULL, NULL, &err );
- if( err ){ log_error( "Error: Could not map dest" ); ++s_test_fail; goto exit; }
- memset( d, -1, BUFFER_SIZE );
- if( (err = clEnqueueUnmapMemObject( queue, dest, d, 0, NULL, NULL ) ) ){ log_error( "Error: Could not unmap dest" ); ++s_test_fail; goto exit; }
err = clEnqueueNDRangeKernel(queue, kernels[vecsize], 1, NULL, &vector_count, NULL, 0, NULL, NULL);
- if (err != CL_SUCCESS) {
- log_error("clEnqueueNDRangeKernel failed errcode:%d\n", err);
- ++s_test_fail;
- goto exit;
- }
-
- d = clEnqueueMapBuffer( queue, dest, CL_TRUE, CL_MAP_READ, 0, BUFFER_SIZE, 0, NULL, NULL, &err );
- if( err ){ log_error( "Error: Could not map dest # 2" ); ++s_test_fail; goto exit; }
+ test_error_count(err, "clEnqueueNDRangeKernel failed errcode\n");
- if ((*checkResults[stype])(d, vecsize == 0 ? sref : ref, block_elements, element_count[vecsize])!=0){
- log_error("vec_size:%d indx: 0x%16.16llx\n", (int)element_count[vecsize], i);
- ++s_test_fail;
- goto exit;
- }
+ err = clEnqueueReadBuffer(queue, dest, CL_TRUE, 0, BUFFER_SIZE,
+ dest_host.data(), 0, NULL, NULL);
+ test_error_count(
+ err, "Error: Reading buffer from dest to dest_host failed\n");
- if( (err = clEnqueueUnmapMemObject( queue, dest, d, 0, NULL, NULL ) ) )
+ if ((*checkResults[stype])(dest_host.data(),
+ vecsize == 0 ? sref.data() : ref.data(),
+ block_elements, element_count[vecsize])
+ != 0)
{
- log_error( "Error: Could not unmap dest" );
- ++s_test_fail;
- goto exit;
+ log_error("vec_size:%d indx: 0x%16.16" PRIx64 "\n",
+ (int)element_count[vecsize], i);
+ return TEST_FAIL;
}
} // for vecsize
} // for i
@@ -452,24 +475,6 @@ static int doTest(cl_command_queue queue, cl_context context, Type stype, Type c
else
log_info(" Wimpy Passed\n\n");
-exit:
- if( src1 ) clReleaseMemObject( src1 );
- if( src2 ) clReleaseMemObject( src2 );
- if( cmp ) clReleaseMemObject( cmp );
- if( dest) clReleaseMemObject( dest );
- if( ref ) free(ref );
- if( sref ) free(sref );
-
- for (vecsize = 0; vecsize < VECTOR_SIZE_COUNT; vecsize++) {
- clReleaseKernel(kernels[vecsize]);
- clReleaseProgram(programs[vecsize]);
- }
- ++s_test_cnt;
- if (s_test_fail)
- {
- err = TEST_FAIL;
- gFailCount++;
- }
return err;
}
@@ -505,6 +510,16 @@ int test_select_short_short(cl_device_id deviceID, cl_context context, cl_comman
{
return doTest(queue, context, kshort, kshort, deviceID);
}
+int test_select_half_ushort(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ return doTest(queue, context, khalf, kushort, deviceID);
+}
+int test_select_half_short(cl_device_id deviceID, cl_context context,
+ cl_command_queue queue, int num_elements)
+{
+ return doTest(queue, context, khalf, kshort, deviceID);
+}
int test_select_uint_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
return doTest(queue, context, kuint, kuint, deviceID);
@@ -555,26 +570,17 @@ int test_select_double_long(cl_device_id deviceID, cl_context context, cl_comman
}
test_definition test_list[] = {
- ADD_TEST( select_uchar_uchar ),
- ADD_TEST( select_uchar_char ),
- ADD_TEST( select_char_uchar ),
- ADD_TEST( select_char_char ),
- ADD_TEST( select_ushort_ushort ),
- ADD_TEST( select_ushort_short ),
- ADD_TEST( select_short_ushort ),
- ADD_TEST( select_short_short ),
- ADD_TEST( select_uint_uint ),
- ADD_TEST( select_uint_int ),
- ADD_TEST( select_int_uint ),
- ADD_TEST( select_int_int ),
- ADD_TEST( select_float_uint ),
- ADD_TEST( select_float_int ),
- ADD_TEST( select_ulong_ulong ),
- ADD_TEST( select_ulong_long ),
- ADD_TEST( select_long_ulong ),
- ADD_TEST( select_long_long ),
- ADD_TEST( select_double_ulong ),
- ADD_TEST( select_double_long ),
+ ADD_TEST(select_uchar_uchar), ADD_TEST(select_uchar_char),
+ ADD_TEST(select_char_uchar), ADD_TEST(select_char_char),
+ ADD_TEST(select_ushort_ushort), ADD_TEST(select_ushort_short),
+ ADD_TEST(select_short_ushort), ADD_TEST(select_short_short),
+ ADD_TEST(select_half_ushort), ADD_TEST(select_half_short),
+ ADD_TEST(select_uint_uint), ADD_TEST(select_uint_int),
+ ADD_TEST(select_int_uint), ADD_TEST(select_int_int),
+ ADD_TEST(select_float_uint), ADD_TEST(select_float_int),
+ ADD_TEST(select_ulong_ulong), ADD_TEST(select_ulong_long),
+ ADD_TEST(select_long_ulong), ADD_TEST(select_long_long),
+ ADD_TEST(select_double_ulong), ADD_TEST(select_double_long),
};
const int test_num = ARRAY_SIZE( test_list );
diff --git a/test_conformance/select/test_select.h b/test_conformance/select/test_select.h
index c51ae13c..5cd78602 100644
--- a/test_conformance/select/test_select.h
+++ b/test_conformance/select/test_select.h
@@ -28,18 +28,20 @@
#endif
// Defines the set of types we support (no support for double)
-typedef enum {
+typedef enum
+{
kuchar = 0,
kchar = 1,
kushort = 2,
kshort = 3,
- kuint = 4,
- kint = 5,
- kfloat = 6,
- kulong = 7,
- klong = 8,
- kdouble = 9,
- kTypeCount // always goes last
+ khalf = 4,
+ kuint = 5,
+ kint = 6,
+ kfloat = 7,
+ kulong = 8,
+ klong = 9,
+ kdouble = 10,
+ kTypeCount // always goes last
} Type;
@@ -56,7 +58,8 @@ extern const size_t type_size[kTypeCount];
extern const Type ctype[kTypeCount][2];
// Reference functions for the primitive (non vector) type
-typedef void (*Select)(void *dest, void *src1, void *src2, void *cmp, size_t c);
+typedef void (*Select)(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t c);
extern Select refSelects[kTypeCount][2];
// Reference functions for the primtive type but uses the vector
@@ -64,7 +67,8 @@ extern Select refSelects[kTypeCount][2];
extern Select vrefSelects[kTypeCount][2];
// Check functions for each output type
-typedef size_t (*CheckResults)(void *out1, void *out2, size_t count, size_t vectorSize);
+typedef size_t (*CheckResults)(const void *const out1, const void *const out2,
+ size_t count, size_t vectorSize);
extern CheckResults checkResults[kTypeCount];
// Helpful macros
diff --git a/test_conformance/select/util_select.cpp b/test_conformance/select/util_select.cpp
index f9641e99..078ff64a 100644
--- a/test_conformance/select/util_select.cpp
+++ b/test_conformance/select/util_select.cpp
@@ -13,10 +13,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//
-#include "harness/compat.h"
#include "harness/errorHelpers.h"
#include <stdio.h>
+#include <cinttypes>
#include "test_select.h"
@@ -25,29 +25,28 @@
//-----------------------------------------
-const char *type_name[kTypeCount] = {
- "uchar", "char",
- "ushort", "short",
- "uint", "int",
- "float", "ulong", "long", "double" };
+const char *type_name[kTypeCount] = { "uchar", "char", "ushort", "short",
+ "half", "uint", "int", "float",
+ "ulong", "long", "double" };
const size_t type_size[kTypeCount] = {
- sizeof(cl_uchar), sizeof(cl_char),
- sizeof(cl_ushort), sizeof(cl_short),
- sizeof(cl_uint), sizeof(cl_int),
- sizeof(cl_float), sizeof(cl_ulong), sizeof(cl_long), sizeof( cl_double ) };
+ sizeof(cl_uchar), sizeof(cl_char), sizeof(cl_ushort), sizeof(cl_short),
+ sizeof(cl_half), sizeof(cl_uint), sizeof(cl_int), sizeof(cl_float),
+ sizeof(cl_ulong), sizeof(cl_long), sizeof(cl_double)
+};
const Type ctype[kTypeCount][2] = {
- { kuchar, kchar }, // uchar
- { kuchar, kchar }, // char
- { kushort, kshort}, // ushort
- { kushort, kshort}, // short
- { kuint, kint }, // uint
- { kuint, kint }, // int
- { kuint, kint }, // float
- { kulong, klong }, // ulong
- { kulong, klong }, // long
- { kulong, klong } // double
+ { kuchar, kchar }, // uchar
+ { kuchar, kchar }, // char
+ { kushort, kshort }, // ushort
+ { kushort, kshort }, // short
+ { kushort, kshort }, // half
+ { kuint, kint }, // uint
+ { kuint, kint }, // int
+ { kuint, kint }, // float
+ { kulong, klong }, // ulong
+ { kulong, klong }, // long
+ { kulong, klong } // double
};
@@ -55,510 +54,594 @@ const Type ctype[kTypeCount][2] = {
// Reference functions
//-----------------------------------------
-void refselect_1i8(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_1i8(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_char *d, *x, *y, *m;
- d = (cl_char*) dest;
- x = (cl_char*) src1;
- y = (cl_char*) src2;
- m = (cl_char*) cmp;
+ cl_char *const d = (cl_char *)dest;
+ const cl_char *const x = (cl_char *)src1;
+ const cl_char *const y = (cl_char *)src2;
+ const cl_char *const m = (cl_char *)cmp;
for (i=0; i < count; ++i) {
d[i] = m[i] ? y[i] : x[i];
}
}
-void refselect_1u8(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_1u8(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_uchar *d, *x, *y;
- cl_char *m;
- d = (cl_uchar*) dest;
- x = (cl_uchar*) src1;
- y = (cl_uchar*) src2;
- m = (cl_char*) cmp;
+ cl_uchar *const d = (cl_uchar *)dest;
+ const cl_uchar *const x = (cl_uchar *)src1;
+ const cl_uchar *const y = (cl_uchar *)src2;
+ const cl_char *const m = (cl_char *)cmp;
for (i=0; i < count; ++i) {
d[i] = m[i] ? y[i] : x[i];
}
}
-void refselect_1i16(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_1i16(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_short *d, *x, *y, *m;
- d = (cl_short*) dest;
- x = (cl_short*) src1;
- y = (cl_short*) src2;
- m = (cl_short*) cmp;
+ cl_short *const d = (cl_short *)dest;
+ const cl_short *const x = (cl_short *)src1;
+ const cl_short *const y = (cl_short *)src2;
+ const cl_short *const m = (cl_short *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void refselect_1u16(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_1u16(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_ushort *d, *x, *y;
- cl_short *m;
- d = (cl_ushort*) dest;
- x = (cl_ushort*) src1;
- y = (cl_ushort*) src2;
- m = (cl_short*) cmp;
+ cl_ushort *const d = (cl_ushort *)dest;
+ const cl_ushort *const x = (cl_ushort *)src1;
+ const cl_ushort *const y = (cl_ushort *)src2;
+ const cl_short *const m = (cl_short *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void refselect_1i32(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_1i32(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_int *d, *x, *y, *m;
- d = (cl_int*)dest;
- x = (cl_int*)src1;
- y = (cl_int*)src2;
- m = (cl_int*)cmp;
+ cl_int *const d = (cl_int *)dest;
+ const cl_int *const x = (cl_int *)src1;
+ const cl_int *const y = (cl_int *)src2;
+ const cl_int *const m = (cl_int *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void refselect_1u32(void *dest, void *src1, void *src2, void *cmp, size_t count){
+void refselect_1u32(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_uint *d, *x, *y;
- cl_int *m;
- d = (cl_uint*)dest;
- x = (cl_uint*)src1;
- y = (cl_uint*)src2;
- m = (cl_int*)cmp;
+ cl_uint *const d = (cl_uint *)dest;
+ const cl_uint *const x = (cl_uint *)src1;
+ const cl_uint *const y = (cl_uint *)src2;
+ const cl_int *const m = (cl_int *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void refselect_1i64(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_1i64(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_long *d, *x, *y, *m;
- d = (cl_long*) dest;
- x = (cl_long*) src1;
- y = (cl_long*) src2;
- m = (cl_long*) cmp;
+ cl_long *const d = (cl_long *)dest;
+ const cl_long *const x = (cl_long *)src1;
+ const cl_long *const y = (cl_long *)src2;
+ const cl_long *const m = (cl_long *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void refselect_1u64(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_1u64(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_ulong *d, *x, *y;
- cl_long *m;
- d = (cl_ulong*) dest;
- x = (cl_ulong*) src1;
- y = (cl_ulong*) src2;
- m = (cl_long*) cmp;
+ cl_ulong *const d = (cl_ulong *)dest;
+ const cl_ulong *const x = (cl_ulong *)src1;
+ const cl_ulong *const y = (cl_ulong *)src2;
+ const cl_long *const m = (cl_long *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void refselect_1i8u(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_1i8u(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_char *d, *x, *y;
- cl_uchar *m;
- d = (cl_char*) dest;
- x = (cl_char*) src1;
- y = (cl_char*) src2;
- m = (cl_uchar*) cmp;
+ cl_char *const d = (cl_char *)dest;
+ const cl_char *const x = (cl_char *)src1;
+ const cl_char *const y = (cl_char *)src2;
+ const cl_uchar *const m = (cl_uchar *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void refselect_1u8u(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_1u8u(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_uchar *d, *x, *y, *m;
- d = (cl_uchar*) dest;
- x = (cl_uchar*) src1;
- y = (cl_uchar*) src2;
- m = (cl_uchar*) cmp;
+ cl_uchar *const d = (cl_uchar *)dest;
+ const cl_uchar *const x = (cl_uchar *)src1;
+ const cl_uchar *const y = (cl_uchar *)src2;
+ const cl_uchar *const m = (cl_uchar *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void refselect_1i16u(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_1i16u(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_short *d, *x, *y;
- cl_ushort *m;
- d = (cl_short*) dest;
- x = (cl_short*) src1;
- y = (cl_short*) src2;
- m = (cl_ushort*) cmp;
+ cl_short *const d = (cl_short *)dest;
+ const cl_short *const x = (cl_short *)src1;
+ const cl_short *const y = (cl_short *)src2;
+ const cl_ushort *const m = (cl_ushort *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void refselect_1u16u(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_1u16u(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_ushort *d, *x, *y, *m;
- d = (cl_ushort*) dest;
- x = (cl_ushort*) src1;
- y = (cl_ushort*) src2;
- m = (cl_ushort*) cmp;
+ cl_ushort *const d = (cl_ushort *)dest;
+ const cl_ushort *const x = (cl_ushort *)src1;
+ const cl_ushort *const y = (cl_ushort *)src2;
+ const cl_ushort *const m = (cl_ushort *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void refselect_1i32u(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_1i32u(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_int *d, *x, *y;
- cl_uint *m;
- d = (cl_int*) dest;
- x = (cl_int*) src1;
- y = (cl_int*) src2;
- m = (cl_uint*) cmp;
+ cl_int *const d = (cl_int *)dest;
+ const cl_int *const x = (cl_int *)src1;
+ const cl_int *const y = (cl_int *)src2;
+ const cl_uint *const m = (cl_uint *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void refselect_1u32u(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_1u32u(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_uint *d, *x, *y, *m;
- d = (cl_uint*) dest;
- x = (cl_uint*) src1;
- y = (cl_uint*) src2;
- m = (cl_uint*) cmp;
+ cl_uint *const d = (cl_uint *)dest;
+ const cl_uint *const x = (cl_uint *)src1;
+ const cl_uint *const y = (cl_uint *)src2;
+ const cl_uint *const m = (cl_uint *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void refselect_1i64u(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_1i64u(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_long *d, *x, *y;
- cl_ulong *m;
- d = (cl_long*) dest;
- x = (cl_long*) src1;
- y = (cl_long*) src2;
- m = (cl_ulong*) cmp;
+ cl_long *const d = (cl_long *)dest;
+ const cl_long *const x = (cl_long *)src1;
+ const cl_long *const y = (cl_long *)src2;
+ const cl_ulong *const m = (cl_ulong *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void refselect_1u64u(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_1u64u(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_ulong *d, *x, *y, *m;
- d = (cl_ulong*) dest;
- x = (cl_ulong*) src1;
- y = (cl_ulong*) src2;
- m = (cl_ulong*) cmp;
+ cl_ulong *const d = (cl_ulong *)dest;
+ const cl_ulong *const x = (cl_ulong *)src1;
+ const cl_ulong *const y = (cl_ulong *)src2;
+ const cl_ulong *const m = (cl_ulong *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void refselect_ffi(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_hhi(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
+ size_t i;
+ cl_short *const d = (cl_short *)dest;
+ const cl_short *const x = (cl_short *)src1;
+ const cl_short *const y = (cl_short *)src2;
+ const cl_short *const m = (cl_short *)cmp;
+ for (i = 0; i < count; ++i) d[i] = m[i] ? y[i] : x[i];
+}
+
+void refselect_hhu(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_int *d, *x, *y;
- cl_int *m;
- d = (cl_int*) dest;
- x = (cl_int*) src1;
- y = (cl_int*) src2;
- m = (cl_int*) cmp;
+ cl_ushort *const d = (cl_ushort *)dest;
+ const cl_ushort *const x = (cl_ushort *)src1;
+ const cl_ushort *const y = (cl_ushort *)src2;
+ const cl_ushort *const m = (cl_ushort *)cmp;
+ for (i = 0; i < count; ++i) d[i] = m[i] ? y[i] : x[i];
+}
+
+void refselect_ffi(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
+ size_t i;
+ cl_int *const d = (cl_int *)dest;
+ const cl_int *const x = (cl_int *)src1;
+ const cl_int *const y = (cl_int *)src2;
+ const cl_int *const m = (cl_int *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void refselect_ffu(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_ffu(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_uint *d, *x, *y;
- cl_uint *m;
- d = (cl_uint*) dest;
- x = (cl_uint*) src1;
- y = (cl_uint*) src2;
- m = (cl_uint*) cmp;
+ cl_uint *const d = (cl_uint *)dest;
+ const cl_uint *const x = (cl_uint *)src1;
+ const cl_uint *const y = (cl_uint *)src2;
+ const cl_uint *const m = (cl_uint *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void refselect_ddi(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_ddi(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_long *d, *x, *y;
- cl_long *m;
- d = (cl_long*) dest;
- x = (cl_long*) src1;
- y = (cl_long*) src2;
- m = (cl_long*) cmp;
+ cl_long *const d = (cl_long *)dest;
+ const cl_long *const x = (cl_long *)src1;
+ const cl_long *const y = (cl_long *)src2;
+ const cl_long *const m = (cl_long *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void refselect_ddu(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void refselect_ddu(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_long *d, *x, *y;
- cl_ulong *m;
- d = (cl_long*) dest;
- x = (cl_long*) src1;
- y = (cl_long*) src2;
- m = (cl_ulong*) cmp;
+ cl_long *const d = (cl_long *)dest;
+ const cl_long *const x = (cl_long *)src1;
+ const cl_long *const y = (cl_long *)src2;
+ const cl_ulong *const m = (cl_ulong *)cmp;
for (i=0; i < count; ++i)
d[i] = m[i] ? y[i] : x[i];
}
-void vrefselect_1i8(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_1i8(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_char *d, *x, *y, *m;
- d = (cl_char*) dest;
- x = (cl_char*) src1;
- y = (cl_char*) src2;
- m = (cl_char*) cmp;
+ cl_char *const d = (cl_char *)dest;
+ const cl_char *const x = (cl_char *)src1;
+ const cl_char *const y = (cl_char *)src2;
+ const cl_char *const m = (cl_char *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x80) ? y[i] : x[i];
}
-void vrefselect_1u8(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_1u8(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_uchar *d, *x, *y;
- cl_char *m;
- d = (cl_uchar*) dest;
- x = (cl_uchar*) src1;
- y = (cl_uchar*) src2;
- m = (cl_char*) cmp;
+ cl_uchar *const d = (cl_uchar *)dest;
+ const cl_uchar *const x = (cl_uchar *)src1;
+ const cl_uchar *const y = (cl_uchar *)src2;
+ const cl_char *const m = (cl_char *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x80) ? y[i] : x[i];
}
-void vrefselect_1i16(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_1i16(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_short *d, *x, *y, *m;
- d = (cl_short*) dest;
- x = (cl_short*) src1;
- y = (cl_short*) src2;
- m = (cl_short*) cmp;
+ cl_short *const d = (cl_short *)dest;
+ const cl_short *const x = (cl_short *)src1;
+ const cl_short *const y = (cl_short *)src2;
+ const cl_short *const m = (cl_short *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x8000) ? y[i] : x[i];
}
-void vrefselect_1u16(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_1u16(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_ushort *d, *x, *y;
- cl_short *m;
- d = (cl_ushort*) dest;
- x = (cl_ushort*)src1;
- y = (cl_ushort*)src2;
- m = (cl_short*)cmp;
+ cl_ushort *const d = (cl_ushort *)dest;
+ const cl_ushort *const x = (cl_ushort *)src1;
+ const cl_ushort *const y = (cl_ushort *)src2;
+ const cl_short *const m = (cl_short *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x8000) ? y[i] : x[i];
}
-void vrefselect_1i32(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_1i32(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_int *d, *x, *y, *m;
- d = (cl_int*) dest;
- x = (cl_int*) src1;
- y = (cl_int*) src2;
- m = (cl_int*) cmp;
+ cl_int *const d = (cl_int *)dest;
+ const cl_int *const x = (cl_int *)src1;
+ const cl_int *const y = (cl_int *)src2;
+ const cl_int *const m = (cl_int *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x80000000) ? y[i] : x[i];
}
-void vrefselect_1u32(void *dest, void *src1, void *src2, void *cmp, size_t count){
+void vrefselect_1u32(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_uint *d, *x, *y;
- cl_int *m;
- d = (cl_uint*) dest;
- x = (cl_uint*) src1;
- y = (cl_uint*) src2;
- m = (cl_int*) cmp;
+ cl_uint *const d = (cl_uint *)dest;
+ const cl_uint *const x = (cl_uint *)src1;
+ const cl_uint *const y = (cl_uint *)src2;
+ const cl_int *const m = (cl_int *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x80000000) ? y[i] : x[i];
}
-void vrefselect_1i64(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_1i64(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_long *d, *x, *y, *m;
- d = (cl_long*) dest;
- x = (cl_long*) src1;
- y = (cl_long*) src2;
- m = (cl_long*) cmp;
+ cl_long *const d = (cl_long *)dest;
+ const cl_long *const x = (cl_long *)src1;
+ const cl_long *const y = (cl_long *)src2;
+ const cl_long *const m = (cl_long *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i];
}
-void vrefselect_1u64(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_1u64(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_ulong *d, *x, *y;
- cl_long *m;
- d = (cl_ulong*) dest;
- x = (cl_ulong*) src1;
- y = (cl_ulong*) src2;
- m = (cl_long*) cmp;
+ cl_ulong *const d = (cl_ulong *)dest;
+ const cl_ulong *const x = (cl_ulong *)src1;
+ const cl_ulong *const y = (cl_ulong *)src2;
+ const cl_long *const m = (cl_long *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i];
}
-void vrefselect_1i8u(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_1i8u(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_char *d, *x, *y;
- cl_uchar *m;
- d = (cl_char*) dest;
- x = (cl_char*) src1;
- y = (cl_char*) src2;
- m = (cl_uchar*) cmp;
+ cl_char *const d = (cl_char *)dest;
+ const cl_char *const x = (cl_char *)src1;
+ const cl_char *const y = (cl_char *)src2;
+ const cl_uchar *const m = (cl_uchar *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x80U) ? y[i] : x[i];
}
-void vrefselect_1u8u(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_1u8u(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_uchar *d, *x, *y, *m;
- d = (cl_uchar*) dest;
- x = (cl_uchar*) src1;
- y = (cl_uchar*) src2;
- m = (cl_uchar*) cmp;
+ cl_uchar *const d = (cl_uchar *)dest;
+ const cl_uchar *const x = (cl_uchar *)src1;
+ const cl_uchar *const y = (cl_uchar *)src2;
+ const cl_uchar *const m = (cl_uchar *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x80U) ? y[i] : x[i];
}
-void vrefselect_1i16u(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_1i16u(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_short *d, *x, *y;
- cl_ushort *m;
- d = (cl_short*) dest;
- x = (cl_short*) src1;
- y = (cl_short*) src2;
- m = (cl_ushort*) cmp;
+ cl_short *const d = (cl_short *)dest;
+ const cl_short *const x = (cl_short *)src1;
+ const cl_short *const y = (cl_short *)src2;
+ const cl_ushort *const m = (cl_ushort *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x8000U) ? y[i] : x[i];
}
-void vrefselect_1u16u(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_1u16u(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_ushort *d, *x, *y, *m;
- d = (cl_ushort*) dest;
- x = (cl_ushort*) src1;
- y = (cl_ushort*) src2;
- m = (cl_ushort*) cmp;
+ cl_ushort *const d = (cl_ushort *)dest;
+ const cl_ushort *const x = (cl_ushort *)src1;
+ const cl_ushort *const y = (cl_ushort *)src2;
+ const cl_ushort *const m = (cl_ushort *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x8000U) ? y[i] : x[i];
}
-void vrefselect_1i32u(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_1i32u(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_int *d, *x, *y;
- cl_uint *m;
- d = (cl_int*) dest;
- x = (cl_int*) src1;
- y = (cl_int*) src2;
- m = (cl_uint*) cmp;
+ cl_int *const d = (cl_int *)dest;
+ const cl_int *const x = (cl_int *)src1;
+ const cl_int *const y = (cl_int *)src2;
+ const cl_uint *const m = (cl_uint *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x80000000U) ? y[i] : x[i];
}
-void vrefselect_1u32u(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_1u32u(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_uint *d, *x, *y, *m;
- d = (cl_uint*) dest;
- x = (cl_uint*) src1;
- y = (cl_uint*) src2;
- m = (cl_uint*) cmp;
+ cl_uint *const d = (cl_uint *)dest;
+ const cl_uint *const x = (cl_uint *)src1;
+ const cl_uint *const y = (cl_uint *)src2;
+ const cl_uint *const m = (cl_uint *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x80000000U) ? y[i] : x[i];
}
-void vrefselect_1i64u(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_1i64u(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_long *d, *x, *y;
- cl_ulong *m;
- d = (cl_long*) dest;
- x = (cl_long*) src1;
- y = (cl_long*) src2;
- m = (cl_ulong*) cmp;
+ cl_long *const d = (cl_long *)dest;
+ const cl_long *const x = (cl_long *)src1;
+ const cl_long *const y = (cl_long *)src2;
+ const cl_ulong *const m = (cl_ulong *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x8000000000000000ULL) ? y[i] : x[i];
}
-void vrefselect_1u64u(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_1u64u(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp,
+ size_t count)
+{
size_t i;
- cl_ulong *d, *x, *y, *m;
- d = (cl_ulong*) dest;
- x = (cl_ulong*) src1;
- y = (cl_ulong*) src2;
- m = (cl_ulong*) cmp;
+ cl_ulong *const d = (cl_ulong *)dest;
+ const cl_ulong *const x = (cl_ulong *)src1;
+ const cl_ulong *const y = (cl_ulong *)src2;
+ const cl_ulong *const m = (cl_ulong *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x8000000000000000ULL) ? y[i] : x[i];
}
-void vrefselect_ffi(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_hhi(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_uint *d, *x, *y;
- cl_int *m;
- d = (cl_uint*) dest;
- x = (cl_uint*) src1;
- y = (cl_uint*) src2;
- m = (cl_int*) cmp;
+ cl_ushort *const d = (cl_ushort *)dest;
+ const cl_ushort *const x = (cl_ushort *)src1;
+ const cl_ushort *const y = (cl_ushort *)src2;
+ const cl_short *const m = (cl_short *)cmp;
+ for (i = 0; i < count; ++i) d[i] = (m[i] & 0x8000) ? y[i] : x[i];
+}
+
+void vrefselect_hhu(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
+ size_t i;
+ cl_ushort *const d = (cl_ushort *)dest;
+ const cl_ushort *const x = (cl_ushort *)src1;
+ const cl_ushort *const y = (cl_ushort *)src2;
+ const cl_ushort *const m = (cl_ushort *)cmp;
+ for (i = 0; i < count; ++i) d[i] = (m[i] & 0x8000U) ? y[i] : x[i];
+}
+
+void vrefselect_ffi(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
+ size_t i;
+ cl_uint *const d = (cl_uint *)dest;
+ const cl_uint *const x = (cl_uint *)src1;
+ const cl_uint *const y = (cl_uint *)src2;
+ const cl_int *const m = (cl_int *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x80000000) ? y[i] : x[i];
}
-void vrefselect_ffu(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_ffu(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_uint *d, *x, *y;
- cl_uint *m;
- d = (cl_uint*) dest;
- x = (cl_uint*) src1;
- y = (cl_uint*) src2;
- m = (cl_uint*) cmp;
+ cl_uint *const d = (cl_uint *)dest;
+ const cl_uint *const x = (cl_uint *)src1;
+ const cl_uint *const y = (cl_uint *)src2;
+ const cl_uint *const m = (cl_uint *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x80000000U) ? y[i] : x[i];
}
-void vrefselect_ddi(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_ddi(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_ulong *d, *x, *y;
- cl_long *m;
- d = (cl_ulong*) dest;
- x = (cl_ulong*) src1;
- y = (cl_ulong*) src2;
- m = (cl_long*) cmp;
+ cl_ulong *const d = (cl_ulong *)dest;
+ const cl_ulong *const x = (cl_ulong *)src1;
+ const cl_ulong *const y = (cl_ulong *)src2;
+ const cl_long *const m = (cl_long *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x8000000000000000LL) ? y[i] : x[i];
}
-void vrefselect_ddu(void *dest, void *src1, void *src2, void *cmp, size_t count) {
+void vrefselect_ddu(void *const dest, const void *const src1,
+ const void *const src2, const void *const cmp, size_t count)
+{
size_t i;
- cl_ulong *d, *x, *y;
- cl_ulong *m;
- d = (cl_ulong*) dest;
- x = (cl_ulong*) src1;
- y = (cl_ulong*) src2;
- m = (cl_ulong*) cmp;
+ cl_ulong *const d = (cl_ulong *)dest;
+ const cl_ulong *const x = (cl_ulong *)src1;
+ const cl_ulong *const y = (cl_ulong *)src2;
+ const cl_ulong *const m = (cl_ulong *)cmp;
for (i=0; i < count; ++i)
d[i] = (m[i] & 0x8000000000000000ULL) ? y[i] : x[i];
}
// Define refSelects
-Select refSelects[kTypeCount][2] = {
- { refselect_1u8u, refselect_1u8 }, // cl_uchar
- { refselect_1i8u, refselect_1i8 }, // char
+Select refSelects[kTypeCount][2] = {
+ { refselect_1u8u, refselect_1u8 }, // cl_uchar
+ { refselect_1i8u, refselect_1i8 }, // char
{ refselect_1u16u, refselect_1u16 }, // ushort
{ refselect_1i16u, refselect_1i16 }, // short
+ { refselect_hhu, refselect_hhi }, // half
{ refselect_1u32u, refselect_1u32 }, // uint
{ refselect_1i32u, refselect_1i32 }, // int
- { refselect_ffu, refselect_ffi }, // float
+ { refselect_ffu, refselect_ffi }, // float
{ refselect_1u64u, refselect_1u64 }, // ulong
{ refselect_1i64u, refselect_1i64 }, // long
- { refselect_ddu, refselect_ddi } // double
+ { refselect_ddu, refselect_ddi } // double
};
// Define vrefSelects (vector refSelects)
-Select vrefSelects[kTypeCount][2] = {
- { vrefselect_1u8u, vrefselect_1u8 }, // cl_uchar
- { vrefselect_1i8u, vrefselect_1i8 }, // char
+Select vrefSelects[kTypeCount][2] = {
+ { vrefselect_1u8u, vrefselect_1u8 }, // cl_uchar
+ { vrefselect_1i8u, vrefselect_1i8 }, // char
{ vrefselect_1u16u, vrefselect_1u16 }, // ushort
{ vrefselect_1i16u, vrefselect_1i16 }, // short
+ { vrefselect_hhu, vrefselect_hhi }, // half
{ vrefselect_1u32u, vrefselect_1u32 }, // uint
{ vrefselect_1i32u, vrefselect_1i32 }, // int
- { vrefselect_ffu, vrefselect_ffi }, // float
+ { vrefselect_ffu, vrefselect_ffi }, // float
{ vrefselect_1u64u, vrefselect_1u64 }, // ulong
{ vrefselect_1i64u, vrefselect_1i64 }, // long
- { vrefselect_ddu, vrefselect_ddi } // double
+ { vrefselect_ddu, vrefselect_ddi } // double
};
//-----------------------------------------
// Check functions
//-----------------------------------------
-size_t check_uchar(void *test, void *correct, size_t count, size_t vector_size) {
- const cl_uchar *t = (const cl_uchar *) test;
- const cl_uchar *c = (const cl_uchar *) correct;
+size_t check_uchar(const void *const test, const void *const correct,
+ size_t count, size_t vector_size)
+{
+ const cl_uchar *const t = (const cl_uchar *)test;
+ const cl_uchar *const c = (const cl_uchar *)correct;
size_t i;
if (memcmp(t, c, count * sizeof(c[0])) != 0)
@@ -566,8 +649,8 @@ size_t check_uchar(void *test, void *correct, size_t count, size_t vector_size)
for (i = 0; i < count; i++)
if (t[i] != c[i])
{
- log_error("\n(check_uchar) Error for vector size %ld found at "
- "0x%8.8lx (of 0x%8.8lx): "
+ log_error("\n(check_uchar) Error for vector size %zu found at "
+ "0x%8.8zx (of 0x%8.8zx): "
"*0x%2.2x vs 0x%2.2x\n",
vector_size, i, count, c[i], t[i]);
return i + 1;
@@ -576,9 +659,11 @@ size_t check_uchar(void *test, void *correct, size_t count, size_t vector_size)
return 0;
}
-size_t check_char(void *test, void *correct, size_t count, size_t vector_size) {
- const cl_char *t = (const cl_char *) test;
- const cl_char *c = (const cl_char *) correct;
+size_t check_char(const void *const test, const void *const correct,
+ size_t count, size_t vector_size)
+{
+ const cl_char *const t = (const cl_char *)test;
+ const cl_char *const c = (const cl_char *)correct;
size_t i;
if (memcmp(t, c, count * sizeof(c[0])) != 0)
@@ -586,8 +671,8 @@ size_t check_char(void *test, void *correct, size_t count, size_t vector_size) {
for (i = 0; i < count; i++)
if (t[i] != c[i])
{
- log_error("\n(check_char) Error for vector size %ld found at "
- "0x%8.8lx (of 0x%8.8lx): "
+ log_error("\n(check_char) Error for vector size %zu found at "
+ "0x%8.8zx (of 0x%8.8zx): "
"*0x%2.2x vs 0x%2.2x\n",
vector_size, i, count, c[i], t[i]);
return i + 1;
@@ -597,9 +682,11 @@ size_t check_char(void *test, void *correct, size_t count, size_t vector_size) {
return 0;
}
-size_t check_ushort(void *test, void *correct, size_t count, size_t vector_size) {
- const cl_ushort *t = (const cl_ushort *) test;
- const cl_ushort *c = (const cl_ushort *) correct;
+size_t check_ushort(const void *const test, const void *const correct,
+ size_t count, size_t vector_size)
+{
+ const cl_ushort *const t = (const cl_ushort *)test;
+ const cl_ushort *const c = (const cl_ushort *)correct;
size_t i;
if (memcmp(t, c, count * sizeof(c[0])) != 0)
@@ -607,8 +694,8 @@ size_t check_ushort(void *test, void *correct, size_t count, size_t vector_size)
for (i = 0; i < count; i++)
if (t[i] != c[i])
{
- log_error("\n(check_ushort) Error for vector size %ld found at "
- "0x%8.8lx (of 0x%8.8lx): "
+ log_error("\n(check_ushort) Error for vector size %zu found at "
+ "0x%8.8zx (of 0x%8.8zx): "
"*0x%4.4x vs 0x%4.4x\n",
vector_size, i, count, c[i], t[i]);
return i + 1;
@@ -618,9 +705,11 @@ size_t check_ushort(void *test, void *correct, size_t count, size_t vector_size)
return 0;
}
-size_t check_short(void *test, void *correct, size_t count, size_t vector_size) {
- const cl_short *t = (const cl_short *) test;
- const cl_short *c = (const cl_short *) correct;
+size_t check_short(const void *const test, const void *const correct,
+ size_t count, size_t vector_size)
+{
+ const cl_short *const t = (const cl_short *)test;
+ const cl_short *const c = (const cl_short *)correct;
size_t i;
if (memcmp(t, c, count * sizeof(c[0])) != 0)
@@ -628,8 +717,8 @@ size_t check_short(void *test, void *correct, size_t count, size_t vector_size)
for (i = 0; i < count; i++)
if (t[i] != c[i])
{
- log_error("\n(check_short) Error for vector size %ld found at "
- "0x%8.8lx (of 0x%8.8lx): "
+ log_error("\n(check_short) Error for vector size %zu found at "
+ "0x%8.8zx (of 0x%8.8zx): "
"*0x%8.8x vs 0x%8.8x\n",
vector_size, i, count, c[i], t[i]);
return i + 1;
@@ -639,9 +728,11 @@ size_t check_short(void *test, void *correct, size_t count, size_t vector_size)
return 0;
}
-size_t check_uint(void *test, void *correct, size_t count, size_t vector_size) {
- const cl_uint *t = (const cl_uint *) test;
- const cl_uint *c = (const cl_uint *) correct;
+size_t check_uint(const void *const test, const void *const correct,
+ size_t count, size_t vector_size)
+{
+ const cl_uint *const t = (const cl_uint *)test;
+ const cl_uint *const c = (const cl_uint *)correct;
size_t i;
if (memcmp(t, c, count * sizeof(c[0])) != 0)
@@ -649,8 +740,8 @@ size_t check_uint(void *test, void *correct, size_t count, size_t vector_size) {
for (i = 0; i < count; i++)
if (t[i] != c[i])
{
- log_error("\n(check_uint) Error for vector size %ld found at "
- "0x%8.8lx (of 0x%8.8lx): "
+ log_error("\n(check_uint) Error for vector size %zu found at "
+ "0x%8.8zx (of 0x%8.8zx): "
"*0x%8.8x vs 0x%8.8x\n",
vector_size, i, count, c[i], t[i]);
return i + 1;
@@ -660,9 +751,11 @@ size_t check_uint(void *test, void *correct, size_t count, size_t vector_size) {
return 0;
}
-size_t check_int(void *test, void *correct, size_t count, size_t vector_size) {
- const cl_int *t = (const cl_int *) test;
- const cl_int *c = (const cl_int *) correct;
+size_t check_int(const void *const test, const void *const correct,
+ size_t count, size_t vector_size)
+{
+ const cl_int *const t = (const cl_int *)test;
+ const cl_int *const c = (const cl_int *)correct;
size_t i;
if (memcmp(t, c, count * sizeof(c[0])) != 0)
@@ -671,8 +764,8 @@ size_t check_int(void *test, void *correct, size_t count, size_t vector_size) {
if (t[i] != c[i])
{
- log_error("\n(check_int) Error for vector size %ld found at "
- "0x%8.8lx (of 0x%8.8lx): "
+ log_error("\n(check_int) Error for vector size %zu found at "
+ "0x%8.8zx (of 0x%8.8zx): "
"*0x%8.8x vs 0x%8.8x\n",
vector_size, i, count, c[i], t[i]);
return i + 1;
@@ -682,9 +775,11 @@ size_t check_int(void *test, void *correct, size_t count, size_t vector_size) {
return 0;
}
-size_t check_ulong(void *test, void *correct, size_t count, size_t vector_size) {
- const cl_ulong *t = (const cl_ulong *) test;
- const cl_ulong *c = (const cl_ulong *) correct;
+size_t check_ulong(const void *const test, const void *const correct,
+ size_t count, size_t vector_size)
+{
+ const cl_ulong *const t = (const cl_ulong *)test;
+ const cl_ulong *const c = (const cl_ulong *)correct;
size_t i;
if (memcmp(t, c, count * sizeof(c[0])) != 0)
@@ -692,9 +787,9 @@ size_t check_ulong(void *test, void *correct, size_t count, size_t vector_size)
for (i = 0; i < count; i++)
if (t[i] != c[i])
{
- log_error("\n(check_ulong) Error for vector size %ld found at "
- "0x%8.8lx (of 0x%8.8lx): "
- "*0x%16.16llx vs 0x%16.16llx\n",
+ log_error("\n(check_ulong) Error for vector size %zu found at "
+ "0x%8.8zx (of 0x%8.8zx): "
+ "*0x%16.16" PRIx64 " vs 0x%16.16" PRIx64 "\n",
vector_size, i, count, c[i], t[i]);
return i + 1;
}
@@ -703,9 +798,11 @@ size_t check_ulong(void *test, void *correct, size_t count, size_t vector_size)
return 0;
}
-size_t check_long(void *test, void *correct, size_t count, size_t vector_size) {
- const cl_long *t = (const cl_long *) test;
- const cl_long *c = (const cl_long *) correct;
+size_t check_long(const void *const test, const void *const correct,
+ size_t count, size_t vector_size)
+{
+ const cl_long *const t = (const cl_long *)test;
+ const cl_long *const c = (const cl_long *)correct;
size_t i;
if (memcmp(t, c, count * sizeof(c[0])) != 0)
@@ -713,9 +810,34 @@ size_t check_long(void *test, void *correct, size_t count, size_t vector_size) {
for (i = 0; i < count; i++)
if (t[i] != c[i])
{
- log_error("\n(check_long) Error for vector size %ld found at "
- "0x%8.8lx (of 0x%8.8lx): "
- "*0x%16.16llx vs 0x%16.16llx\n",
+ log_error("\n(check_long) Error for vector size %zu found at "
+ "0x%8.8zx (of 0x%8.8zx): "
+ "*0x%16.16" PRIx64 " vs 0x%16.16" PRIx64 "\n",
+ vector_size, i, count, c[i], t[i]);
+ return i + 1;
+ }
+ }
+
+ return 0;
+}
+
+size_t check_half(const void *const test, const void *const correct,
+ size_t count, size_t vector_size)
+{
+ const cl_ushort *const t = (const cl_ushort *)test;
+ const cl_ushort *const c = (const cl_ushort *)correct;
+ size_t i;
+
+ if (memcmp(t, c, count * sizeof(c[0])) != 0)
+ {
+ for (i = 0; i < count; i++) /* Allow nans to be binary different */
+ if ((t[i] != c[i])
+ && !(isnan(((cl_half *)correct)[i])
+ && isnan(((cl_half *)test)[i])))
+ {
+ log_error("\n(check_half) Error for vector size %zu found at "
+ "0x%8.8zx (of 0x%8.8zx): "
+ "*0x%4.4x vs 0x%4.4x\n",
vector_size, i, count, c[i], t[i]);
return i + 1;
}
@@ -724,9 +846,11 @@ size_t check_long(void *test, void *correct, size_t count, size_t vector_size) {
return 0;
}
-size_t check_float( void *test, void *correct, size_t count, size_t vector_size ) {
- const cl_uint *t = (const cl_uint *) test;
- const cl_uint *c = (const cl_uint *) correct;
+size_t check_float(const void *const test, const void *const correct,
+ size_t count, size_t vector_size)
+{
+ const cl_uint *const t = (const cl_uint *)test;
+ const cl_uint *const c = (const cl_uint *)correct;
size_t i;
if (memcmp(t, c, count * sizeof(c[0])) != 0)
@@ -735,8 +859,8 @@ size_t check_float( void *test, void *correct, size_t count, size_t vector_size
if ((t[i] != c[i])
&& !(isnan(((float *)correct)[i]) && isnan(((float *)test)[i])))
{
- log_error("\n(check_float) Error for vector size %ld found at "
- "0x%8.8lx (of 0x%8.8lx): "
+ log_error("\n(check_float) Error for vector size %zu found at "
+ "0x%8.8zx (of 0x%8.8zx): "
"*0x%8.8x vs 0x%8.8x\n",
vector_size, i, count, c[i], t[i]);
return i + 1;
@@ -746,9 +870,11 @@ size_t check_float( void *test, void *correct, size_t count, size_t vector_size
return 0;
}
-size_t check_double( void *test, void *correct, size_t count, size_t vector_size ) {
- const cl_ulong *t = (const cl_ulong *) test;
- const cl_ulong *c = (const cl_ulong *) correct;
+size_t check_double(const void *const test, const void *const correct,
+ size_t count, size_t vector_size)
+{
+ const cl_ulong *const t = (const cl_ulong *)test;
+ const cl_ulong *const c = (const cl_ulong *)correct;
size_t i;
if (memcmp(t, c, count * sizeof(c[0])) != 0)
@@ -758,9 +884,9 @@ size_t check_double( void *test, void *correct, size_t count, size_t vector_size
&& !(isnan(((double *)correct)[i])
&& isnan(((double *)test)[i])))
{
- log_error("\n(check_double) Error for vector size %ld found at "
- "0x%8.8lx (of 0x%8.8lx): "
- "*0x%16.16llx vs 0x%16.16llx\n",
+ log_error("\n(check_double) Error for vector size %zu found at "
+ "0x%8.8zx (of 0x%8.8zx): "
+ "*0x%16.16" PRIx64 " vs 0x%16.16" PRIx64 "\n",
vector_size, i, count, c[i], t[i]);
return i + 1;
}
@@ -770,5 +896,7 @@ size_t check_double( void *test, void *correct, size_t count, size_t vector_size
}
CheckResults checkResults[kTypeCount] = {
- check_uchar, check_char, check_ushort, check_short, check_uint,
- check_int, check_float, check_ulong, check_long, check_double };
+ check_uchar, check_char, check_ushort, check_short,
+ check_half, check_uint, check_int, check_float,
+ check_ulong, check_long, check_double
+};
diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32
new file mode 100644
index 00000000..49127187
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm32
@@ -0,0 +1,35 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 17
+; Schema: 0
+ OpCapability Addresses
+ OpCapability Linkage
+ OpCapability Kernel
+ OpCapability Float16
+ OpMemoryModel Physical32 OpenCL
+ OpEntryPoint Kernel %1 "op_neg_half" %gl_GlobalInvocationID
+ OpName %in "in"
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_GlobalInvocationID Constant
+ OpDecorate %in FuncParamAttr NoCapture
+ OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
+ %uint = OpTypeInt 32 0
+ %v3uint = OpTypeVector %uint 3
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+ %void = OpTypeVoid
+ %half = OpTypeFloat 16
+%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
+ %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_half
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+ %1 = OpFunction %void None %10
+ %in = OpFunctionParameter %_ptr_CrossWorkgroup_half
+ %11 = OpLabel
+ %12 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0
+ %13 = OpCompositeExtract %uint %12 0
+ %14 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %13
+ %15 = OpLoad %half %14
+ %16 = OpFNegate %half %15
+ OpStore %14 %16
+ OpReturn
+ OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64
new file mode 100644
index 00000000..9c7e3d6d
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/op_neg_half.spvasm64
@@ -0,0 +1,39 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 20
+; Schema: 0
+ OpCapability Addresses
+ OpCapability Linkage
+ OpCapability Kernel
+ OpCapability Int64
+ OpCapability Float16
+ OpMemoryModel Physical64 OpenCL
+ OpEntryPoint Kernel %1 "op_neg_half" %gl_GlobalInvocationID
+ OpName %in "in"
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_GlobalInvocationID Constant
+ OpDecorate %in FuncParamAttr NoCapture
+ OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
+ %ulong = OpTypeInt 64 0
+ %v3ulong = OpTypeVector %ulong 3
+%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+ %void = OpTypeVoid
+ %half = OpTypeFloat 16
+%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
+ %10 = OpTypeFunction %void %_ptr_CrossWorkgroup_half
+ %ulong_32 = OpConstant %ulong 32
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+ %1 = OpFunction %void None %10
+ %in = OpFunctionParameter %_ptr_CrossWorkgroup_half
+ %12 = OpLabel
+ %13 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0
+ %14 = OpCompositeExtract %ulong %13 0
+ %15 = OpShiftLeftLogical %ulong %14 %ulong_32
+ %16 = OpShiftRightArithmetic %ulong %15 %ulong_32
+ %17 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %16
+ %18 = OpLoad %half %17
+ %19 = OpFNegate %half %18
+ OpStore %17 %19
+ OpReturn
+ OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm32
new file mode 100644
index 00000000..985b5262
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm32
@@ -0,0 +1,42 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 22
+; Schema: 0
+ OpCapability Addresses
+ OpCapability Linkage
+ OpCapability Kernel
+ OpCapability Vector16
+ OpCapability Float16
+ OpMemoryModel Physical32 OpenCL
+ OpEntryPoint Kernel %1 "vector_half8_extract" %gl_GlobalInvocationID
+ OpName %in "in"
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_GlobalInvocationID Constant
+ OpDecorate %in FuncParamAttr NoCapture
+ OpDecorate %4 FuncParamAttr NoCapture
+ OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
+ %uint = OpTypeInt 32 0
+ %v3uint = OpTypeVector %uint 3
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+ %void = OpTypeVoid
+ %half = OpTypeFloat 16
+ %v8half = OpTypeVector %half 8
+%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
+%_ptr_CrossWorkgroup_v8half = OpTypePointer CrossWorkgroup %v8half
+ %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_v8half %_ptr_CrossWorkgroup_half %uint
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+ %1 = OpFunction %void None %13
+ %in = OpFunctionParameter %_ptr_CrossWorkgroup_v8half
+ %4 = OpFunctionParameter %_ptr_CrossWorkgroup_half
+ %14 = OpFunctionParameter %uint
+ %15 = OpLabel
+ %16 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0
+ %17 = OpCompositeExtract %uint %16 0
+ %18 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v8half %in %17
+ %19 = OpLoad %v8half %18
+ %20 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %4 %17
+ %21 = OpVectorExtractDynamic %half %19 %14
+ OpStore %20 %21
+ OpReturn
+ OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm64
new file mode 100644
index 00000000..dd14f66c
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/vector_half8_extract.spvasm64
@@ -0,0 +1,47 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 26
+; Schema: 0
+ OpCapability Addresses
+ OpCapability Linkage
+ OpCapability Kernel
+ OpCapability Int64
+ OpCapability Vector16
+ OpCapability Float16
+ OpMemoryModel Physical64 OpenCL
+ OpEntryPoint Kernel %1 "vector_half8_extract" %gl_GlobalInvocationID
+ OpName %in "in"
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_GlobalInvocationID Constant
+ OpDecorate %in FuncParamAttr NoCapture
+ OpDecorate %4 FuncParamAttr NoCapture
+ OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
+ %ulong = OpTypeInt 64 0
+ %v3ulong = OpTypeVector %ulong 3
+%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+ %ulong_32 = OpConstant %ulong 32
+ %uint = OpTypeInt 32 0
+ %void = OpTypeVoid
+ %half = OpTypeFloat 16
+ %v8half = OpTypeVector %half 8
+%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
+%_ptr_CrossWorkgroup_v8half = OpTypePointer CrossWorkgroup %v8half
+ %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v8half %_ptr_CrossWorkgroup_half %uint
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+ %1 = OpFunction %void None %15
+ %in = OpFunctionParameter %_ptr_CrossWorkgroup_v8half
+ %4 = OpFunctionParameter %_ptr_CrossWorkgroup_half
+ %16 = OpFunctionParameter %uint
+ %17 = OpLabel
+ %18 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0
+ %19 = OpCompositeExtract %ulong %18 0
+ %20 = OpShiftLeftLogical %ulong %19 %ulong_32
+ %21 = OpShiftRightArithmetic %ulong %20 %ulong_32
+ %22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v8half %in %21
+ %23 = OpLoad %v8half %22
+ %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %4 %21
+ %25 = OpVectorExtractDynamic %half %23 %16
+ OpStore %24 %25
+ OpReturn
+ OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm32
new file mode 100644
index 00000000..27812938
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm32
@@ -0,0 +1,43 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 23
+; Schema: 0
+ OpCapability Addresses
+ OpCapability Linkage
+ OpCapability Kernel
+ OpCapability Vector16
+ OpCapability Float16
+ OpMemoryModel Physical32 OpenCL
+ OpEntryPoint Kernel %1 "vector_half8_insert" %gl_GlobalInvocationID
+ OpName %in "in"
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_GlobalInvocationID Constant
+ OpDecorate %in FuncParamAttr NoCapture
+ OpDecorate %4 FuncParamAttr NoCapture
+ OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
+ %uint = OpTypeInt 32 0
+ %v3uint = OpTypeVector %uint 3
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+ %void = OpTypeVoid
+ %half = OpTypeFloat 16
+ %v8half = OpTypeVector %half 8
+%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
+%_ptr_CrossWorkgroup_v8half = OpTypePointer CrossWorkgroup %v8half
+ %13 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_v8half %uint
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+ %1 = OpFunction %void None %13
+ %in = OpFunctionParameter %_ptr_CrossWorkgroup_half
+ %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v8half
+ %14 = OpFunctionParameter %uint
+ %15 = OpLabel
+ %16 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0
+ %17 = OpCompositeExtract %uint %16 0
+ %18 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %17
+ %19 = OpLoad %half %18
+ %20 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v8half %4 %17
+ %21 = OpLoad %v8half %20
+ %22 = OpVectorInsertDynamic %v8half %21 %19 %14
+ OpStore %20 %22
+ OpReturn
+ OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm64
new file mode 100644
index 00000000..f140fc25
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/vector_half8_insert.spvasm64
@@ -0,0 +1,48 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 27
+; Schema: 0
+ OpCapability Addresses
+ OpCapability Linkage
+ OpCapability Kernel
+ OpCapability Int64
+ OpCapability Vector16
+ OpCapability Float16
+ OpMemoryModel Physical64 OpenCL
+ OpEntryPoint Kernel %1 "vector_half8_insert" %gl_GlobalInvocationID
+ OpName %in "in"
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_GlobalInvocationID Constant
+ OpDecorate %in FuncParamAttr NoCapture
+ OpDecorate %4 FuncParamAttr NoCapture
+ OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
+ %ulong = OpTypeInt 64 0
+ %v3ulong = OpTypeVector %ulong 3
+%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+ %ulong_32 = OpConstant %ulong 32
+ %uint = OpTypeInt 32 0
+ %void = OpTypeVoid
+ %half = OpTypeFloat 16
+ %v8half = OpTypeVector %half 8
+%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
+%_ptr_CrossWorkgroup_v8half = OpTypePointer CrossWorkgroup %v8half
+ %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_half %_ptr_CrossWorkgroup_v8half %uint
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+ %1 = OpFunction %void None %15
+ %in = OpFunctionParameter %_ptr_CrossWorkgroup_half
+ %4 = OpFunctionParameter %_ptr_CrossWorkgroup_v8half
+ %16 = OpFunctionParameter %uint
+ %17 = OpLabel
+ %18 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0
+ %19 = OpCompositeExtract %ulong %18 0
+ %20 = OpShiftLeftLogical %ulong %19 %ulong_32
+ %21 = OpShiftRightArithmetic %ulong %20 %ulong_32
+ %22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %in %21
+ %23 = OpLoad %half %22
+ %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v8half %4 %21
+ %25 = OpLoad %v8half %24
+ %26 = OpVectorInsertDynamic %v8half %25 %23 %16
+ OpStore %24 %26
+ OpReturn
+ OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32
new file mode 100644
index 00000000..6fda7d8f
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm32
@@ -0,0 +1,46 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 25
+; Schema: 0
+ OpCapability Addresses
+ OpCapability Linkage
+ OpCapability Kernel
+ OpCapability Float16
+ OpMemoryModel Physical32 OpenCL
+ OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID
+ OpName %res "res"
+ OpName %lhs "lhs"
+ OpName %rhs "rhs"
+ OpDecorate %5 FuncParamAttr NoCapture
+ %5 = OpDecorationGroup
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_GlobalInvocationID Constant
+ OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
+ OpGroupDecorate %5 %res %lhs %rhs
+ %uint = OpTypeInt 32 0
+ %v3uint = OpTypeVector %uint 3
+%_ptr_Input_v3uint = OpTypePointer Input %v3uint
+ %void = OpTypeVoid
+ %half = OpTypeFloat 16
+%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
+ %v4half = OpTypeVector %half 4
+%_ptr_CrossWorkgroup_v4half = OpTypePointer CrossWorkgroup %v4half
+ %15 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_half
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input
+ %1 = OpFunction %void None %15
+ %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4half
+ %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4half
+ %rhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
+ %16 = OpLabel
+ %17 = OpLoad %v3uint %gl_GlobalInvocationID Aligned 0
+ %18 = OpCompositeExtract %uint %17 0
+ %19 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %lhs %18
+ %20 = OpLoad %v4half %19 Aligned 8
+ %21 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %rhs %18
+ %22 = OpLoad %half %21 Aligned 2
+ %23 = OpVectorTimesScalar %v4half %20 %22
+ %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %res %18
+ OpStore %24 %23 Aligned 8
+ OpReturn
+ OpFunctionEnd
diff --git a/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64 b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64
new file mode 100644
index 00000000..fa2d5221
--- /dev/null
+++ b/test_conformance/spirv_new/spirv_asm/vector_times_scalar_half.spvasm64
@@ -0,0 +1,50 @@
+; SPIR-V
+; Version: 1.0
+; Generator: Khronos SPIR-V Tools Assembler; 0
+; Bound: 28
+; Schema: 0
+ OpCapability Addresses
+ OpCapability Linkage
+ OpCapability Kernel
+ OpCapability Int64
+ OpCapability Float16
+ OpMemoryModel Physical64 OpenCL
+ OpEntryPoint Kernel %1 "vector_times_scalar" %gl_GlobalInvocationID
+ OpName %res "res"
+ OpName %lhs "lhs"
+ OpName %rhs "rhs"
+ OpDecorate %5 FuncParamAttr NoCapture
+ %5 = OpDecorationGroup
+ OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId
+ OpDecorate %gl_GlobalInvocationID Constant
+ OpDecorate %gl_GlobalInvocationID LinkageAttributes "__spirv_GlobalInvocationId" Import
+ OpGroupDecorate %5 %res %lhs %rhs
+ %ulong = OpTypeInt 64 0
+ %v3ulong = OpTypeVector %ulong 3
+%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+ %ulong_32 = OpConstant %ulong 32
+ %void = OpTypeVoid
+ %half = OpTypeFloat 16
+%_ptr_CrossWorkgroup_half = OpTypePointer CrossWorkgroup %half
+ %v4half = OpTypeVector %half 4
+%_ptr_CrossWorkgroup_v4half = OpTypePointer CrossWorkgroup %v4half
+ %16 = OpTypeFunction %void %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_v4half %_ptr_CrossWorkgroup_half
+%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3ulong Input
+ %1 = OpFunction %void None %16
+ %res = OpFunctionParameter %_ptr_CrossWorkgroup_v4half
+ %lhs = OpFunctionParameter %_ptr_CrossWorkgroup_v4half
+ %rhs = OpFunctionParameter %_ptr_CrossWorkgroup_half
+ %17 = OpLabel
+ %18 = OpLoad %v3ulong %gl_GlobalInvocationID Aligned 0
+ %19 = OpCompositeExtract %ulong %18 0
+ %20 = OpShiftLeftLogical %ulong %19 %ulong_32
+ %21 = OpShiftRightArithmetic %ulong %20 %ulong_32
+ %22 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %lhs %21
+ %23 = OpLoad %v4half %22 Aligned 8
+ %24 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_half %rhs %21
+ %25 = OpLoad %half %24 Aligned 2
+ %26 = OpVectorTimesScalar %v4half %23 %25
+ %27 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_v4half %res %21
+ OpStore %27 %26 Aligned 8
+ OpReturn
+ OpFunctionEnd
diff --git a/test_conformance/spirv_new/test_op_negate.cpp b/test_conformance/spirv_new/test_op_negate.cpp
index e3dc1f34..5009be93 100644
--- a/test_conformance/spirv_new/test_op_negate.cpp
+++ b/test_conformance/spirv_new/test_op_negate.cpp
@@ -32,6 +32,15 @@ int test_negation(cl_device_id deviceID,
return 0;
}
}
+ if (std::string(Tname).find("half") != std::string::npos)
+ {
+ if (!is_extension_available(deviceID, "cl_khr_fp16"))
+ {
+ log_info(
+ "Extension cl_khr_fp16 not supported; skipping half tests.\n");
+ return 0;
+ }
+ }
cl_int err = CL_SUCCESS;
int num = (int)h_in.size();
@@ -73,29 +82,28 @@ int test_negation(cl_device_id deviceID,
return 0;
}
-#define TEST_NEGATION(TYPE, Tv, OP, FUNC) \
- TEST_SPIRV_FUNC(OP##_##TYPE) \
- { \
- int num = 1 << 20; \
- std::vector<Tv> in(num); \
- RandomSeed seed(gRandomSeed); \
- for (int i = 0; i < num; i++) { \
- in[i] = genrand<Tv>(seed); \
- } \
- return test_negation<Tv>(deviceID, \
- context, \
- queue, \
- #TYPE, \
- #OP, \
- in, FUNC); \
- } \
+#define TEST_NEGATION(TYPE, Tv, OP, FUNC) \
+ TEST_SPIRV_FUNC(OP##_##TYPE) \
+ { \
+ int num = 1 << 20; \
+ std::vector<Tv> in(num); \
+ RandomSeed seed(gRandomSeed); \
+ for (int i = 0; i < num; i++) \
+ { \
+ in[i] = genrand<Tv>(seed); \
+ } \
+ return test_negation<Tv>(deviceID, context, queue, #TYPE, #OP, in, \
+ FUNC); \
+ }
+#define TEST_NEG_HALF TEST_NEGATION(half, cl_half, op_neg, negOpHalf)
#define TEST_NEG(TYPE) TEST_NEGATION(TYPE, cl_##TYPE, op_neg, negOp<cl_##TYPE>)
#define TEST_NOT(TYPE) TEST_NEGATION(TYPE, cl_##TYPE, op_not, notOp<cl_##TYPE>)
#define TEST_NEG_VEC(TYPE, N) TEST_NEGATION(TYPE##N, cl_##TYPE##N, op_neg, (negOpVec<cl_##TYPE##N, N>))
#define TEST_NOT_VEC(TYPE, N) TEST_NEGATION(TYPE##N, cl_##TYPE##N, op_not, (notOpVec<cl_##TYPE##N, N>))
+TEST_NEG_HALF
TEST_NEG(float)
TEST_NEG(double)
TEST_NEG(int)
diff --git a/test_conformance/spirv_new/test_op_vector_extract.cpp b/test_conformance/spirv_new/test_op_vector_extract.cpp
index fe1f8253..f77aa7a2 100644
--- a/test_conformance/spirv_new/test_op_vector_extract.cpp
+++ b/test_conformance/spirv_new/test_op_vector_extract.cpp
@@ -25,6 +25,17 @@ int test_extract(cl_device_id deviceID, cl_context context,
return 0;
}
}
+
+ if (std::string(name).find("half") != std::string::npos)
+ {
+ if (!is_extension_available(deviceID, "cl_khr_fp16"))
+ {
+ log_info(
+ "Extension cl_khr_fp16 not supported; skipping half tests.\n");
+ return 0;
+ }
+ }
+
cl_int err = CL_SUCCESS;
clProgramWrapper prog;
@@ -76,27 +87,30 @@ int test_extract(cl_device_id deviceID, cl_context context,
return 0;
}
-#define TEST_VECTOR_EXTRACT(TYPE, N) \
- TEST_SPIRV_FUNC(op_vector_##TYPE##N##_extract) \
- { \
- typedef cl_##TYPE##N Tv; \
- typedef cl_##TYPE Ts; \
- const int num = 1 << 20; \
- std::vector<Tv> in(num); \
- const char *name = "vector_" #TYPE #N "_extract"; \
- \
- RandomSeed seed(gRandomSeed); \
- \
- for (int i = 0; i < num; i++) { \
- in[i] = genrand<Tv>(seed); \
- } \
- \
- return test_extract<Tv, Ts>(deviceID, \
- context, queue, \
- name, \
- in, N); \
+#define TEST_VECTOR_EXTRACT(TYPE, N) \
+ TEST_SPIRV_FUNC(op_vector_##TYPE##N##_extract) \
+ { \
+ if (sizeof(cl_##TYPE) == 2) \
+ { \
+ PASSIVE_REQUIRE_FP16_SUPPORT(deviceID); \
+ } \
+ typedef cl_##TYPE##N Tv; \
+ typedef cl_##TYPE Ts; \
+ const int num = 1 << 20; \
+ std::vector<Tv> in(num); \
+ const char *name = "vector_" #TYPE #N "_extract"; \
+ \
+ RandomSeed seed(gRandomSeed); \
+ \
+ for (int i = 0; i < num; i++) \
+ { \
+ in[i] = genrand<Tv>(seed); \
+ } \
+ \
+ return test_extract<Tv, Ts>(deviceID, context, queue, name, in, N); \
}
+TEST_VECTOR_EXTRACT(half, 8)
TEST_VECTOR_EXTRACT(int, 4)
TEST_VECTOR_EXTRACT(float, 4)
TEST_VECTOR_EXTRACT(long, 2)
diff --git a/test_conformance/spirv_new/test_op_vector_insert.cpp b/test_conformance/spirv_new/test_op_vector_insert.cpp
index 0749c14a..62fc78cb 100644
--- a/test_conformance/spirv_new/test_op_vector_insert.cpp
+++ b/test_conformance/spirv_new/test_op_vector_insert.cpp
@@ -25,6 +25,17 @@ int test_insert(cl_device_id deviceID, cl_context context,
return 0;
}
}
+
+ if (std::string(name).find("half") != std::string::npos)
+ {
+ if (!is_extension_available(deviceID, "cl_khr_fp16"))
+ {
+ log_info(
+ "Extension cl_khr_fp16 not supported; skipping half tests.\n");
+ return 0;
+ }
+ }
+
cl_int err = CL_SUCCESS;
clProgramWrapper prog;
err = get_program_with_il(prog, deviceID, context, name);
@@ -94,27 +105,30 @@ int test_insert(cl_device_id deviceID, cl_context context,
return 0;
}
-#define TEST_VECTOR_INSERT(TYPE, N) \
- TEST_SPIRV_FUNC(op_vector_##TYPE##N##_insert) \
- { \
- typedef cl_##TYPE##N Tv; \
- typedef cl_##TYPE Ts; \
- const int num = 1 << 20; \
- std::vector<Ts> in(num); \
- const char *name = "vector_" #TYPE #N "_insert"; \
- \
- RandomSeed seed(gRandomSeed); \
- \
- for (int i = 0; i < num; i++) { \
- in[i] = genrand<Ts>(seed); \
- } \
- \
- return test_insert<Ts, Tv>(deviceID, \
- context, queue, \
- name, \
- in, N); \
+#define TEST_VECTOR_INSERT(TYPE, N) \
+ TEST_SPIRV_FUNC(op_vector_##TYPE##N##_insert) \
+ { \
+ if (sizeof(cl_##TYPE) == 2) \
+ { \
+ PASSIVE_REQUIRE_FP16_SUPPORT(deviceID); \
+ } \
+ typedef cl_##TYPE##N Tv; \
+ typedef cl_##TYPE Ts; \
+ const int num = 1 << 20; \
+ std::vector<Ts> in(num); \
+ const char *name = "vector_" #TYPE #N "_insert"; \
+ \
+ RandomSeed seed(gRandomSeed); \
+ \
+ for (int i = 0; i < num; i++) \
+ { \
+ in[i] = genrand<Ts>(seed); \
+ } \
+ \
+ return test_insert<Ts, Tv>(deviceID, context, queue, name, in, N); \
}
+TEST_VECTOR_INSERT(half, 8)
TEST_VECTOR_INSERT(int, 4)
TEST_VECTOR_INSERT(float, 4)
TEST_VECTOR_INSERT(long, 2)
diff --git a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp
index 0859668c..0be4e8b7 100644
--- a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp
+++ b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp
@@ -17,6 +17,8 @@ or Khronos Conformance Test Source License Agreement as executed between Khronos
#include <sstream>
#include <string>
+using half = cl_half;
+
template<typename Tv, typename Ts>
int test_vector_times_scalar(cl_device_id deviceID,
cl_context context,
@@ -32,6 +34,16 @@ int test_vector_times_scalar(cl_device_id deviceID,
}
}
+ if (std::string(Tname).find("half") != std::string::npos)
+ {
+ if (!is_extension_available(deviceID, "cl_khr_fp16"))
+ {
+ log_info("Extension cl_khr_fp16 not supported; skipping half "
+ "tests.\n");
+ return 0;
+ }
+ }
+
cl_int err = CL_SUCCESS;
int num = (int)h_lhs.size();
size_t lhs_bytes = num * sizeof(Tv);
@@ -171,5 +183,7 @@ int test_vector_times_scalar(cl_device_id deviceID,
lhs, rhs); \
}
+
TEST_VECTOR_TIMES_SCALAR(float, 4)
TEST_VECTOR_TIMES_SCALAR(double, 4)
+TEST_VECTOR_TIMES_SCALAR(half, 4)
diff --git a/test_conformance/spirv_new/types.hpp b/test_conformance/spirv_new/types.hpp
index e7fceba0..728b2445 100644
--- a/test_conformance/spirv_new/types.hpp
+++ b/test_conformance/spirv_new/types.hpp
@@ -43,6 +43,8 @@ VEC_NOT_EQ_FUNC(cl_float, 2)
VEC_NOT_EQ_FUNC(cl_float, 4)
VEC_NOT_EQ_FUNC(cl_double, 2)
VEC_NOT_EQ_FUNC(cl_double, 4)
+VEC_NOT_EQ_FUNC(cl_half, 2)
+VEC_NOT_EQ_FUNC(cl_half, 4)
template<typename T>
bool isNotEqual(const T &lhs, const T &rhs)
@@ -109,6 +111,9 @@ GENRAND_REAL_FUNC(cl_float, 2)
GENRAND_REAL_FUNC(cl_float, 4)
GENRAND_REAL_FUNC(cl_double, 2)
GENRAND_REAL_FUNC(cl_double, 4)
+GENRAND_REAL_FUNC(cl_half, 2)
+GENRAND_REAL_FUNC(cl_half, 4)
+GENRAND_REAL_FUNC(cl_half, 8)
template<> inline cl_half genrandReal<cl_half>(RandomSeed &seed)
{
@@ -157,6 +162,8 @@ Tv negOp(Tv in)
return -in;
}
+inline cl_half negOpHalf(cl_half v) { return v ^ 0x8000; }
+
template<typename Tv>
Tv notOp(Tv in)
{
diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h
index f779ef37..d9dfc3b8 100644
--- a/test_conformance/subgroups/subgroup_common_templates.h
+++ b/test_conformance/subgroups/subgroup_common_templates.h
@@ -483,29 +483,30 @@ template <typename Ty, ShuffleOp operation> struct SHF
static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m,
const WorkGroupParams &test_params)
{
- int ii, i, j, k, n;
+ int ii, k;
+ size_t n;
cl_uint l;
- int nw = test_params.local_workgroup_size;
- int ns = test_params.subgroup_size;
+ size_t nw = test_params.local_workgroup_size;
+ size_t ns = test_params.subgroup_size;
int ng = test_params.global_workgroup_size;
- int nj = (nw + ns - 1) / ns;
+ size_t nj = (nw + ns - 1) / ns;
Ty tr, rr;
ng = ng / nw;
for (k = 0; k < ng; ++k)
{ // for each work_group
- for (j = 0; j < nw; ++j)
+ for (size_t j = 0; j < nw; ++j)
{ // inside the work_group
mx[j] = x[j]; // read host inputs for work_group
my[j] = y[j]; // read device outputs for work_group
}
- for (j = 0; j < nj; ++j)
+ for (size_t j = 0; j < nj; ++j)
{ // for each subgroup
ii = j * ns;
n = ii + ns > nw ? nw - ii : ns;
- for (i = 0; i < n; ++i)
+ for (size_t i = 0; i < n; ++i)
{ // inside the subgroup
// shuffle index storage
int midx = 4 * ii + 4 * i + 2;
diff --git a/test_conformance/subgroups/subhelpers.cpp b/test_conformance/subgroups/subhelpers.cpp
index 11268f64..440cde20 100644
--- a/test_conformance/subgroups/subhelpers.cpp
+++ b/test_conformance/subgroups/subhelpers.cpp
@@ -206,7 +206,7 @@ void set_last_workgroup_params(int non_uniform_size, int &number_of_subgroups,
}
void fill_and_shuffle_safe_values(std::vector<cl_ulong> &safe_values,
- int sb_size)
+ size_t sb_size)
{
// max product is 720, cl_half has enough precision for it
const std::vector<cl_ulong> non_one_values{ 2, 3, 4, 5, 6 };
diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h
index bcb523cf..ed92e5d3 100644
--- a/test_conformance/subgroups/subhelpers.h
+++ b/test_conformance/subgroups/subhelpers.h
@@ -44,7 +44,7 @@ cl_uint4 generate_bit_mask(cl_uint subgroup_local_id,
// for each subgroup values defined different values
// for rest of workitems set 1 shuffle values
void fill_and_shuffle_safe_values(std::vector<cl_ulong> &safe_values,
- int sb_size);
+ size_t sb_size);
struct WorkGroupParams
{
diff --git a/test_conformance/subgroups/test_workitem.cpp b/test_conformance/subgroups/test_workitem.cpp
index b69f3138..5b2a5eb8 100644
--- a/test_conformance/subgroups/test_workitem.cpp
+++ b/test_conformance/subgroups/test_workitem.cpp
@@ -36,7 +36,7 @@ struct get_test_data
};
static int check_group(const get_test_data *result, int nw, cl_uint ensg,
- int maxwgs)
+ size_t maxwgs)
{
int first = -1;
int last = -1;
@@ -168,7 +168,7 @@ static int check_group(const get_test_data *result, int nw, cl_uint ensg,
j = (result[first].subGroupSize + 31) / 32 * result[i].subGroupId
+ (result[i].subGroupLocalId >> 5);
- if (j < sizeof(hit) / 4)
+ if (j < static_cast<int>(sizeof(hit) / 4))
{
cl_uint b = 1U << (result[i].subGroupLocalId & 0x1fU);
if ((hit[j] & b) != 0)
@@ -191,7 +191,7 @@ int test_work_item_functions(cl_device_id device, cl_context context,
static const size_t lsize = 200;
int error;
int i, j, k, q, r, nw;
- int maxwgs;
+ size_t maxwgs;
cl_uint ensg;
size_t global;
size_t local;
@@ -235,7 +235,7 @@ int test_work_item_functions(cl_device_id device, cl_context context,
error = get_max_allowed_work_group_size(context, kernel, &local, NULL);
if (error != 0) return error;
- maxwgs = (int)local;
+ maxwgs = local;
// Limit it a bit so we have muliple work groups
// Ideally this will still be large enough to give us multiple subgroups
diff --git a/test_conformance/vulkan/main.cpp b/test_conformance/vulkan/main.cpp
index 5901420a..eb1afeb0 100644
--- a/test_conformance/vulkan/main.cpp
+++ b/test_conformance/vulkan/main.cpp
@@ -52,7 +52,8 @@ static void params_reset()
}
extern int test_buffer_common(cl_device_id device_, cl_context context_,
- cl_command_queue queue_, int numElements_);
+ cl_command_queue queue_, int numElements_,
+ bool use_fence);
extern int test_image_common(cl_device_id device_, cl_context context_,
cl_command_queue queue_, int numElements_);
@@ -61,7 +62,7 @@ int test_buffer_single_queue(cl_device_id device_, cl_context context_,
{
params_reset();
log_info("RUNNING TEST WITH ONE QUEUE...... \n\n");
- return test_buffer_common(device_, context_, queue_, numElements_);
+ return test_buffer_common(device_, context_, queue_, numElements_, false);
}
int test_buffer_multiple_queue(cl_device_id device_, cl_context context_,
cl_command_queue queue_, int numElements_)
@@ -69,7 +70,7 @@ int test_buffer_multiple_queue(cl_device_id device_, cl_context context_,
params_reset();
numCQ = 2;
log_info("RUNNING TEST WITH TWO QUEUE...... \n\n");
- return test_buffer_common(device_, context_, queue_, numElements_);
+ return test_buffer_common(device_, context_, queue_, numElements_, false);
}
int test_buffer_multiImport_sameCtx(cl_device_id device_, cl_context context_,
cl_command_queue queue_, int numElements_)
@@ -78,7 +79,7 @@ int test_buffer_multiImport_sameCtx(cl_device_id device_, cl_context context_,
multiImport = true;
log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT "
"IN SAME CONTEXT...... \n\n");
- return test_buffer_common(device_, context_, queue_, numElements_);
+ return test_buffer_common(device_, context_, queue_, numElements_, false);
}
int test_buffer_multiImport_diffCtx(cl_device_id device_, cl_context context_,
cl_command_queue queue_, int numElements_)
@@ -88,7 +89,45 @@ int test_buffer_multiImport_diffCtx(cl_device_id device_, cl_context context_,
multiCtx = true;
log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT "
"IN DIFFERENT CONTEXT...... \n\n");
- return test_buffer_common(device_, context_, queue_, numElements_);
+ return test_buffer_common(device_, context_, queue_, numElements_, false);
+}
+int test_buffer_single_queue_fence(cl_device_id device_, cl_context context_,
+ cl_command_queue queue_, int numElements_)
+{
+ params_reset();
+ log_info("RUNNING TEST WITH ONE QUEUE...... \n\n");
+ return test_buffer_common(device_, context_, queue_, numElements_, true);
+}
+int test_buffer_multiple_queue_fence(cl_device_id device_, cl_context context_,
+ cl_command_queue queue_, int numElements_)
+{
+ params_reset();
+ numCQ = 2;
+ log_info("RUNNING TEST WITH TWO QUEUE...... \n\n");
+ return test_buffer_common(device_, context_, queue_, numElements_, true);
+}
+int test_buffer_multiImport_sameCtx_fence(cl_device_id device_,
+ cl_context context_,
+ cl_command_queue queue_,
+ int numElements_)
+{
+ params_reset();
+ multiImport = true;
+ log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT "
+ "IN SAME CONTEXT...... \n\n");
+ return test_buffer_common(device_, context_, queue_, numElements_, true);
+}
+int test_buffer_multiImport_diffCtx_fence(cl_device_id device_,
+ cl_context context_,
+ cl_command_queue queue_,
+ int numElements_)
+{
+ params_reset();
+ multiImport = true;
+ multiCtx = true;
+ log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT "
+ "IN DIFFERENT CONTEXT...... \n\n");
+ return test_buffer_common(device_, context_, queue_, numElements_, true);
}
int test_image_single_queue(cl_device_id device_, cl_context context_,
cl_command_queue queue_, int numElements_)
@@ -110,6 +149,10 @@ test_definition test_list[] = { ADD_TEST(buffer_single_queue),
ADD_TEST(buffer_multiple_queue),
ADD_TEST(buffer_multiImport_sameCtx),
ADD_TEST(buffer_multiImport_diffCtx),
+ ADD_TEST(buffer_single_queue_fence),
+ ADD_TEST(buffer_multiple_queue_fence),
+ ADD_TEST(buffer_multiImport_sameCtx_fence),
+ ADD_TEST(buffer_multiImport_diffCtx_fence),
ADD_TEST(image_single_queue),
ADD_TEST(image_multiple_queue),
ADD_TEST(consistency_external_buffer),
@@ -142,7 +185,6 @@ bool useSingleImageKernel = false;
bool useDeviceLocal = false;
bool disableNTHandleType = false;
bool enableOffset = false;
-bool non_dedicated = false;
static void printUsage(const char *execName)
{
@@ -189,10 +231,6 @@ size_t parseParams(int argc, const char *argv[], const char **argList)
{
enableOffset = true;
}
- if (!strcmp(argv[i], "--non_dedicated"))
- {
- non_dedicated = true;
- }
if (strcmp(argv[i], "-h") == 0)
{
printUsage(argv[0]);
diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp b/test_conformance/vulkan/test_vulkan_api_consistency.cpp
index f22ac319..d12b3bfe 100644
--- a/test_conformance/vulkan/test_vulkan_api_consistency.cpp
+++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp
@@ -81,10 +81,11 @@ int test_consistency_external_buffer(cl_device_id deviceID, cl_context _context,
const VulkanMemoryTypeList& memoryTypeList =
vkDummyBuffer.getMemoryTypeList();
- VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory(
- vkDevice, bufferSize, memoryTypeList[0], vkExternalMemoryHandleType);
VulkanBufferList vkBufferList(1, vkDevice, bufferSize,
vkExternalMemoryHandleType);
+ VulkanDeviceMemory* vkDeviceMem =
+ new VulkanDeviceMemory(vkDevice, vkBufferList[0], memoryTypeList[0],
+ vkExternalMemoryHandleType);
vkDeviceMem->bindBuffer(vkBufferList[0], 0);
@@ -231,22 +232,27 @@ int test_consistency_external_image(cl_device_id deviceID, cl_context _context,
VulkanExternalMemoryHandleType vkExternalMemoryHandleType =
getSupportedVulkanExternalMemoryHandleTypeList()[0];
- VulkanImage2D* vkImage2D =
- new VulkanImage2D(vkDevice, VULKAN_FORMAT_R8G8B8A8_UNORM, width, height,
- 1, vkExternalMemoryHandleType);
- const VulkanMemoryTypeList& memoryTypeList = vkImage2D->getMemoryTypeList();
- uint64_t totalImageMemSize = vkImage2D->getSize();
+ VulkanImageTiling vulkanImageTiling =
+ vkClExternalMemoryHandleTilingAssumption(
+ deviceID, vkExternalMemoryHandleType, &errNum);
+ ASSERT_SUCCESS(errNum, "Failed to query OpenCL tiling mode");
+
+ VulkanImage2D vkImage2D =
+ VulkanImage2D(vkDevice, VULKAN_FORMAT_R8G8B8A8_UNORM, width, height,
+ vulkanImageTiling, 1, vkExternalMemoryHandleType);
+
+ const VulkanMemoryTypeList& memoryTypeList = vkImage2D.getMemoryTypeList();
+ uint64_t totalImageMemSize = vkImage2D.getSize();
log_info("Memory type index: %lu\n", (uint32_t)memoryTypeList[0]);
log_info("Memory type property: %d\n",
memoryTypeList[0].getMemoryTypeProperty());
log_info("Image size : %d\n", totalImageMemSize);
- VulkanDeviceMemory* vkDeviceMem =
- new VulkanDeviceMemory(vkDevice, totalImageMemSize, memoryTypeList[0],
- vkExternalMemoryHandleType);
- vkDeviceMem->bindImage(*vkImage2D, 0);
+ VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory(
+ vkDevice, vkImage2D, memoryTypeList[0], vkExternalMemoryHandleType);
+ vkDeviceMem->bindImage(vkImage2D, 0);
void* handle = NULL;
int fd;
@@ -299,7 +305,7 @@ int test_consistency_external_image(cl_device_id deviceID, cl_context _context,
extMemProperties.push_back(0);
const VkImageCreateInfo VulkanImageCreateInfo =
- vkImage2D->getVkImageCreateInfo();
+ vkImage2D.getVkImageCreateInfo();
errNum = getCLImageInfoFromVkImageInfo(
&VulkanImageCreateInfo, totalImageMemSize, &img_format, &image_desc);
diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
index 9b0bc9de..559625d7 100644
--- a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
+++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp
@@ -21,6 +21,7 @@
#include <assert.h>
#include <vector>
#include <iostream>
+#include <memory>
#include <string.h>
#include "harness/errorHelpers.h"
@@ -82,7 +83,8 @@ __kernel void checkKernel(__global unsigned char *ptr, int size, int expVal, __g
int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
cl_command_queue &cmd_queue2, cl_kernel *kernel,
cl_kernel &verify_kernel, VulkanDevice &vkDevice,
- uint32_t numBuffers, uint32_t bufferSize)
+ uint32_t numBuffers, uint32_t bufferSize,
+ bool use_fence)
{
int err = CL_SUCCESS;
size_t global_work_size[1];
@@ -117,14 +119,18 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+ std::shared_ptr<VulkanFence> fence = nullptr;
VulkanQueue &vkQueue = vkDevice.getQueue();
std::vector<char> vkBufferShader = readFile("buffer.spv");
VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
- VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
- MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+ VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
+ vkDescriptorSetLayoutBindingList.addBinding(
+ 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
+ vkDescriptorSetLayoutBindingList.addBinding(
+ 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
VulkanDescriptorSetLayout vkDescriptorSetLayout(
vkDevice, vkDescriptorSetLayoutBindingList);
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
@@ -136,10 +142,17 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
vkDescriptorSetLayout);
- clVk2CLExternalSemaphore = new clExternalSemaphore(
- vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
- clCl2VkExternalSemaphore = new clExternalSemaphore(
- vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ if (use_fence)
+ {
+ fence = std::make_shared<VulkanFence>(vkDevice);
+ }
+ else
+ {
+ clVk2CLExternalSemaphore = new clExternalSemaphore(
+ vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ clCl2VkExternalSemaphore = new clExternalSemaphore(
+ vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ }
const uint32_t maxIter = innerIterations;
VulkanCommandPool vkCommandPool(vkDevice);
@@ -179,9 +192,9 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
{
- vkBufferListDeviceMemory.push_back(
- new VulkanDeviceMemory(vkDevice, bufferSize, memoryType,
- vkExternalMemoryHandleType));
+ vkBufferListDeviceMemory.push_back(new VulkanDeviceMemory(
+ vkDevice, vkBufferList[bIdx], memoryType,
+ vkExternalMemoryHandleType));
externalMemory.push_back(new clExternalMemory(
vkBufferListDeviceMemory[bIdx], vkExternalMemoryHandleType,
0, bufferSize, context, deviceId));
@@ -200,8 +213,8 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx],
0);
buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer();
- vkDescriptorSet.update((uint32_t)bIdx + 1, vkBufferList[bIdx]);
}
+ vkDescriptorSet.updateArray(1, numBuffers, vkBufferList);
vkCommandBuffer.begin();
vkCommandBuffer.bindPipeline(vkComputePipeline);
vkCommandBuffer.bindDescriptorSets(
@@ -227,16 +240,27 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
for (uint32_t iter = 0; iter < maxIter; iter++)
{
- if (iter == 0)
+ if (use_fence)
{
- vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
+ fence->reset();
+ vkQueue.submit(vkCommandBuffer, fence);
+ fence->wait();
}
else
{
- vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
- vkVk2CLSemaphore);
+ if (iter == 0)
+ {
+ vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
+ }
+ else
+ {
+ vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
+ vkVk2CLSemaphore);
+ }
+
+ clVk2CLExternalSemaphore->wait(cmd_queue1);
}
- clVk2CLExternalSemaphore->wait(cmd_queue1);
+
err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t),
(void *)&bufferSize);
@@ -286,7 +310,14 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
goto CLEANUP;
}
- if (iter != (maxIter - 1))
+ if (use_fence)
+ {
+ clFlush(cmd_queue1);
+ clFlush(cmd_queue2);
+ clFinish(cmd_queue1);
+ clFinish(cmd_queue2);
+ }
+ else if (!use_fence && iter != (maxIter - 1))
{
clCl2VkExternalSemaphore->signal(cmd_queue2);
}
@@ -387,8 +418,11 @@ CLEANUP:
}
if (program) clReleaseProgram(program);
if (kernel_cq) clReleaseKernel(kernel_cq);
- if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
- if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
+ if (!use_fence)
+ {
+ if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
+ if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
+ }
if (error_2) free(error_2);
if (error_1) clReleaseMemObject(error_1);
@@ -398,7 +432,7 @@ CLEANUP:
int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
cl_kernel *kernel, cl_kernel &verify_kernel,
VulkanDevice &vkDevice, uint32_t numBuffers,
- uint32_t bufferSize)
+ uint32_t bufferSize, bool use_fence)
{
log_info("RUNNING TEST WITH ONE QUEUE...... \n\n");
size_t global_work_size[1];
@@ -416,13 +450,17 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+ std::shared_ptr<VulkanFence> fence = nullptr;
VulkanQueue &vkQueue = vkDevice.getQueue();
std::vector<char> vkBufferShader = readFile("buffer.spv");
VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
- VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
- MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+ VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
+ vkDescriptorSetLayoutBindingList.addBinding(
+ 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
+ vkDescriptorSetLayoutBindingList.addBinding(
+ 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
VulkanDescriptorSetLayout vkDescriptorSetLayout(
vkDevice, vkDescriptorSetLayoutBindingList);
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
@@ -434,10 +472,18 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
vkDescriptorSetLayout);
- clVk2CLExternalSemaphore = new clExternalSemaphore(
- vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
- clCl2VkExternalSemaphore = new clExternalSemaphore(
- vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ if (use_fence)
+ {
+ fence = std::make_shared<VulkanFence>(vkDevice);
+ }
+ else
+ {
+ clVk2CLExternalSemaphore = new clExternalSemaphore(
+ vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ clCl2VkExternalSemaphore = new clExternalSemaphore(
+ vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ }
+
const uint32_t maxIter = innerIterations;
VulkanCommandPool vkCommandPool(vkDevice);
VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
@@ -477,9 +523,9 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
for (size_t bIdx = 0; bIdx < numBuffers; bIdx++)
{
- vkBufferListDeviceMemory.push_back(
- new VulkanDeviceMemory(vkDevice, bufferSize, memoryType,
- vkExternalMemoryHandleType));
+ vkBufferListDeviceMemory.push_back(new VulkanDeviceMemory(
+ vkDevice, vkBufferList[bIdx], memoryType,
+ vkExternalMemoryHandleType));
externalMemory.push_back(new clExternalMemory(
vkBufferListDeviceMemory[bIdx], vkExternalMemoryHandleType,
0, bufferSize, context, deviceId));
@@ -498,8 +544,9 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx],
0);
buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer();
- vkDescriptorSet.update((uint32_t)bIdx + 1, vkBufferList[bIdx]);
}
+ vkDescriptorSet.updateArray(1, vkBufferList.size(), vkBufferList);
+
vkCommandBuffer.begin();
vkCommandBuffer.bindPipeline(vkComputePipeline);
vkCommandBuffer.bindDescriptorSets(
@@ -526,16 +573,26 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
for (uint32_t iter = 0; iter < maxIter; iter++)
{
- if (iter == 0)
+ if (use_fence)
{
- vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
+ fence->reset();
+ vkQueue.submit(vkCommandBuffer, fence);
+ fence->wait();
}
else
{
- vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
- vkVk2CLSemaphore);
+ if (iter == 0)
+ {
+ vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
+ }
+ else
+ {
+ vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
+ vkVk2CLSemaphore);
+ }
+
+ clVk2CLExternalSemaphore->wait(cmd_queue1);
}
- clVk2CLExternalSemaphore->wait(cmd_queue1);
err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t),
(void *)&bufferSize);
@@ -562,7 +619,12 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
" error\n");
goto CLEANUP;
}
- if (iter != (maxIter - 1))
+ if (use_fence)
+ {
+ clFlush(cmd_queue1);
+ clFinish(cmd_queue1);
+ }
+ else if (!use_fence && (iter != (maxIter - 1)))
{
clCl2VkExternalSemaphore->signal(cmd_queue1);
}
@@ -656,8 +718,13 @@ CLEANUP:
delete externalMemory[i];
}
}
- if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
- if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
+
+ if (!use_fence)
+ {
+ if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
+ if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
+ }
+
if (error_2) free(error_2);
if (error_1) clReleaseMemObject(error_1);
return err;
@@ -666,7 +733,7 @@ CLEANUP:
int run_test_with_multi_import_same_ctx(
cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel,
cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers,
- uint32_t bufferSize, uint32_t bufferSizeForOffset)
+ uint32_t bufferSize, uint32_t bufferSizeForOffset, float use_fence)
{
size_t global_work_size[1];
uint8_t *error_2;
@@ -687,14 +754,18 @@ int run_test_with_multi_import_same_ctx(
getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+ std::shared_ptr<VulkanFence> fence = nullptr;
VulkanQueue &vkQueue = vkDevice.getQueue();
std::vector<char> vkBufferShader = readFile("buffer.spv");
VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader);
- VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
- MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+ VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
+ vkDescriptorSetLayoutBindingList.addBinding(
+ 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
+ vkDescriptorSetLayoutBindingList.addBinding(
+ 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, MAX_BUFFERS);
VulkanDescriptorSetLayout vkDescriptorSetLayout(
vkDevice, vkDescriptorSetLayoutBindingList);
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
@@ -706,10 +777,18 @@ int run_test_with_multi_import_same_ctx(
VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
vkDescriptorSetLayout);
- clVk2CLExternalSemaphore = new clExternalSemaphore(
- vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
- clCl2VkExternalSemaphore = new clExternalSemaphore(
- vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ if (use_fence)
+ {
+ fence = std::make_shared<VulkanFence>(vkDevice);
+ }
+ else
+ {
+ clVk2CLExternalSemaphore = new clExternalSemaphore(
+ vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ clCl2VkExternalSemaphore = new clExternalSemaphore(
+ vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ }
+
const uint32_t maxIter = innerIterations;
VulkanCommandPool vkCommandPool(vkDevice);
VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool);
@@ -767,7 +846,7 @@ int run_test_with_multi_import_same_ctx(
if (withOffset == 0)
{
vkBufferListDeviceMemory.push_back(
- new VulkanDeviceMemory(vkDevice, pBufferSize,
+ new VulkanDeviceMemory(vkDevice, vkBufferList[bIdx],
memoryType,
vkExternalMemoryHandleType));
}
@@ -811,9 +890,8 @@ int run_test_with_multi_import_same_ctx(
externalMemory[bIdx][cl_bIdx]
->getExternalMemoryBuffer();
}
- vkDescriptorSet.update((uint32_t)bIdx + 1,
- vkBufferList[bIdx]);
}
+ vkDescriptorSet.updateArray(1, numBuffers, vkBufferList);
vkCommandBuffer.begin();
vkCommandBuffer.bindPipeline(vkComputePipeline);
vkCommandBuffer.bindDescriptorSets(
@@ -832,16 +910,34 @@ int run_test_with_multi_import_same_ctx(
for (uint32_t iter = 0; iter < maxIter; iter++)
{
- if (iter == 0)
+ if (use_fence)
{
- vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
+ fence->reset();
+ vkQueue.submit(vkCommandBuffer, fence);
+ fence->wait();
}
else
{
- vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
- vkVk2CLSemaphore);
+ if (iter == 0)
+ {
+ vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
+ }
+ else
+ {
+ vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
+ vkVk2CLSemaphore);
+ }
}
- clVk2CLExternalSemaphore->wait(cmd_queue1);
+
+ if (use_fence)
+ {
+ fence->wait();
+ }
+ else
+ {
+ clVk2CLExternalSemaphore->wait(cmd_queue1);
+ }
+
for (uint8_t launchIter = 0; launchIter < numImports;
launchIter++)
{
@@ -874,7 +970,11 @@ int run_test_with_multi_import_same_ctx(
goto CLEANUP;
}
}
- if (iter != (maxIter - 1))
+ if (use_fence)
+ {
+ clFinish(cmd_queue1);
+ }
+ else if (!use_fence && iter != (maxIter - 1))
{
clCl2VkExternalSemaphore->signal(cmd_queue1);
}
@@ -987,8 +1087,13 @@ CLEANUP:
}
}
}
- if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
- if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
+
+ if (!use_fence)
+ {
+ if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
+ if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
+ }
+
if (error_2) free(error_2);
if (error_1) clReleaseMemObject(error_1);
return err;
@@ -998,7 +1103,8 @@ int run_test_with_multi_import_diff_ctx(
cl_context &context, cl_context &context2, cl_command_queue &cmd_queue1,
cl_command_queue &cmd_queue2, cl_kernel *kernel1, cl_kernel *kernel2,
cl_kernel &verify_kernel, cl_kernel verify_kernel2, VulkanDevice &vkDevice,
- uint32_t numBuffers, uint32_t bufferSize, uint32_t bufferSizeForOffset)
+ uint32_t numBuffers, uint32_t bufferSize, uint32_t bufferSizeForOffset,
+ float use_fence)
{
size_t global_work_size[1];
uint8_t *error_3;
@@ -1023,6 +1129,7 @@ int run_test_with_multi_import_diff_ctx(
getSupportedVulkanExternalSemaphoreHandleTypeList()[0];
VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType);
VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType);
+ std::shared_ptr<VulkanFence> fence = nullptr;
VulkanQueue &vkQueue = vkDevice.getQueue();
@@ -1042,15 +1149,24 @@ int run_test_with_multi_import_diff_ctx(
VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool,
vkDescriptorSetLayout);
- clVk2CLExternalSemaphore = new clExternalSemaphore(
- vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
- clCl2VkExternalSemaphore = new clExternalSemaphore(
- vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
-
- clVk2CLExternalSemaphore2 = new clExternalSemaphore(
- vkVk2CLSemaphore, context2, vkExternalSemaphoreHandleType, deviceId);
- clCl2VkExternalSemaphore2 = new clExternalSemaphore(
- vkCl2VkSemaphore, context2, vkExternalSemaphoreHandleType, deviceId);
+ if (use_fence)
+ {
+ fence = std::make_shared<VulkanFence>(vkDevice);
+ }
+ else
+ {
+ clVk2CLExternalSemaphore = new clExternalSemaphore(
+ vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+ clCl2VkExternalSemaphore = new clExternalSemaphore(
+ vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
+
+ clVk2CLExternalSemaphore2 =
+ new clExternalSemaphore(vkVk2CLSemaphore, context2,
+ vkExternalSemaphoreHandleType, deviceId);
+ clCl2VkExternalSemaphore2 =
+ new clExternalSemaphore(vkCl2VkSemaphore, context2,
+ vkExternalSemaphoreHandleType, deviceId);
+ }
const uint32_t maxIter = innerIterations;
VulkanCommandPool vkCommandPool(vkDevice);
@@ -1192,16 +1308,33 @@ int run_test_with_multi_import_diff_ctx(
for (uint32_t iter = 0; iter < maxIter; iter++)
{
- if (iter == 0)
+ if (use_fence)
{
- vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
+ fence->reset();
+ vkQueue.submit(vkCommandBuffer, fence);
+ fence->wait();
}
else
{
- vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
- vkVk2CLSemaphore);
+ if (iter == 0)
+ {
+ vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
+ }
+ else
+ {
+ vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
+ vkVk2CLSemaphore);
+ }
+ }
+
+ if (use_fence)
+ {
+ fence->wait();
+ }
+ else
+ {
+ clVk2CLExternalSemaphore->wait(cmd_queue1);
}
- clVk2CLExternalSemaphore->wait(cmd_queue1);
for (uint8_t launchIter = 0; launchIter < numImports;
launchIter++)
@@ -1235,7 +1368,11 @@ int run_test_with_multi_import_diff_ctx(
goto CLEANUP;
}
}
- if (iter != (maxIter - 1))
+ if (use_fence)
+ {
+ clFinish(cmd_queue1);
+ }
+ else if (!use_fence && iter != (maxIter - 1))
{
clCl2VkExternalSemaphore->signal(cmd_queue1);
}
@@ -1243,16 +1380,33 @@ int run_test_with_multi_import_diff_ctx(
clFinish(cmd_queue1);
for (uint32_t iter = 0; iter < maxIter; iter++)
{
- if (iter == 0)
+ if (use_fence)
{
- vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
+ fence->reset();
+ vkQueue.submit(vkCommandBuffer, fence);
+ fence->wait();
}
else
{
- vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
- vkVk2CLSemaphore);
+ if (iter == 0)
+ {
+ vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore);
+ }
+ else
+ {
+ vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer,
+ vkVk2CLSemaphore);
+ }
+ }
+
+ if (use_fence)
+ {
+ fence->wait();
+ }
+ else
+ {
+ clVk2CLExternalSemaphore2->wait(cmd_queue2);
}
- clVk2CLExternalSemaphore2->wait(cmd_queue2);
for (uint8_t launchIter = 0; launchIter < numImports;
launchIter++)
@@ -1286,7 +1440,11 @@ int run_test_with_multi_import_diff_ctx(
goto CLEANUP;
}
}
- if (iter != (maxIter - 1))
+ if (use_fence)
+ {
+ clFinish(cmd_queue2);
+ }
+ else if (!use_fence && iter != (maxIter - 1))
{
clCl2VkExternalSemaphore2->signal(cmd_queue2);
}
@@ -1474,10 +1632,15 @@ CLEANUP:
}
}
}
- if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
- if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
- if (clVk2CLExternalSemaphore2) delete clVk2CLExternalSemaphore2;
- if (clCl2VkExternalSemaphore2) delete clCl2VkExternalSemaphore2;
+
+ if (!use_fence)
+ {
+ if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore;
+ if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore;
+ if (clVk2CLExternalSemaphore2) delete clVk2CLExternalSemaphore2;
+ if (clCl2VkExternalSemaphore2) delete clCl2VkExternalSemaphore2;
+ }
+
if (error_3) free(error_3);
if (error_1) clReleaseMemObject(error_1);
if (error_2) clReleaseMemObject(error_2);
@@ -1485,7 +1648,8 @@ CLEANUP:
}
int test_buffer_common(cl_device_id device_, cl_context context_,
- cl_command_queue queue_, int numElements_)
+ cl_command_queue queue_, int numElements_,
+ float use_fence)
{
int current_device = 0;
@@ -1738,26 +1902,26 @@ int test_buffer_common(cl_device_id device_, cl_context context_,
{
errNum = run_test_with_multi_import_same_ctx(
context, cmd_queue1, kernel, verify_kernel, vkDevice,
- numBuffers, bufferSize, bufferSizeForOffset);
+ numBuffers, bufferSize, bufferSizeForOffset, use_fence);
}
else if (multiImport && multiCtx)
{
errNum = run_test_with_multi_import_diff_ctx(
context, context2, cmd_queue1, cmd_queue3, kernel, kernel2,
verify_kernel, verify_kernel2, vkDevice, numBuffers,
- bufferSize, bufferSizeForOffset);
+ bufferSize, bufferSizeForOffset, use_fence);
}
else if (numCQ == 2)
{
errNum = run_test_with_two_queue(
context, cmd_queue1, cmd_queue2, kernel, verify_kernel,
- vkDevice, numBuffers + 1, bufferSize);
+ vkDevice, numBuffers + 1, bufferSize, use_fence);
}
else
{
- errNum = run_test_with_one_queue(context, cmd_queue1, kernel,
- verify_kernel, vkDevice,
- numBuffers, bufferSize);
+ errNum = run_test_with_one_queue(
+ context, cmd_queue1, kernel, verify_kernel, vkDevice,
+ numBuffers, bufferSize, use_fence);
}
if (errNum != CL_SUCCESS)
{
diff --git a/test_conformance/vulkan/test_vulkan_interop_image.cpp b/test_conformance/vulkan/test_vulkan_interop_image.cpp
index 47a31665..5f1f6e4b 100644
--- a/test_conformance/vulkan/test_vulkan_interop_image.cpp
+++ b/test_conformance/vulkan/test_vulkan_interop_image.cpp
@@ -226,9 +226,11 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
srcBufferPtr = (char *)malloc(maxImage2DSize);
dstBufferPtr = (char *)malloc(maxImage2DSize);
- VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
- VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1,
- VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS);
+ VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
+ vkDescriptorSetLayoutBindingList.addBinding(
+ 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
+ vkDescriptorSetLayoutBindingList.addBinding(
+ 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS);
VulkanDescriptorSetLayout vkDescriptorSetLayout(
vkDevice, vkDescriptorSetLayoutBindingList);
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
@@ -255,10 +257,10 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
clCl2VkExternalSemaphore = new clExternalSemaphore(
vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
- std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory1;
- std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory2;
- std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory1;
- std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory2;
+ std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory1;
+ std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory2;
+ std::vector<clExternalMemoryImage *> externalMemory1;
+ std::vector<clExternalMemoryImage *> externalMemory2;
std::vector<char> vkImage2DShader;
for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++)
@@ -352,8 +354,6 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
VulkanExternalMemoryHandleType
vkExternalMemoryHandleType =
vkExternalMemoryHandleTypeList[emhtIdx];
- log_info("External memory handle type: %d \n",
- vkExternalMemoryHandleType);
if ((true == disableNTHandleType)
&& (VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT
== vkExternalMemoryHandleType))
@@ -361,9 +361,19 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
// Skip running for WIN32 NT handle.
continue;
}
+ log_info("External memory handle type: %d \n",
+ vkExternalMemoryHandleType);
+ VulkanImageTiling vulkanImageTiling =
+ vkClExternalMemoryHandleTilingAssumption(
+ deviceId,
+ vkExternalMemoryHandleTypeList[emhtIdx], &err);
+ ASSERT_SUCCESS(err,
+ "Failed to query OpenCL tiling mode");
+
VulkanImage2D vkDummyImage2D(
vkDevice, vkFormatList[0], widthList[0],
- heightList[0], 1, vkExternalMemoryHandleType);
+ heightList[0], vulkanImageTiling, 1,
+ vkExternalMemoryHandleType);
const VulkanMemoryTypeList &memoryTypeList =
vkDummyImage2D.getMemoryTypeList();
@@ -390,118 +400,73 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
{
VulkanImage2D vkImage2D(
vkDevice, vkFormat, width, height,
- numMipLevels, vkExternalMemoryHandleType);
+ vulkanImageTiling, numMipLevels,
+ vkExternalMemoryHandleType);
ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize);
totalImageMemSize =
ROUND_UP(vkImage2D.getSize(),
vkImage2D.getAlignment());
}
- VulkanImage2DList vkNonDedicatedImage2DList(
+ VulkanImage2DList vkImage2DList(
num2DImages, vkDevice, vkFormat, width, height,
- numMipLevels, vkExternalMemoryHandleType);
+ vulkanImageTiling, numMipLevels,
+ vkExternalMemoryHandleType);
for (size_t bIdx = 0; bIdx < num2DImages; bIdx++)
{
- if (non_dedicated)
- {
- vkNonDedicatedImage2DListDeviceMemory1
- .push_back(new VulkanDeviceMemory(
- vkDevice, totalImageMemSize,
- memoryType,
- vkExternalMemoryHandleType));
- }
- else
- {
- vkNonDedicatedImage2DListDeviceMemory1
- .push_back(new VulkanDeviceMemory(
- vkDevice,
- vkNonDedicatedImage2DList[bIdx],
- memoryType,
- vkExternalMemoryHandleType));
- }
- vkNonDedicatedImage2DListDeviceMemory1[bIdx]
- ->bindImage(vkNonDedicatedImage2DList[bIdx],
- 0);
- nonDedicatedExternalMemory1.push_back(
+ vkImage2DListDeviceMemory1.push_back(
+ new VulkanDeviceMemory(
+ vkDevice, vkImage2DList[bIdx],
+ memoryType,
+ vkExternalMemoryHandleType));
+ vkImage2DListDeviceMemory1[bIdx]->bindImage(
+ vkImage2DList[bIdx], 0);
+ externalMemory1.push_back(
new clExternalMemoryImage(
- *vkNonDedicatedImage2DListDeviceMemory1
- [bIdx],
+ *vkImage2DListDeviceMemory1[bIdx],
vkExternalMemoryHandleType, context,
totalImageMemSize, width, height, 0,
- vkNonDedicatedImage2DList[bIdx],
- deviceId));
+ vkImage2DList[bIdx], deviceId));
}
- VulkanImageViewList vkNonDedicatedImage2DViewList(
- vkDevice, vkNonDedicatedImage2DList);
- VulkanImage2DList vkNonDedicatedImage2DList2(
+ VulkanImageViewList vkImage2DViewList(
+ vkDevice, vkImage2DList);
+ VulkanImage2DList vkImage2DList2(
num2DImages, vkDevice, vkFormat, width, height,
- numMipLevels, vkExternalMemoryHandleType);
+ vulkanImageTiling, numMipLevels,
+ vkExternalMemoryHandleType);
for (size_t bIdx = 0; bIdx < num2DImages; bIdx++)
{
- if (non_dedicated)
- {
- vkNonDedicatedImage2DListDeviceMemory2
- .push_back(new VulkanDeviceMemory(
- vkDevice, totalImageMemSize,
- memoryType,
- vkExternalMemoryHandleType));
- }
- else
- {
- vkNonDedicatedImage2DListDeviceMemory2
- .push_back(new VulkanDeviceMemory(
- vkDevice,
- vkNonDedicatedImage2DList2[bIdx],
- memoryType,
- vkExternalMemoryHandleType));
- }
- vkNonDedicatedImage2DListDeviceMemory2[bIdx]
- ->bindImage(
- vkNonDedicatedImage2DList2[bIdx], 0);
- nonDedicatedExternalMemory2.push_back(
+ vkImage2DListDeviceMemory2.push_back(
+ new VulkanDeviceMemory(
+ vkDevice, vkImage2DList2[bIdx],
+ memoryType,
+ vkExternalMemoryHandleType));
+ vkImage2DListDeviceMemory2[bIdx]->bindImage(
+ vkImage2DList2[bIdx], 0);
+ externalMemory2.push_back(
new clExternalMemoryImage(
- *vkNonDedicatedImage2DListDeviceMemory2
- [bIdx],
+ *vkImage2DListDeviceMemory2[bIdx],
vkExternalMemoryHandleType, context,
totalImageMemSize, width, height, 0,
- vkNonDedicatedImage2DList2[bIdx],
- deviceId));
+ vkImage2DList2[bIdx], deviceId));
}
- VulkanImageViewList vkDedicatedImage2DViewList(
- vkDevice, vkNonDedicatedImage2DList2);
cl_mem external_mem_image1[5];
cl_mem external_mem_image2[5];
for (int i = 0; i < num2DImages; i++)
{
external_mem_image1[i] =
- nonDedicatedExternalMemory1[i]
+ externalMemory1[i]
->getExternalMemoryImage();
external_mem_image2[i] =
- nonDedicatedExternalMemory2[i]
+ externalMemory2[i]
->getExternalMemoryImage();
}
- VulkanImage2DList &vkImage2DList =
- vkNonDedicatedImage2DList;
- VulkanImageViewList &vkImage2DViewList =
- vkNonDedicatedImage2DViewList;
clCl2VkExternalSemaphore->signal(cmd_queue1);
if (!useSingleImageKernel)
{
- for (size_t i2DIdx = 0;
- i2DIdx < vkImage2DList.size(); i2DIdx++)
- {
- for (uint32_t mipLevel = 0;
- mipLevel < numMipLevels; mipLevel++)
- {
- uint32_t i2DvIdx =
- (uint32_t)(i2DIdx * numMipLevels)
- + mipLevel;
- vkDescriptorSet.update(
- 1 + i2DvIdx,
- vkImage2DViewList[i2DvIdx]);
- }
- }
+ vkDescriptorSet.updateArray(1,
+ vkImage2DViewList);
vkCopyCommandBuffer.begin();
vkCopyCommandBuffer.pipelineBarrier(
vkImage2DList,
@@ -743,29 +708,25 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1,
}
for (int i = 0; i < num2DImages; i++)
{
- delete vkNonDedicatedImage2DListDeviceMemory1
- [i];
- delete vkNonDedicatedImage2DListDeviceMemory2
- [i];
- delete nonDedicatedExternalMemory1[i];
- delete nonDedicatedExternalMemory2[i];
+ delete vkImage2DListDeviceMemory1[i];
+ delete vkImage2DListDeviceMemory2[i];
+ delete externalMemory1[i];
+ delete externalMemory2[i];
}
- vkNonDedicatedImage2DListDeviceMemory1.erase(
- vkNonDedicatedImage2DListDeviceMemory1.begin(),
- vkNonDedicatedImage2DListDeviceMemory1.begin()
- + num2DImages);
- vkNonDedicatedImage2DListDeviceMemory2.erase(
- vkNonDedicatedImage2DListDeviceMemory2.begin(),
- vkNonDedicatedImage2DListDeviceMemory2.begin()
+ vkImage2DListDeviceMemory1.erase(
+ vkImage2DListDeviceMemory1.begin(),
+ vkImage2DListDeviceMemory1.begin()
+ num2DImages);
- nonDedicatedExternalMemory1.erase(
- nonDedicatedExternalMemory1.begin(),
- nonDedicatedExternalMemory1.begin()
- + num2DImages);
- nonDedicatedExternalMemory2.erase(
- nonDedicatedExternalMemory2.begin(),
- nonDedicatedExternalMemory2.begin()
+ vkImage2DListDeviceMemory2.erase(
+ vkImage2DListDeviceMemory2.begin(),
+ vkImage2DListDeviceMemory2.begin()
+ num2DImages);
+ externalMemory1.erase(externalMemory1.begin(),
+ externalMemory1.begin()
+ + num2DImages);
+ externalMemory2.erase(externalMemory2.begin(),
+ externalMemory2.begin()
+ + num2DImages);
if (CL_SUCCESS != err)
{
goto CLEANUP;
@@ -822,9 +783,11 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
srcBufferPtr = (char *)malloc(maxImage2DSize);
dstBufferPtr = (char *)malloc(maxImage2DSize);
- VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList(
- VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1,
- VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS);
+ VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList;
+ vkDescriptorSetLayoutBindingList.addBinding(
+ 0, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1);
+ vkDescriptorSetLayoutBindingList.addBinding(
+ 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS);
VulkanDescriptorSetLayout vkDescriptorSetLayout(
vkDevice, vkDescriptorSetLayoutBindingList);
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout);
@@ -851,10 +814,10 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
clCl2VkExternalSemaphore = new clExternalSemaphore(
vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId);
- std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory1;
- std::vector<VulkanDeviceMemory *> vkNonDedicatedImage2DListDeviceMemory2;
- std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory1;
- std::vector<clExternalMemoryImage *> nonDedicatedExternalMemory2;
+ std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory1;
+ std::vector<VulkanDeviceMemory *> vkImage2DListDeviceMemory2;
+ std::vector<clExternalMemoryImage *> externalMemory1;
+ std::vector<clExternalMemoryImage *> externalMemory2;
std::vector<char> vkImage2DShader;
for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++)
@@ -957,9 +920,18 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
// Skip running for WIN32 NT handle.
continue;
}
+
+ VulkanImageTiling vulkanImageTiling =
+ vkClExternalMemoryHandleTilingAssumption(
+ deviceId,
+ vkExternalMemoryHandleTypeList[emhtIdx], &err);
+ ASSERT_SUCCESS(err,
+ "Failed to query OpenCL tiling mode");
+
VulkanImage2D vkDummyImage2D(
vkDevice, vkFormatList[0], widthList[0],
- heightList[0], 1, vkExternalMemoryHandleType);
+ heightList[0], vulkanImageTiling, 1,
+ vkExternalMemoryHandleType);
const VulkanMemoryTypeList &memoryTypeList =
vkDummyImage2D.getMemoryTypeList();
@@ -985,98 +957,78 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
{
VulkanImage2D vkImage2D(
vkDevice, vkFormat, width, height,
- numMipLevels, vkExternalMemoryHandleType);
+ vulkanImageTiling, numMipLevels,
+ vkExternalMemoryHandleType);
ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize);
totalImageMemSize =
ROUND_UP(vkImage2D.getSize(),
vkImage2D.getAlignment());
}
- VulkanImage2DList vkNonDedicatedImage2DList(
+ VulkanImage2DList vkImage2DList(
num2DImages, vkDevice, vkFormat, width, height,
- numMipLevels, vkExternalMemoryHandleType);
- for (size_t bIdx = 0;
- bIdx < vkNonDedicatedImage2DList.size();
+ vulkanImageTiling, numMipLevels,
+ vkExternalMemoryHandleType);
+ for (size_t bIdx = 0; bIdx < vkImage2DList.size();
bIdx++)
{
// Create list of Vulkan device memories and
// bind the list of Vulkan images.
- vkNonDedicatedImage2DListDeviceMemory1
- .push_back(new VulkanDeviceMemory(
- vkDevice, totalImageMemSize, memoryType,
+ vkImage2DListDeviceMemory1.push_back(
+ new VulkanDeviceMemory(
+ vkDevice, vkImage2DList[bIdx],
+ memoryType,
vkExternalMemoryHandleType));
- vkNonDedicatedImage2DListDeviceMemory1[bIdx]
- ->bindImage(vkNonDedicatedImage2DList[bIdx],
- 0);
- nonDedicatedExternalMemory1.push_back(
+ vkImage2DListDeviceMemory1[bIdx]->bindImage(
+ vkImage2DList[bIdx], 0);
+ externalMemory1.push_back(
new clExternalMemoryImage(
- *vkNonDedicatedImage2DListDeviceMemory1
- [bIdx],
+ *vkImage2DListDeviceMemory1[bIdx],
vkExternalMemoryHandleType, context,
totalImageMemSize, width, height, 0,
- vkNonDedicatedImage2DList[bIdx],
- deviceId));
+ vkImage2DList[bIdx], deviceId));
}
- VulkanImageViewList vkNonDedicatedImage2DViewList(
- vkDevice, vkNonDedicatedImage2DList);
+ VulkanImageViewList vkImage2DViewList(
+ vkDevice, vkImage2DList);
- VulkanImage2DList vkNonDedicatedImage2DList2(
+ VulkanImage2DList vkImage2DList2(
num2DImages, vkDevice, vkFormat, width, height,
- numMipLevels, vkExternalMemoryHandleType);
- for (size_t bIdx = 0;
- bIdx < vkNonDedicatedImage2DList2.size();
+ vulkanImageTiling, numMipLevels,
+ vkExternalMemoryHandleType);
+ for (size_t bIdx = 0; bIdx < vkImage2DList2.size();
bIdx++)
{
- vkNonDedicatedImage2DListDeviceMemory2
- .push_back(new VulkanDeviceMemory(
- vkDevice, totalImageMemSize, memoryType,
+ vkImage2DListDeviceMemory2.push_back(
+ new VulkanDeviceMemory(
+ vkDevice, vkImage2DList2[bIdx],
+ memoryType,
vkExternalMemoryHandleType));
- vkNonDedicatedImage2DListDeviceMemory2[bIdx]
- ->bindImage(
- vkNonDedicatedImage2DList2[bIdx], 0);
- nonDedicatedExternalMemory2.push_back(
+ vkImage2DListDeviceMemory2[bIdx]->bindImage(
+ vkImage2DList2[bIdx], 0);
+ externalMemory2.push_back(
new clExternalMemoryImage(
- *vkNonDedicatedImage2DListDeviceMemory2
- [bIdx],
+ *vkImage2DListDeviceMemory2[bIdx],
vkExternalMemoryHandleType, context,
totalImageMemSize, width, height, 0,
- vkNonDedicatedImage2DList2[bIdx],
- deviceId));
+ vkImage2DList2[bIdx], deviceId));
}
- VulkanImageViewList vkDedicatedImage2DViewList(
- vkDevice, vkNonDedicatedImage2DList2);
+
cl_mem external_mem_image1[4];
cl_mem external_mem_image2[4];
for (int i = 0; i < num2DImages; i++)
{
external_mem_image1[i] =
- nonDedicatedExternalMemory1[i]
+ externalMemory1[i]
->getExternalMemoryImage();
external_mem_image2[i] =
- nonDedicatedExternalMemory2[i]
+ externalMemory2[i]
->getExternalMemoryImage();
}
- VulkanImage2DList &vkImage2DList =
- vkNonDedicatedImage2DList;
- VulkanImageViewList &vkImage2DViewList =
- vkNonDedicatedImage2DViewList;
clCl2VkExternalSemaphore->signal(cmd_queue1);
if (!useSingleImageKernel)
{
- for (size_t i2DIdx = 0;
- i2DIdx < vkImage2DList.size(); i2DIdx++)
- {
- for (uint32_t mipLevel = 0;
- mipLevel < numMipLevels; mipLevel++)
- {
- uint32_t i2DvIdx =
- (uint32_t)(i2DIdx * numMipLevels)
- + mipLevel;
- vkDescriptorSet.update(
- 1 + i2DvIdx,
- vkImage2DViewList[i2DvIdx]);
- }
- }
+ vkDescriptorSet.updateArray(1,
+ vkImage2DViewList);
vkCopyCommandBuffer.begin();
vkCopyCommandBuffer.pipelineBarrier(
vkImage2DList,
@@ -1275,29 +1227,25 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1,
}
for (int i = 0; i < num2DImages; i++)
{
- delete vkNonDedicatedImage2DListDeviceMemory1
- [i];
- delete vkNonDedicatedImage2DListDeviceMemory2
- [i];
- delete nonDedicatedExternalMemory1[i];
- delete nonDedicatedExternalMemory2[i];
+ delete vkImage2DListDeviceMemory1[i];
+ delete vkImage2DListDeviceMemory2[i];
+ delete externalMemory1[i];
+ delete externalMemory2[i];
}
- vkNonDedicatedImage2DListDeviceMemory1.erase(
- vkNonDedicatedImage2DListDeviceMemory1.begin(),
- vkNonDedicatedImage2DListDeviceMemory1.begin()
- + num2DImages);
- vkNonDedicatedImage2DListDeviceMemory2.erase(
- vkNonDedicatedImage2DListDeviceMemory2.begin(),
- vkNonDedicatedImage2DListDeviceMemory2.begin()
- + num2DImages);
- nonDedicatedExternalMemory1.erase(
- nonDedicatedExternalMemory1.begin(),
- nonDedicatedExternalMemory1.begin()
+ vkImage2DListDeviceMemory1.erase(
+ vkImage2DListDeviceMemory1.begin(),
+ vkImage2DListDeviceMemory1.begin()
+ num2DImages);
- nonDedicatedExternalMemory2.erase(
- nonDedicatedExternalMemory2.begin(),
- nonDedicatedExternalMemory2.begin()
+ vkImage2DListDeviceMemory2.erase(
+ vkImage2DListDeviceMemory2.begin(),
+ vkImage2DListDeviceMemory2.begin()
+ num2DImages);
+ externalMemory1.erase(externalMemory1.begin(),
+ externalMemory1.begin()
+ + num2DImages);
+ externalMemory2.erase(externalMemory2.begin(),
+ externalMemory2.begin()
+ + num2DImages);
if (CL_SUCCESS != err)
{
goto CLEANUP;
diff --git a/test_conformance/vulkan/vulkan_interop_common.hpp b/test_conformance/vulkan/vulkan_interop_common.hpp
index 18d84f09..a1162407 100644
--- a/test_conformance/vulkan/vulkan_interop_common.hpp
+++ b/test_conformance/vulkan/vulkan_interop_common.hpp
@@ -45,6 +45,5 @@ extern bool useDeviceLocal;
extern bool disableNTHandleType;
// Enable offset for multiImport of vulkan device memory
extern bool enableOffset;
-extern bool non_dedicated;
#endif // _vulkan_interop_common_hpp_
diff --git a/test_conformance/workgroups/test_wg_all.cpp b/test_conformance/workgroups/test_wg_all.cpp
index 41abd124..f9b574e4 100644
--- a/test_conformance/workgroups/test_wg_all.cpp
+++ b/test_conformance/workgroups/test_wg_all.cpp
@@ -75,7 +75,6 @@ test_work_group_all(cl_device_id device, cl_context context, cl_command_queue qu
size_t wg_size[1];
size_t num_elements;
int err;
- int i;
MTdata d;
err = create_single_kernel_helper(context, &program, &kernel, 1,
@@ -110,7 +109,7 @@ test_work_group_all(cl_device_id device, cl_context context, cl_command_queue qu
p = input_ptr[0];
d = init_genrand( gRandomSeed );
- for (i=0; i<(num_elements+1); i++)
+ for (size_t i = 0; i < (num_elements + 1); i++)
{
p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
}
diff --git a/test_conformance/workgroups/test_wg_any.cpp b/test_conformance/workgroups/test_wg_any.cpp
index e0242cfb..f7ff899a 100644
--- a/test_conformance/workgroups/test_wg_any.cpp
+++ b/test_conformance/workgroups/test_wg_any.cpp
@@ -75,7 +75,6 @@ test_work_group_any(cl_device_id device, cl_context context, cl_command_queue qu
size_t wg_size[1];
size_t num_elements;
int err;
- int i;
MTdata d;
err = create_single_kernel_helper(context, &program, &kernel, 1,
@@ -110,7 +109,7 @@ test_work_group_any(cl_device_id device, cl_context context, cl_command_queue qu
p = input_ptr[0];
d = init_genrand( gRandomSeed );
- for (i=0; i<(num_elements+1); i++)
+ for (size_t i = 0; i < (num_elements + 1); i++)
{
p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
}
diff --git a/test_conformance/workgroups/test_wg_broadcast.cpp b/test_conformance/workgroups/test_wg_broadcast.cpp
index e24ac7b9..a4cb0c6f 100644
--- a/test_conformance/workgroups/test_wg_broadcast.cpp
+++ b/test_conformance/workgroups/test_wg_broadcast.cpp
@@ -70,7 +70,7 @@ verify_wg_broadcast_1D(float *inptr, float *outptr, size_t n, size_t wg_size)
for (i=0,group_id=0; i<n; i+=wg_size,group_id++)
{
- int local_size = (n-i) > wg_size ? wg_size : (n-i);
+ size_t local_size = (n - i) > wg_size ? wg_size : (n - i);
float broadcast_result = inptr[i + (group_id % local_size)];
for (j=0; j<local_size; j++)
{
@@ -172,7 +172,6 @@ test_work_group_broadcast_1D(cl_device_id device, cl_context context, cl_command
size_t wg_size[1];
size_t num_elements;
int err;
- int i;
MTdata d;
err = create_single_kernel_helper(context, &program, &kernel, 1,
@@ -207,7 +206,7 @@ test_work_group_broadcast_1D(cl_device_id device, cl_context context, cl_command
p = input_ptr[0];
d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
+ for (size_t i = 0; i < num_elements; i++)
{
p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
}
@@ -278,7 +277,6 @@ test_work_group_broadcast_2D(cl_device_id device, cl_context context, cl_command
size_t num_workgroups;
size_t num_elements;
int err;
- int i;
MTdata d;
err = create_single_kernel_helper(context, &program, &kernel, 1,
@@ -333,7 +331,7 @@ test_work_group_broadcast_2D(cl_device_id device, cl_context context, cl_command
p = input_ptr[0];
d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
+ for (size_t i = 0; i < num_elements; i++)
{
p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
}
@@ -402,7 +400,6 @@ test_work_group_broadcast_3D(cl_device_id device, cl_context context, cl_command
size_t num_workgroups;
size_t num_elements;
int err;
- int i;
MTdata d;
err = create_single_kernel_helper(context, &program, &kernel, 1,
@@ -458,7 +455,7 @@ test_work_group_broadcast_3D(cl_device_id device, cl_context context, cl_command
p = input_ptr[0];
d = init_genrand( gRandomSeed );
- for (i=0; i<num_elements; i++)
+ for (size_t i = 0; i < num_elements; i++)
{
p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
}
diff --git a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp
index 648e68ce..a31fca63 100644
--- a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp
+++ b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp
@@ -219,10 +219,8 @@ int do_test(cl_device_id device, cl_context context, cl_command_queue queue,
int do_test_work_group_suggested_local_size(
cl_device_id device, cl_context context, cl_command_queue queue,
bool (*skip_cond)(size_t), size_t start, size_t end, size_t incr,
- cl_long max_local_mem_size, size_t global_work_offset[], num_dims dim)
+ cl_ulong max_local_mem_size, size_t global_work_offset[], num_dims dim)
{
- clProgramWrapper scan_program;
- clKernelWrapper scan_kernel;
int err;
size_t test_values[] = { 1, 1, 1 };
std::string kernel_names[6] = {
@@ -244,6 +242,8 @@ int do_test_work_group_suggested_local_size(
for (int kernel_num = 0; kernel_num < 6; kernel_num++)
{
if (max_local_mem_size < local_mem_size[kernel_num]) continue;
+ clProgramWrapper scan_program;
+ clKernelWrapper scan_kernel;
// Create the kernel
err = create_single_kernel_helper(
context, &scan_program, &scan_kernel, 1,
@@ -300,7 +300,7 @@ int test_work_group_suggested_local_size_1D(cl_device_id device,
"Skipping the test.\n");
return TEST_SKIPPED_ITSELF;
}
- cl_long max_local_mem_size;
+ cl_ulong max_local_mem_size;
cl_int err =
clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE,
sizeof(max_local_mem_size), &max_local_mem_size, NULL);